Subversion Repositories Kolibri OS

Compare Revisions

Rev 4246 → Rev 4251

/drivers/video/Intel-2D/kgem-sna.c
File deleted
/drivers/video/Intel-2D/brw/brw_eu.h
2239,8 → 2239,6
struct brw_reg src0,
struct brw_reg src1);
 
void brw_print_reg(struct brw_reg reg);
 
static inline void brw_math_invert(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
/drivers/video/Intel-2D/compiler.h
37,21 → 37,48
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define __packed__ __attribute__((__packed__))
#define tightly_packed __attribute__((__packed__))
#define flatten __attribute__((flatten))
#define page_aligned __attribute__((aligned(4096)))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline inline
#define fastcall
#define must_check
#define constant
#define pure
#define __packed__
#define tightly_packed
#define flatten
#define page_aligned
#endif
 
#define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor))
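/* e.g. HAS_GCC(4, 5) is true when building with gcc 4.5 or newer; the
 * attribute wrappers below are gated on it so older compilers still build. */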
 
#if HAS_GCC(4, 5)
#define sse2 __attribute__((target("sse2,fpmath=sse")))
#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 7)
#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast __attribute__((optimize("Ofast")))
#else
#define fast
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))
#else
#define fast_memcpy
#endif
 
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
/drivers/video/Intel-2D/gen3_render.c
107,7 → 107,15
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
};
 
#define xFixedToDouble(f) pixman_fixed_to_double(f)
187,7 → 195,14
switch (format) {
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
case PICT_r5g6b5:
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a8:
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return false;
default:
return true;
207,15 → 222,257
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return BIAS | COLR_BUF_ARGB8888;
case PICT_r5g6b5:
case PICT_b5g6r5:
return BIAS | COLR_BUF_RGB565;
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a1b5g5r5:
case PICT_x1b5g5r5:
return BIAS | COLR_BUF_ARGB1555;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a2b10g10r10:
case PICT_x2b10g10r10:
return BIAS | COLR_BUF_ARGB2AAA;
case PICT_a8:
return BIAS | COLR_BUF_8BIT;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
case PICT_a4b4g4r4:
case PICT_x4b4g4r4:
return BIAS | COLR_BUF_ARGB4444;
}
#undef BIAS
}
 
 
#if 0
static bool gen3_check_repeat(PicturePtr p)
{
if (!p->repeat)
return true;
 
switch (p->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
return true;
default:
return false;
}
}
 
static uint32_t gen3_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
case PictFilterNearest:
return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
case PictFilterBilinear:
return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
}
}
 
static bool gen3_check_filter(PicturePtr p)
{
switch (p->filter) {
case PictFilterNearest:
case PictFilterBilinear:
return true;
default:
return false;
}
}
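/* Each rectangle is emitted as three vertices (bottom-right, bottom-left,
 * top-left); the hardware derives the fourth corner of the rectangle. */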
fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
int16_t dst_x, dst_y;
int16_t src_x, src_y;
 
dst_x = r->dst.x + op->dst.x;
dst_y = r->dst.y + op->dst.y;
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
 
gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
OUT_VERTEX(src_x + r->width);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y);
}
 
fastcall static void
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2;
v[1] = box->y2;
v[2] = box->x2 + op->src.offset[0];
v[3] = box->y2 + op->src.offset[1];
 
v[4] = box->x1;
v[5] = box->y2;
v[6] = box->x1 + op->src.offset[0];
v[7] = box->y2 + op->src.offset[1];
 
v[8] = box->x1;
v[9] = box->y1;
v[10] = box->x1 + op->src.offset[0];
v[11] = box->y1 + op->src.offset[1];
 
v += 12;
box++;
} while (--nbox);
}
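/* The emit_boxes variants write the same 12 floats per box straight into
 * the caller-supplied vertex buffer, so whole box arrays can be emitted
 * without going through the per-rectangle helpers above. */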
fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
const PictTransform *transform = op->src.transform;
 
do {
v[0] = box->x2;
v[1] = box->y2;
_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[2], &v[3]);
 
v[4] = box->x1;
v[5] = box->y2;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[6], &v[7]);
 
v[8] = box->x1;
v[9] = box->y1;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y1 + op->src.offset[1],
transform, op->src.scale,
&v[10], &v[11]);
 
box++;
v += 12;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[2] = v[6] + w * op->src.scale[0];
 
v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
 
fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2 + op->dst.x;
v[8] = v[4] = box->x1 + op->dst.x;
v[5] = v[1] = box->y2 + op->dst.y;
v[9] = box->y1 + op->dst.y;
 
v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 12;
box++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x;
v[9] = r->dst.y;
 
v[0] = v[4] + w;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = r->src.x * op->src.scale[0];
v[11] = r->src.y * op->src.scale[1];
 
v[2] = v[6] + w * op->src.scale[0];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
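/* With a constant source only the mask channel needs texture coordinates,
 * so each vertex stays at (x, y, u, v) using the mask offset and scale. */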
fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
v[2] = v[6] + w * op->mask.scale[0];
 
v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif
 
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
884,6 → 1141,7
 
state->current_dst = bo->unique_id;
}
assert(bo->exec);
kgem_bo_mark_dirty(bo);
}
 
938,6 → 1196,7
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->src.card_format);
map[tex_count * 2 + 0] =
op->src.card_format |
gen3_ms_tiling(op->src.bo->tiling) |
973,6 → 1232,7
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->mask.card_format);
map[tex_count * 2 + 0] =
op->mask.card_format |
gen3_ms_tiling(op->mask.bo->tiling) |
1357,8 → 1617,8
goto start;
}
 
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
1494,88 → 1754,674
return false;
}
 
#if 0
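/* Prepare a source or mask channel from a Picture. Returns 1 when the
 * channel is ready for the GPU, 0 when it reduces to nothing (the caller
 * switches to SHADER_ZERO) and -1 when we must fall back to a fixup path. */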
static int
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->card_format = 0;
 
if (picture->pDrawable == NULL) {
SourcePict *source = picture->pSourcePict;
int ret = -1;
 
switch (source->type) {
case SourcePictTypeSolidFill:
DBG(("%s: solid fill [%08x], format %08x\n",
__FUNCTION__,
(unsigned)source->solidFill.color,
(unsigned)picture->format));
ret = gen3_init_solid(channel, source->solidFill.color);
break;
 
case SourcePictTypeLinear:
ret = gen3_init_linear(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
 
case SourcePictTypeRadial:
ret = gen3_init_radial(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
}
 
if (ret == -1) {
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (sna_picture_is_solid(picture, &color)) {
DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color);
}
 
if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
}
 
if (!gen3_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
if (!gen3_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
channel->pict_format = picture->format;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else {
channel->transform = picture->transform;
channel->is_affine = sna_transform_is_affine(picture->transform);
}
 
if (!gen3_composite_channel_set_format(channel, picture->format) &&
!gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
assert(channel->card_format);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
__FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height,
x, y, w, h));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
/* If it is a solid, try to use the BLT paths */
if (!picture->pDrawable)
return picture->pSourcePict->type == SourcePictTypeSolidFill;
 
if (picture->pDrawable->width == 1 &&
picture->pDrawable->height == 1 &&
picture->repeat)
return true;
 
if (too_large(picture->pDrawable->width, picture->pDrawable->height))
return true;
 
return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst,
PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
DBG(("%s: target too large for 3D pipe (%d, %d)\n",
__FUNCTION__,
dst->pDrawable->width, dst->pDrawable->height));
return true;
}
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return source_use_blt(sna, src);
}
#endif
 
static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
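/* A change in floats_per_vertex realigns vertex_used to the new stride so
 * the next rectangle starts on a whole-vertex boundary, flushing first if
 * there is no longer room for a full rectangle. */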
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}
 
static inline bool is_constant_ps(uint32_t type)
{
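/* Constant shaders need no per-vertex texture coordinates; see the
 * floats_per_vertex accounting in gen3_render_composite(). */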
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
#if 0
static bool
gen3_composite_fallback(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen3_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* readback in order to upload the source, do it all
* on the cpu.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
if (mask &&
mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
op != PictOpOver) {
DBG(("%s: component-alpha mask with op=%d, should fallback\n",
__FUNCTION__, op));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask && mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
__FUNCTION__, dst_use_cpu(dst_pixmap)));
return dst_use_cpu(dst_pixmap);
}
 
static bool
gen3_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s()\n", __FUNCTION__));
 
if (op >= ARRAY_SIZE(gen3_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
/* Try to use the BLT engine unless it implies a
* 3D -> 2D context switch.
*/
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen3_composite_fallback(sna, op, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
mask_x, mask_y,
dst_x, dst_y,
width, height,
tmp);
 
if (!gen3_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height)) {
DBG(("%s: unable to set render target\n",
__FUNCTION__));
return false;
}
 
tmp->op = op;
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
if (too_large(tmp->dst.width, tmp->dst.height) ||
!gen3_check_pitch_3d(tmp->dst.bo)) {
if (!sna_render_composite_redirect(sna, tmp,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
}
 
tmp->u.gen3.num_constants = 0;
tmp->src.u.gen3.type = SHADER_TEXTURE;
tmp->src.is_affine = true;
DBG(("%s: preparing source\n", __FUNCTION__));
switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
tmp->src.u.gen3.type = SHADER_ZERO;
break;
case 1:
if (mask == NULL && tmp->src.bo &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen3_composite_channel_convert(&tmp->src);
break;
}
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
 
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->mask.is_affine = true;
tmp->need_magic_ca_pass = false;
tmp->has_component_alpha = false;
if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, mask_x, mask_y)) {
tmp->mask.u.gen3.type = SHADER_TEXTURE;
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
mask_x, mask_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
tmp->mask.u.gen3.type = SHADER_ZERO;
break;
case 1:
gen3_composite_channel_convert(&tmp->mask);
break;
}
}
DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
if (tmp->mask.u.gen3.type == SHADER_ZERO) {
if (tmp->src.bo) {
kgem_bo_destroy(&sna->kgem,
tmp->src.bo);
tmp->src.bo = NULL;
}
tmp->src.u.gen3.type = SHADER_ZERO;
tmp->mask.u.gen3.type = SHADER_NONE;
}
 
if (tmp->mask.u.gen3.type != SHADER_NONE) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
/* Check if it's component alpha that relies on a source alpha
* and on the source value. We can only get one of those
* into the single source value that we get to blend with.
*/
DBG(("%s: component-alpha mask: %d\n",
__FUNCTION__, tmp->mask.u.gen3.type));
tmp->has_component_alpha = true;
if (tmp->mask.u.gen3.type == SHADER_WHITE) {
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = false;
} else if (gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
if (op != PictOpOver)
goto cleanup_mask;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
} else {
if (tmp->mask.is_opaque) {
tmp->mask.u.gen3.type = SHADER_NONE;
} else if (is_constant_ps(tmp->src.u.gen3.type) &&
is_constant_ps(tmp->mask.u.gen3.type)) {
uint32_t v;
 
v = multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
24);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
16);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
8);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
0);
 
DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
__FUNCTION__,
tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
v));
 
tmp->src.u.gen3.type = SHADER_CONSTANT;
tmp->src.u.gen3.mode = v;
tmp->src.is_opaque = false;
 
tmp->mask.u.gen3.type = SHADER_NONE;
}
}
}
}
DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
tmp->src.is_affine, tmp->mask.is_affine));
 
tmp->prim_emit = gen3_emit_composite_primitive;
if (is_constant_ps(tmp->mask.u.gen3.type)) {
switch (tmp->src.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant;
tmp->emit_boxes = gen3_emit_composite_boxes_constant;
}
 
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
}
}
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
}
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
}
break;
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
if (tmp->mask.transform == NULL) {
if (is_constant_ps(tmp->src.u.gen3.type)) {
if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
}
}
} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
}
}
}
}
 
tmp->floats_per_vertex = 2;
if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex,
tmp->prim_emit != gen3_emit_composite_primitive));
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
tmp->boxes = gen3_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen3_render_composite_boxes;
tmp->thread_boxes = gen3_render_composite_boxes__thread;
}
tmp->done = gen3_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
#endif
 
 
 
1710,19 → 2556,6
 
 
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
 
 
1825,13 → 2658,59
 
 
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
const char *gen3_render_init(struct sna *sna, const char *backend)
{
struct sna_render *render = &sna->render;
 
#if 0
#if !NO_COMPOSITE
render->composite = gen3_render_composite;
render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
render->check_composite_spans = gen3_check_composite_spans;
render->composite_spans = gen3_render_composite_spans;
render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
render->video = gen3_render_video;
 
render->copy_boxes = gen3_render_copy_boxes;
render->copy = gen3_render_copy;
 
render->fill_boxes = gen3_render_fill_boxes;
render->fill = gen3_render_fill;
render->fill_one = gen3_render_fill_one;
#endif
 
render->blit_tex = gen3_blit_tex;
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return "Alviso (gen3)";
}
 
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
1933,39 → 2812,3
gen3_align_vertex(sna, tmp);
return true;
}
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
bool gen3_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
 
// render->video = gen3_render_video;
 
render->blit_tex = gen3_blit_tex;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return true;
}
/drivers/video/Intel-2D/gen4_render.c
51,6 → 51,7
*/
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
61,6 → 62,8
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
 
#define MAX_FLUSH_VERTICES 6
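/* With FORCE_FLUSH set, gen4_get_rectangles() caps each primitive at this
 * many rectangles and emits an MI_FLUSH in between, apparently to avoid
 * the gen4 render hangs tracked in the bug referenced above. */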
 
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
286,8 → 289,24
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
299,8 → 318,22
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
393,7 → 426,7
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
/* After the first bind, we manage the cache domains within the batch */
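/* The cookie also encodes is_dst in the top bit, so a bo used as both
 * render target and sampler source gets two distinct surface states. */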
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
408,9 → 441,10
GEN4_SURFACE_BLEND_ENABLED |
format << GEN4_SURFACE_FORMAT_SHIFT);
 
if (is_dst) {
ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
} else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
421,7 → 455,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
476,7 → 510,7
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
unsigned int id = 1 << op->u.gen4.ve_id;
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
484,13 → 518,14
 
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5;
ndwords += 2*FORCE_FLUSH;
 
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);
508,7 → 543,8
return rem;
}
 
if (!kgem_check_batch(&sna->kgem,
2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
534,6 → 570,27
int rem;
 
assert(want);
#if FORCE_FLUSH
rem = sna->render.vertex_offset;
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
rem = sna->kgem.nbatch - 5;
if (rem) {
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
if (rem <= 0) {
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
rem = MAX_FLUSH_VERTICES;
}
} else
rem = MAX_FLUSH_VERTICES;
if (want > rem)
want = rem;
#endif
 
start:
rem = vertex_space(sna);
552,8 → 609,8
goto start;
}
 
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
673,7 → 730,7
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
904,6 → 961,8
{
bool flush;
 
assert(op->dst.bo->exec);
 
flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
984,23 → 1043,450
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen4_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen4_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen4_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen4_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN4_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
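/* Planar YUV is bound as six R8 surfaces (Y, Y, V, V, U, U, chroma at half
 * resolution); packed YUY2/UYVY uses a single YCrCb surface instead. */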
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
gen4_get_batch(sna, op);
 
binding_table = gen4_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen4_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen4_emit_state(sna, op, offset | dirty);
}
 
static bool
gen4_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen4.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen4.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen4_video_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
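/* Map destination pixels to normalized source coordinates: the scale is
 * the per-pixel step within the frame, the offset anchors the region origin. */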
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
do {
int n;
 
n = gen4_get_rectangles(sna, &tmp, nbox,
gen4_video_bind_surfaces);
assert(n);
nbox -= n;
 
do {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
} while (--n);
} while (nbox);
gen4_vertex_flush(sna);
 
return true;
}
 
static int
gen4_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
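/* Returns 1 when the channel is ready for the GPU, 0 when it collapses to
 * a solid color and -1 when a fixup/upload fallback is required. */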
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->is_solid = false;
channel->card_format = -1;
 
if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);
 
if (picture->pDrawable == NULL) {
int ret;
 
if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);
 
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_repeat(picture)) {
DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_filter(picture)) {
DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;
 
channel->pict_format = picture->format;
channel->card_format = gen4_get_card_format(picture->format);
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
__FUNCTION__,
channel->repeat, gen4_repeat(channel->repeat),
channel->filter, gen4_repeat(channel->filter)));
channel->repeat = gen4_repeat(channel->repeat);
channel->filter = gen4_filter(channel->filter);
if (channel->card_format == (unsigned)-1)
channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif
 
static void
gen4_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
1014,113 → 1500,409
 
}
 
#if 0
static bool
gen4_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height))
return true;
 
/* The blitter is much faster for solids */
if (sna_picture_is_solid(src, NULL))
return true;
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return picture_is_cpu(sna, src);
}
 
static bool
check_gradient(PicturePtr picture, bool precise)
{
switch (picture->pSourcePict->type) {
case SourcePictTypeSolidFill:
case SourcePictTypeLinear:
return false;
default:
return precise;
}
}
 
static bool
has_alphamap(PicturePtr p)
{
return p->alphaMap != NULL;
}
 
static bool
need_upload(struct sna *sna, PicturePtr p)
{
return p->pDrawable && untransformed(p) &&
!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
source_is_busy(PixmapPtr pixmap)
{
struct sna_pixmap *priv = sna_pixmap(pixmap);
if (priv == NULL)
return false;
 
if (priv->clear)
return false;
 
if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
return true;
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
return priv->gpu_damage && !priv->cpu_damage;
}
 
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
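/* A source forces a fallback when it needs gradient, alpha-map or filter
 * fixups, or would have to be read back; a pixmap already busy on the GPU
 * stays there, since uploading is cheaper than stalling. */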
if (sna_picture_is_solid(p, NULL))
return false;
 
if (p->pSourcePict)
return check_gradient(p, precise);
 
if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
return true;
 
/* soft errors: prefer to upload/compute rather than readback */
if (pixmap && source_is_busy(pixmap))
return false;
 
return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
 
static bool
gen4_composite_fallback(struct sna *sna,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen4_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(sna, src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(sna, mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* readback in order to upload the source, do it all
* on the cpu.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
return dst_use_cpu(dst_pixmap);
}
 
static int
reuse_source(struct sna *sna,
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
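/* Let the mask share the source channel (and its bo reference) when both
 * sample the same drawable with identical transform, filter and repeat. */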
uint32_t color;
 
if (src_x != msk_x || src_y != msk_y)
return false;
 
if (src == mask) {
DBG(("%s: mask is source\n", __FUNCTION__));
*mc = *sc;
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
if (sna_picture_is_solid(mask, &color))
return gen4_channel_init_solid(sna, mc, color);
 
if (sc->is_solid)
return false;
 
if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
return false;
 
DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
 
if (!sna_transform_equal(src->transform, mask->transform))
return false;
 
if (!sna_picture_alphamap_equal(src, mask))
return false;
 
if (!gen4_check_repeat(mask))
return false;
 
if (!gen4_check_filter(mask))
return false;
 
if (!gen4_check_format(mask->format))
return false;
 
DBG(("%s: reusing source channel for mask with a twist\n",
__FUNCTION__));
 
*mc = *sc;
mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
mc->filter = gen4_filter(mask->filter);
mc->pict_format = mask->format;
mc->card_format = gen4_get_card_format(mask->format);
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
static bool
gen4_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
if (op >= ARRAY_SIZE(gen4_blend_op))
return false;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen4_composite_fallback(sna, src, mask, dst))
return false;
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
if (!gen4_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
 
tmp->op = op;
switch (gen4_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen4_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen4_blend_op[op].src_alpha &&
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s -- fallback: unhandled component alpha blend\n",
__FUNCTION__));
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
goto cleanup_src;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
}
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen4_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen4_composite_channel_convert(&tmp->mask);
break;
}
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
tmp->is_affine &= tmp->mask.is_affine;
}
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
1127,9 → 1909,17
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
tmp->boxes = gen4_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
}
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
1136,13 → 1926,70
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#endif

static void
gen4_render_flush(struct sna *sna)
{
1230,7 → 2077,7
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
int gen, uint32_t kernel)
uint32_t kernel)
{
struct gen4_sf_unit_state *sf;
 
1384,7 → 2231,7
}
 
state->vs = gen4_create_vs_unit_state(&general);
state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);
state->sf = gen4_create_sf_state(&general, sf);
 
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
1421,20 → 2268,52
return state->general_bo != NULL;
}
 
 
bool gen4_render_init(struct sna *sna)
const char *gen4_render_init(struct sna *sna, const char *backend)
{
if (!gen4_render_setup(sna))
return false;
return backend;
 
sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen4_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen4_check_composite_spans;
sna->render.composite_spans = gen4_render_composite_spans;
if (0)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
#if !NO_VIDEO
sna->render.video = gen4_render_video;
#endif
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen4_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen4_render_fill_one;
#endif
 
#endif
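/* In this KolibriOS port the stock SNA entry points above are compiled
 * out; only the blit_tex path plus flush/reset/fini below are wired
 * into the render vtable.
 */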
 
sna->render.blit_tex = gen4_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
 
sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
sna->render.fini = gen4_render_fini;
1441,8 → 2320,95
 
sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
/drivers/video/Intel-2D/gen4_vertex.c
34,6 → 34,10
#include "sna_render_inline.h"
#include "gen4_vertex.h"
 
#ifndef sse2
#define sse2
#endif
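/* The guard above defines sse2 away when compiler.h did not provide the
 * target attribute (non-GCC or GCC older than 4.5), so the annotated
 * emitters below still build as plain functions.
 */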
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
223,7 → 227,601
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
fastcall static void
/* specialised vertex emission routines */
 
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
force_inline static float
compute_linear(const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
return ((x+channel->offset[0]) * channel->u.linear.dx +
(y+channel->offset[1]) * channel->u.linear.dy +
channel->u.linear.offset);
}
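/* compute_linear() evaluates the 1D coordinate of a linear-gradient
 * source: the offset-adjusted pixel position dotted with the gradient
 * direction (dx, dy) plus a constant term, a value the pixel shader
 * then uses to look up the gradient colour.
 */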
 
sse2 inline static void
emit_texcoord(struct sna *sna,
const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
if (channel->is_solid) {
OUT_VERTEX_F(x);
return;
}
 
x += channel->offset[0];
y += channel->offset[1];
 
if (channel->is_affine) {
float s, t;
 
sna_get_transformed_coordinates(x, y,
channel->transform,
&s, &t);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
} else {
float s, t, w;
 
sna_get_transformed_coordinates_3d(x, y,
channel->transform,
&s, &t, &w);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
OUT_VERTEX_F(w);
}
}
 
sse2 force_inline static void
emit_vertex(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
}
 
sse2 fastcall static void
emit_primitive(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
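/* Each rectangle is emitted as just three vertices (bottom-right,
 * bottom-left, top-left); the rectangle-list primitive lets the
 * hardware infer the fourth corner, which is why floats_per_rect is
 * always 3 * floats_per_vertex.
 */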
 
sse2 force_inline static void
emit_vertex_mask(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
emit_texcoord(sna, &op->mask, mskX, mskY);
}
 
sse2 fastcall static void
emit_primitive_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex_mask(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
 
sse2 fastcall static void
emit_primitive_solid(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
}
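/* The sna_coordinate/float union packs both 16-bit destination
 * coordinates into a single 32-bit vertex element; the constant .5
 * texcoord simply samples the centre of the solid-colour source.
 */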
 
sse2 fastcall static void
emit_boxes_solid(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
box++;
v += 6;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_linear(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse2 fastcall static void
emit_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse2 fastcall static void
emit_boxes_simple_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = r->dst.x;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = r->dst.y;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[7], &v[8]);
}
 
sse2 fastcall static void
emit_boxes_affine_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = box->x1;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = box->y1;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y1,
op->src.transform, op->src.scale,
&v[7], &v[8]);
box++;
v += 9;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
272,8 → 870,831
v[14] = msk_y * op->mask.scale[1];
}
 
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
sse2 fastcall static void
emit_primitive_simple_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = ((r->src.x + tx) * xx + x0) * sx;
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = ((r->src.y + ty) * yy + y0) * sy;
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_affine_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[6], &v[7]);
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[11], &v[12]);
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
/* SSE4_2 */
#if defined(sse4_2)
 
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
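/* The __sse4_2 emitters above are source-identical to the generic
 * versions; only the target attribute from compiler.h differs, letting
 * GCC schedule SSE4.2 code. gen4_choose_composite_emitter() picks a
 * variant at runtime from sna->cpu_features.
 */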
 
/* AVX2 */
#if defined(avx2)
 
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
avx2 fastcall static void
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
{
unsigned vb;
 
if (tmp->mask.bo) {
280,8 → 1701,38
if (tmp->mask.transform == NULL) {
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source_mask;
288,17 → 1739,130
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_simple_source_identity;
} else {
DBG(("%s: affine src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source_identity;
}
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else {
DBG(("%s: projective source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 6;
vb = 2 << 2 | 3;
}
} else {
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 1;
vb = 0;
if (tmp->mask.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1 << 2;
} else if (tmp->mask.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2 << 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3 << 2;
}
if (tmp->src.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1;
} else if (tmp->src.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3;
}
DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
__FUNCTION__, tmp->floats_per_vertex, vb));
}
} else {
#if 0
if (tmp->src.is_solid) {
DBG(("%s: solid, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_solid;
if (tmp->src.is_opaque && tmp->op == PictOpOver)
tmp->op = PictOpSrc;
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear;
}
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_simple_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_simple_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_simple_source;
}
} else {
DBG(("%s: affine src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else {
DBG(("%s: projective src, no mask\n", __FUNCTION__));
assert(!tmp->src.is_solid);
tmp->prim_emit = emit_primitive;
tmp->floats_per_vertex = 4;
vb = 3;
}
#endif
}
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
return vb;
}
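/* Layout of the returned vertex-element id, as assigned above:
 *   bits 1:0 - floats per vertex for the source texcoord
 *              (1 solid/linear, 2 affine s/t, 3 projective s/t/w)
 *   bits 3:2 - the same encoding for the mask, 0 when there is none
 * floats_per_vertex is 1 (the packed dst x/y) plus those counts, and a
 * rectangle always costs three such vertices.
 */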
 
 
 
/drivers/video/Intel-2D/gen4_vertex.h
10,7 → 10,6
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
 
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp);
 
#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/gen5_render.c
279,8 → 279,24
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
292,8 → 308,22
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
typedef struct gen5_surface_state_padded {
384,7 → 414,7
 
/* After the first bind, we manage the cache domains within the batch */
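/* The is_dst bit is folded into the binding-cache key because a bo
 * bound as the render target needs a different surface state (RC
 * read/write domain) than the same bo bound for sampling.
 */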
if (!DBG_NO_SURFACE_CACHE) {
offset = kgem_bo_get_binding(bo, format);
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
400,9 → 430,10
GEN5_SURFACE_BLEND_ENABLED |
format << GEN5_SURFACE_FORMAT_SHIFT);
 
if (is_dst)
if (is_dst) {
ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
else
} else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
413,7 → 444,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format, offset);
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
541,8 → 572,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
649,8 → 680,13
*
* However, the kernel flushes the pipeline between batches,
* so we should be safe....
* OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
*
* On the other hand, after using BLT we must use a non-pipelined
* operation...
*/
if (sna->kgem.nreloc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
 
OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
gen5_emit_state_base_address(sna);
732,12 → 768,15
kernel);
bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
 
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
__FUNCTION__, sp, bp, key,
sna->render_state.gen5.last_pipelined_pointers & 0xffff,
(sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
sna->render_state.gen5.last_pipelined_pointers));
if (key == sna->render_state.gen5.last_pipelined_pointers)
return false;
 
 
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen5.vs);
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
746,11 → 785,15
OUT_BATCH(sna->render_state.gen5.wm + sp);
OUT_BATCH(sna->render_state.gen5.cc + bp);
 
bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
sna->render_state.gen5.last_pipelined_pointers = key;
return true;
 
gen5_emit_urb(sna);
 
return bp;
}
 
static void
static bool
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
762,7 → 805,7
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.drawrect_limit == limit &&
sna->render_state.gen5.drawrect_offset == offset)
return;
return false;
 
sna->render_state.gen5.drawrect_offset = offset;
sna->render_state.gen5.drawrect_limit = limit;
771,6 → 814,7
OUT_BATCH(0x00000000);
OUT_BATCH(limit);
OUT_BATCH(offset);
return true;
}
 
static void
892,11 → 936,35
}
}
 
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
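/* gen5_emit_state() below orders things carefully: the drawing
 * rectangle goes first (Ironlake BLT workaround), then the binding
 * table, the pipelined pointers (true when the blend/kernel state
 * changed) and the vertex elements. Dirty source or mask textures force
 * an MI_FLUSH, and a changed blend state on top of a freshly written
 * destination additionally triggers the pipe-control flush above.
 */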
 
static void
gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
bool flush = false;
 
assert(op->dst.bo->exec);
 
/* drawrect must be first for Ironlake BLT workaround */
if (gen5_emit_drawing_rectangle(sna, op))
offset &= ~1;
gen5_emit_binding_table(sna, offset & ~1);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
DBG(("%s: changed blend state, flush required? %d\n",
__FUNCTION__, (offset & 1) && op->op > PictOpSrc));
flush = (offset & 1) && op->op > PictOpSrc;
}
gen5_emit_vertex_elements(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
904,19 → 972,18
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
 
/* drawrect must be first for Ironlake BLT workaround */
gen5_emit_drawing_rectangle(sna, op);
gen5_emit_binding_table(sna, offset);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
if (flush) {
DBG(("%s: forcing flush\n", __FUNCTION__));
gen5_emit_pipe_flush(sna);
}
}
 
static void gen5_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
 
953,7 → 1020,7
offset = sna->render_state.gen5.surface_table;
}
 
gen5_emit_state(sna, op, offset);
gen5_emit_state(sna, op, offset | dirty);
}
 
fastcall static void
972,8 → 1039,335
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen5_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen5_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen5_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen5_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen5_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen5_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN5_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen5_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
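/* Planar YUV sources are bound as six R8 surfaces: the full-resolution
 * Y plane twice, followed by the half-resolution V and U planes; packed
 * formats (e.g. UYVY) need only a single YCRCB surface.
 */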
 
gen5_get_batch(sna, op);
 
binding_table = gen5_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen5_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen5_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen5_emit_state(sna, op, offset | dirty);
}
 
static bool
gen5_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen5.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen5_video_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
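/* The scale/offset pair maps destination pixel coordinates straight to
 * normalized texture coordinates: u(x) = x * src_scale_x + src_offset_x,
 * so u(dstRegion->extents.x1) == frame->src.x1 / frame->width and the
 * destination span covers exactly src.x1..src.x2 of the frame.
 */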
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
 
gen4_vertex_flush(sna);
return true;
}
#endif
 
static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
986,69 → 1380,175
 
}
 
#if 0
static bool
gen5_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
gen5_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
if (op >= ARRAY_SIZE(gen5_blend_op)) {
DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
return false;
}
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen5_composite_fallback(sna, src, mask, dst))
return false;
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
if (!gen5_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
 
DBG(("%s: preparing source\n", __FUNCTION__));
tmp->op = op;
switch (gen5_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen5_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen5_blend_op[op].src_alpha &&
(gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
goto cleanup_src;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}
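/* Component alpha with an operator that also needs the source alpha cannot
 * be resolved in a single pass: the op is rewritten to OutReverse here and
 * the "magic CA" pass (see need_magic_ca_pass) later replays the same
 * rectangles with an additive blend to complete the OVER.
 */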
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen5_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->mask);
break;
}
}
tmp->is_affine &= tmp->mask.is_affine;
}
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
1055,15 → 1555,23
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen5_render_composite_blt;
// tmp->box = gen5_render_composite_box;
tmp->box = gen5_render_composite_box;
tmp->boxes = gen5_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen5_render_composite_boxes;
tmp->thread_boxes = gen5_render_composite_boxes__thread;
}
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
goto cleanup_mask;
}
 
gen5_bind_surfaces(sna, tmp);
1070,11 → 1578,448
gen5_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen5_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
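/* Vertex space for this batch of boxes is reserved while the render lock
 * is held, then the lock is dropped so other threads can reserve their own
 * slice while emit_boxes() fills ours; the acquire/release pair lets a
 * later vertex flush wait for all outstanding writers.
 */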
 
fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
 
DBG(("%s()\n", __FUNCTION__));
 
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen5_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height,
unsigned flags)
{
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
__FUNCTION__, op, width, height, flags));
 
if (op >= ARRAY_SIZE(gen5_blend_op))
return false;
 
if (gen5_composite_fallback(sna, src, NULL, dst)) {
DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
}
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
assert(priv);
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
 
if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
dst->format == PICT_a8)
return false;
 
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
return true;
}
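/* For non-rectilinear spans the GPU path is only worthwhile when the
 * destination is already busy on the GPU; otherwise the inplace/CPU
 * fallback is preferred, in particular for a8 destinations on chipsets
 * that do not advertise PREFER_GPU_SPANS.
 */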
 
static bool
gen5_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));
 
assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
tmp->base.op = op;
if (!gen5_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height,
true))
return false;
 
switch (gen5_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->base.src);
break;
}
 
tmp->base.mask.bo = NULL;
 
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = false;
tmp->base.need_magic_ca_pass = false;
 
tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
}
 
gen5_bind_surfaces(sna, &tmp->base);
gen5_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
 
 
static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
 
DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
__FUNCTION__, alu,
src->drawable.serialNumber, src_bo->handle,
dst->drawable.serialNumber, dst_bo->handle,
n, box->x1, box->y1, box->x2, box->y2,
flags));
 
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
memset(&tmp, 0, sizeof(tmp));
 
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen5_check_format(tmp.src.pict_format)) {
DBG(("%s: unsupported source format, %x, use BLT\n",
__FUNCTION__, tmp.src.pict_format));
goto fallback_blt;
}
 
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
 
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.x = tmp.dst.y = 0;
tmp.dst.bo = dst_bo;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;
}
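/* When the destination exceeds the 3D pipeline limits, the operation is
 * redirected to a temporary render target covering just the bounding box
 * of the copies; sna_render_composite_redirect_done() copies the result
 * back once the op completes.
 */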
 
tmp.src.filter = SAMPLER_FILTER_NEAREST;
tmp.src.repeat = SAMPLER_EXTEND_NONE;
tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled_dst;
} else {
tmp.src.bo = kgem_bo_reference(src_bo);
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
tmp.src.offset[0] = tmp.src.offset[1] = 0;
tmp.src.scale[0] = 1.f/src->drawable.width;
tmp.src.scale[1] = 1.f/src->drawable.height;
}
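/* An oversized source is rebound as a partial view of just the extents;
 * tmp.src.offset[] then carries the origin of that view and is folded into
 * src_dx/src_dy below before the normalized coordinates are emitted.
 */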
 
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen5.wm_kernel = WM_KERNEL;
tmp.u.gen5.ve_id = 2;
 
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
DBG(("%s: aperture check failed\n", __FUNCTION__));
goto fallback_tiled_src;
}
}
 
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
tmp.dst.x = tmp.dst.y = 0;
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
 
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
do {
int n_this_time;
 
n_this_time = gen5_get_rectangles(sna, &tmp, n,
gen5_copy_bind_surfaces);
n -= n_this_time;
 
do {
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
DBG(("%s: tiled fallback\n", __FUNCTION__));
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
#endif
 
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
1397,16 → 2342,38
return state->general_bo != NULL;
}
 
bool gen5_render_init(struct sna *sna)
const char *gen5_render_init(struct sna *sna, const char *backend)
{
if (!gen5_render_setup(sna))
return false;
return backend;
 
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen5_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen5_check_composite_spans;
sna->render.composite_spans = gen5_render_composite_spans;
if (sna->PciInfo->device_id == 0x0044)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen5_render_video;
 
sna->render.copy_boxes = gen5_render_copy_boxes;
sna->render.copy = gen5_render_copy;
 
sna->render.fill_boxes = gen5_render_fill_boxes;
sna->render.fill = gen5_render_fill;
sna->render.fill_one = gen5_render_fill_one;
#endif
 
sna->render.blit_tex = gen5_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen5_render_flush;
sna->render.reset = gen5_render_reset;
1414,7 → 2381,89
 
sna->render.max_3d_size = MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return "Ironlake (gen5)";
}
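/* gen5_blit_tex() is the KolibriOS-specific entry point installed as
 * sna->render.blit_tex above: it composites src through an 8-bit alpha
 * mask into an x8r8g8b8 target with PictOpSrc. The 'scale' flag selects
 * whether texture coordinates are normalized against the requested
 * width/height or against the source pixmap size.
 */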
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen5_render_composite_blt;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
}
 
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen6_render.c
30,9 → 30,6
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
54,7 → 51,7
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 1
#define NO_RING_SWITCH 0
#define PREFER_RENDER 0
 
#define USE_8_PIXEL_DISPATCH 1
68,6 → 65,7
#define GEN6_MAX_SIZE 8192
 
struct gt_info {
const char *name;
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
79,6 → 77,7
};
 
static const struct gt_info gt1_info = {
.name = "Sandybridge (gen6, gt1)",
.max_vs_threads = 24,
.max_gs_threads = 21,
.max_wm_threads = 40,
86,6 → 85,7
};
 
static const struct gt_info gt2_info = {
.name = "Sandybridge (gen6, gt2)",
.max_vs_threads = 60,
.max_gs_threads = 60,
.max_wm_threads = 80,
254,18 → 254,6
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
};
 
/*
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
285,7 → 273,6
case PICT_a4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
*/
}
 
static uint32_t gen6_get_dest_format(PictFormat format)
403,7 → 390,6
int base;
 
if (has_mask) {
 
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
base = GEN6_WM_KERNEL_MASKSA;
411,7 → 397,6
base = GEN6_WM_KERNEL_MASKCA;
} else
base = GEN6_WM_KERNEL_MASK;
 
} else
base = GEN6_WM_KERNEL_NOMASK;
 
889,6 → 874,8
{
bool need_stall = wm_binding_table & 1;
 
assert(op->dst.bo->exec);
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
902,7 → 889,7
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
if (op->dst.bo->exec)
assert(op->dst.bo->exec);
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
1039,8 → 1026,6
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
return 0;
/*
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
1047,7 → 1032,6
case I915_TILING_X: return GEN6_SURFACE_TILED;
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
}
*/
}
 
/**
1068,7 → 1052,7
uint32_t is_scanout = is_dst && bo->scanout;
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
1084,9 → 1068,10
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst)
if (is_dst) {
ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
else
} else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
1095,9 → 1080,9
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = is_scanout ? 0 : 3 << 16;
ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;
 
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
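/* The binding-table cache key now folds is_dst (bit 30) and is_scanout
 * (bit 31) in with the format, so a bo previously bound as a sampler
 * source gets a fresh surface-state entry when rebound as the render
 * target (and vice versa) rather than reusing one with the wrong
 * read/write and cache-control bits.
 */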
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1210,6 → 1195,8
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
1220,10 → 1207,15
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen6_rectangle_begin(sna, op))
goto flush;
else
goto start;
}
 
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1340,7 → 1332,6
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
 
fastcall static void
gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
1351,7 → 1342,6
}
 
#if 0
 
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
1462,7 → 1452,6
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#endif
 
#ifndef MAX
1502,7 → 1491,6
}
 
#if 0
 
static uint32_t gen6_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
1606,21 → 1594,24
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__, src_w, src_h, drw_w, drw_h,
REGION_NUM_RECTS(dstRegion),
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
1644,7 → 1635,7
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_w == drw_w && src_h == drw_h)
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
1680,10 → 1671,12
pix_yoff = 0;
#endif
 
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
1697,16 → 1690,16
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
1714,7 → 1707,6
}
box++;
}
priv->clear = false;
 
gen4_vertex_flush(sna);
return true;
1822,7 → 1814,6
channel->card_format = gen6_get_card_format(channel->pict_format);
assert(channel->card_format != (unsigned)-1);
}
 
#endif
 
static void gen6_render_composite_done(struct sna *sna,
1837,16 → 1828,15
}
 
 
// sna_render_composite_redirect_done(sna, op);
}
 
#if 0
 
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h)
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
1889,7 → 1879,23
return true;
}
 
static bool
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
{
if (untiled_tlb_miss(tmp->dst.bo) ||
untiled_tlb_miss(tmp->src.bo))
return true;
 
if (kgem_bo_is_render(tmp->dst.bo) ||
kgem_bo_is_render(tmp->src.bo))
return false;
 
if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
 
return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo);
}
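/* Heuristic: favour the BLT ring when either bo would take untiled TLB
 * misses, keep the work on the render ring once a bo is already being
 * rendered to, and otherwise defer to the per-bo placement hints.
 */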
 
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
1908,11 → 1914,33
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen6_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height))
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->src,
2000,15 → 2028,15
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(tmp));
gen4_choose_composite_emitter(sna, tmp));
 
// tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
// tmp->boxes = gen6_render_composite_boxes__blt;
// if (tmp->emit_boxes) {
// tmp->boxes = gen6_render_composite_boxes;
// tmp->thread_boxes = gen6_render_composite_boxes__thread;
// }
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
tmp->boxes = gen6_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen6_render_composite_boxes;
tmp->thread_boxes = gen6_render_composite_boxes__thread;
}
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2039,7 → 2067,6
return false;
}
 
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
2207,7 → 2234,7
 
tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height))
dst_x, dst_y, width, height, true))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->base.src,
2237,7 → 2264,7
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(tmp));
gen4_choose_spans_emitter(sna, tmp));
 
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
2271,7 → 2298,6
}
#endif
 
 
static void
gen6_emit_copy_state(struct sna *sna,
const struct sna_composite_op *op)
2304,7 → 2330,6
gen6_emit_state(sna, op, offset | dirty);
}
 
 
static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *src_bo,
struct kgem_bo *dst_bo,
2326,11 → 2351,14
untiled_tlb_miss(dst_bo))
return true;
 
if (kgem_bo_is_render(dst_bo) ||
kgem_bo_is_render(src_bo))
return false;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
return (prefer_blt_bo(sna, src_bo) >= 0 &&
prefer_blt_bo(sna, dst_bo) > 0);
return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
}
 
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
2464,7 → 2492,8
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;
 
dst_dx += tmp.dst.x;
2479,14 → 2508,14
 
extents = box[0];
for (i = 1; i < n; i++) {
if (extents.x1 < box[i].x1)
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (extents.y1 < box[i].y1)
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (extents.x2 > box[i].x2)
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (extents.y2 > box[i].y2)
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
2632,10 → 2661,35
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
 
if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
too_large(src->drawable.width, src->drawable.height) ||
too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
op->base.dst.format = PIXMAN_a8r8g8b8;
return sna_blt_copy(sna, alu, src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op);
}
 
if (dst->drawable.depth == src->drawable.depth) {
op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = op->base.dst.format;
} else {
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(op->base.src.pict_format))
goto fallback;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
2675,119 → 2729,7
}
#endif
 
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
 
 
 
#if 0
 
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
2826,10 → 2768,16
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (kgem_bo_is_render(bo))
return false;
 
if (untiled_tlb_miss(bo))
return true;
 
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
if (!prefer_blt_ring(sna, bo, 0))
return false;
 
return prefer_blt_bo(sna, bo);
}
 
static bool
2910,7 → 2858,8
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
3385,12 → 3334,12
 
static bool is_gt2(struct sna *sna)
{
return DEVICE_ID(sna->PciInfo) & 0x30;
return sna->PciInfo->device_id & 0x30;
}
 
static bool is_mobile(struct sna *sna)
{
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
return (sna->PciInfo->device_id & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
3471,28 → 3420,52
return state->general_bo != NULL;
}
 
bool gen6_render_init(struct sna *sna)
const char *gen6_render_init(struct sna *sna, const char *backend)
{
if (!gen6_render_setup(sna))
return false;
return backend;
 
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
 
// sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
 
// sna->render.copy_boxes = gen6_render_copy_boxes;
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
 
sna->render.blit_tex = gen6_blit_tex;
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen6_render_copy;
#endif
 
// sna->render.copy = gen6_render_copy;
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen6_render_clear;
#endif
#endif
 
// sna->render.fill_boxes = gen6_render_fill_boxes;
// sna->render.fill = gen6_render_fill;
// sna->render.fill_one = gen6_render_fill_one;
// sna->render.clear = gen6_render_clear;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;
 
sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
3500,9 → 3473,113
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return sna->render_state.gen6.info->name;
}
return true;
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen7_render.c
45,6 → 45,8
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define ALWAYS_FLUSH 0
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
75,6 → 77,7
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
struct gt_info {
const char *name;
uint32_t max_vs_threads;
uint32_t max_gs_threads;
uint32_t max_wm_threads;
82,57 → 85,107
int size;
int max_vs_entries;
int max_gs_entries;
int push_ps_size; /* in 1KBs */
} urb;
int gt;
};
 
static const struct gt_info ivb_gt_info = {
.name = "Ivybridge (gen7)",
.max_vs_threads = 16,
.max_gs_threads = 16,
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 64, 64 },
.urb = { 128, 64, 64, 8 },
.gt = 0,
};
 
static const struct gt_info ivb_gt1_info = {
.name = "Ivybridge (gen7, gt1)",
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192 },
.urb = { 128, 512, 192, 8 },
.gt = 1,
};
 
static const struct gt_info ivb_gt2_info = {
.name = "Ivybridge (gen7, gt2)",
.max_vs_threads = 128,
.max_gs_threads = 128,
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 256, 704, 320 },
.urb = { 256, 704, 320, 8 },
.gt = 2,
};
 
static const struct gt_info byt_gt_info = {
.name = "Baytrail (gen7)",
.urb = { 128, 64, 64 },
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
.gt = 1,
};
 
static const struct gt_info hsw_gt_info = {
.name = "Haswell (gen7.5)",
.max_vs_threads = 8,
.max_gs_threads = 8,
.max_wm_threads =
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 64, 64 },
.urb = { 128, 64, 64, 8 },
.gt = 0,
};
 
static const struct gt_info hsw_gt1_info = {
.name = "Haswell (gen7.5, gt1)",
.max_vs_threads = 70,
.max_gs_threads = 70,
.max_wm_threads =
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 640, 256 },
.urb = { 128, 640, 256, 8 },
.gt = 1,
};
 
static const struct gt_info hsw_gt2_info = {
.name = "Haswell (gen7.5, gt2)",
.max_vs_threads = 140,
.max_gs_threads = 140,
.max_wm_threads =
(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640, 8 },
.gt = 2,
};
 
static const struct gt_info hsw_gt3_info = {
.name = "Haswell (gen7.5, gt3)",
.max_vs_threads = 280,
.max_gs_threads = 280,
.max_wm_threads =
(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640 },
.urb = { 512, 3328, 1280, 16 },
.gt = 3,
};
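/* Each gt_info now carries a human-readable .name (reported from the init
 * path), the GT level, and a per-GT push-constant allocation for the PS
 * (.push_ps_size, in KB) which feeds 3DSTATE_PUSH_CONSTANT_ALLOC_PS
 * instead of a hard-coded 8KB.
 */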
 
inline static bool is_ivb(struct sna *sna)
{
return sna->kgem.gen == 070;
}
 
inline static bool is_byt(struct sna *sna)
{
return sna->kgem.gen == 071;
}
 
inline static bool is_hsw(struct sna *sna)
{
return sna->kgem.gen == 075;
}
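/* kgem gen numbers are octal-encoded major.minor: 070 = Ivybridge (7.0),
 * 071 = Baytrail (7.1), 075 = Haswell (7.5).
 */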
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
294,8 → 347,24
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
307,8 → 376,22
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
335,7 → 418,7
gen7_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
OUT_BATCH(8); /* in 1KBs */
OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
 
/* num of VS entries must be divisible by 8 if size < 9 */
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
359,6 → 442,10
static void
gen7_emit_state_base_address(struct sna *sna)
{
uint32_t mocs;
 
mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
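/* Memory object control state bits for the dynamic and instruction base
 * addresses; the field encoding differs between Ivybridge and Haswell,
 * hence the chipset-dependent value (presumably selecting LLC caching).
 */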
 
OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
366,17 → 453,17
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
mocs | BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
mocs | BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
905,10 → 992,17
}
 
inline static void
gen7_emit_pipe_flush(struct sna *sna)
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
{
unsigned stall;
 
stall = 0;
if (need_stall)
stall = (GEN7_PIPE_CONTROL_CS_STALL |
GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
 
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
OUT_BATCH(0);
OUT_BATCH(0);
}
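/* When the caller needs a stall, CS_STALL/STALL_AT_SCOREBOARD are folded
 * into the same PIPE_CONTROL as the write-cache flush instead of emitting
 * a separate stalling packet afterwards.
 */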
930,8 → 1024,7
{
bool need_stall;
 
if (sna->render_state.gen7.emit_flush)
gen7_emit_pipe_flush(sna);
assert(op->dst.bo->exec);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
942,13 → 1035,18
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_stall &= gen7_emit_drawing_rectangle(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
if (op->dst.bo->exec)
assert(op->dst.bo->exec);
kgem_bo_mark_dirty(op->dst.bo);
sna->render_state.gen7.emit_flush = false;
need_stall = false;
}
if (sna->render_state.gen7.emit_flush) {
gen7_emit_pipe_flush(sna, need_stall);
need_stall = false;
}
if (need_stall)
gen7_emit_pipe_stall(sna);
 
1091,7 → 1189,7
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
1104,9 → 1202,12
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
gen7_tiling_bits(bo->tiling) |
format << GEN7_SURFACE_FORMAT_SHIFT);
if (is_dst)
if (bo->tiling == I915_TILING_Y)
ss[0] |= GEN7_SURFACE_VALIGN_4;
if (is_dst) {
ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
else
} else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
1113,13 → 1214,13
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = is_scanout ? 0 : 3 << 16;
ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
ss[6] = 0;
ss[7] = 0;
if (sna->kgem.gen == 075)
if (is_hsw(sna))
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1242,8 → 1343,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1398,114 → 1499,266
return sna_static_stream_offsetof(stream, base);
}
 
#if 0
static uint32_t gen7_bind_video_source(struct sna *sna,
struct kgem_bo *bo,
uint32_t offset,
int width,
int height,
int pitch,
uint32_t format)
{
uint32_t *ss, bind;
 
bind = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
 
static void gen7_render_composite_done(struct sna *sna,
assert(bo->tiling == I915_TILING_NONE);
 
ss = sna->kgem.batch + bind;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
format << GEN7_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
I915_GEM_DOMAIN_SAMPLER << 16,
offset);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = 0;
ss[6] = 0;
ss[7] = 0;
if (is_hsw(sna))
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
bind, bo->handle, ss[1],
format, width, height, pitch, offset));
 
return bind * sizeof(uint32_t);
}
 
static void gen7_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
gen7_get_batch(sna, op);
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen7_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen7_emit_state(sna, op, offset);
}
 
static bool
gen7_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
 
tmp.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
__FUNCTION__,
frame->src.x1, frame->src.y1,
src_width, src_height,
dst_width, dst_height,
frame->width, frame->height));
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
__FUNCTION__,
src_scale_x, src_scale_y,
src_offset_x, src_offset_y));
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
__FUNCTION__,
box->x1, box->y1,
box->x2, box->y2,
pix_xoff, pix_yoff,
box->x1 * src_scale_x + src_offset_x,
box->y1 * src_scale_y + src_offset_y,
box->x2 * src_scale_x + src_offset_x,
box->y2 * src_scale_y + src_offset_y));
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
 
gen4_vertex_flush(sna);
return true;
}
#endif
 
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
}
 
 
 
1597,6 → 1850,143
 
 
 
 
 
#if 0
static bool
gen7_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;
 
DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));
 
if (op >= ARRAY_SIZE(gen7_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}
 
if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;
 
if (!gen7_check_dst_format(format))
return false;
}
 
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;
 
DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;
 
boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
 
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen7.flags = FILL_FLAGS(op, format);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen7_emit_fill_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
do {
int n_this_time;
int16_t *v;
 
n_this_time = gen7_get_rectangles(sna, &tmp, n,
gen7_emit_fill_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
 
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
#endif
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
1676,14 → 2066,20
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
static bool is_gt3(struct sna *sna)
{
assert(sna->kgem.gen == 075);
return sna->PciInfo->device_id & 0x20;
}
 
static bool is_gt2(struct sna *sna)
{
return DEVICE_ID(sna->PciInfo) & 0x20;
return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20);
}
 
static bool is_mobile(struct sna *sna)
{
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
return (sna->PciInfo->device_id & 0xf) == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
1693,19 → 2089,24
struct gen7_sampler_state *ss;
int i, j, k, l, m;
 
if (sna->kgem.gen == 070) {
if (is_ivb(sna)) {
state->info = &ivb_gt_info;
if (DEVICE_ID(sna->PciInfo) & 0xf) {
if (sna->PciInfo->device_id & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (sna->kgem.gen == 075) {
} else if (is_byt(sna)) {
state->info = &byt_gt_info;
} else if (is_hsw(sna)) {
state->info = &hsw_gt_info;
if (DEVICE_ID(sna->PciInfo) & 0xf) {
if (sna->PciInfo->device_id & 0xf) {
if (is_gt3(sna))
state->info = &hsw_gt3_info;
else if (is_gt2(sna))
state->info = &hsw_gt2_info;
else
state->info = &hsw_gt1_info;
if (is_gt2(sna))
state->info = &hsw_gt2_info;
}
} else
return false;
1772,16 → 2173,51
return state->general_bo != NULL;
}
 
bool gen7_render_init(struct sna *sna)
const char *gen7_render_init(struct sna *sna, const char *backend)
{
if (!gen7_render_setup(sna))
return false;
return backend;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen7_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen7_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen7_render_clear;
#endif
#endif
 
sna->render.blit_tex = gen7_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.flush = gen7_render_flush;
sna->render.reset = gen7_render_reset;
1789,9 → 2225,102
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return sna->render_state.gen7.info->name;
}
return true;
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
 
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
/drivers/video/Intel-2D/gen7_render.h
1224,6 → 1224,8
 
/* Surface state DW0 */
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILED (1 << 14)
#define GEN7_SURFACE_TILED_Y (1 << 13)
#define GEN7_SURFACE_FORMAT_SHIFT 18
/drivers/video/Intel-2D/i915_pciids.h
0,0 → 1,211
/*
* Copyright 2013 Intel Corporation
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _I915_PCIIDS_H
#define _I915_PCIIDS_H
 
/*
* A pci_device_id struct {
* __u32 vendor, device;
* __u32 subvendor, subdevice;
* __u32 class, class_mask;
* kernel_ulong_t driver_data;
* };
* Don't use C99 here because "class" is reserved and we want to
* give userspace flexibility.
*/
#define INTEL_VGA_DEVICE(id, info) { \
0x8086, id, \
~0, ~0, \
0x030000, 0xff0000, \
(unsigned long) info }
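/* For illustration only: INTEL_I830_IDS(info) expands via INTEL_VGA_DEVICE to
 * { 0x8086, 0x3577, ~0, ~0, 0x030000, 0xff0000, (unsigned long) info },
 * i.e. vendor 0x8086, device 0x3577, any subvendor/subdevice, and any PCI
 * class whose top byte is 0x03 (display controller). */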
 
#define INTEL_QUANTA_VGA_DEVICE(info) { \
0x8086, 0x16a, \
0x152d, 0x8990, \
0x030000, 0xff0000, \
(unsigned long) info }
 
#define INTEL_I830_IDS(info) \
INTEL_VGA_DEVICE(0x3577, info)
 
#define INTEL_I845G_IDS(info) \
INTEL_VGA_DEVICE(0x2562, info)
 
#define INTEL_I85X_IDS(info) \
INTEL_VGA_DEVICE(0x3582, info), /* I855_GM */ \
INTEL_VGA_DEVICE(0x358e, info)
 
#define INTEL_I865G_IDS(info) \
INTEL_VGA_DEVICE(0x2572, info) /* I865_G */
 
#define INTEL_I915G_IDS(info) \
INTEL_VGA_DEVICE(0x2582, info), /* I915_G */ \
INTEL_VGA_DEVICE(0x258a, info) /* E7221_G */
 
#define INTEL_I915GM_IDS(info) \
INTEL_VGA_DEVICE(0x2592, info) /* I915_GM */
 
#define INTEL_I945G_IDS(info) \
INTEL_VGA_DEVICE(0x2772, info) /* I945_G */
 
#define INTEL_I945GM_IDS(info) \
INTEL_VGA_DEVICE(0x27a2, info), /* I945_GM */ \
INTEL_VGA_DEVICE(0x27ae, info) /* I945_GME */
 
#define INTEL_I965G_IDS(info) \
INTEL_VGA_DEVICE(0x2972, info), /* I946_GZ */ \
INTEL_VGA_DEVICE(0x2982, info), /* G35_G */ \
INTEL_VGA_DEVICE(0x2992, info), /* I965_Q */ \
INTEL_VGA_DEVICE(0x29a2, info) /* I965_G */
 
#define INTEL_G33_IDS(info) \
INTEL_VGA_DEVICE(0x29b2, info), /* Q35_G */ \
INTEL_VGA_DEVICE(0x29c2, info), /* G33_G */ \
INTEL_VGA_DEVICE(0x29d2, info) /* Q33_G */
 
#define INTEL_I965GM_IDS(info) \
INTEL_VGA_DEVICE(0x2a02, info), /* I965_GM */ \
INTEL_VGA_DEVICE(0x2a12, info) /* I965_GME */
 
#define INTEL_GM45_IDS(info) \
INTEL_VGA_DEVICE(0x2a42, info) /* GM45_G */
 
#define INTEL_G45_IDS(info) \
INTEL_VGA_DEVICE(0x2e02, info), /* IGD_E_G */ \
INTEL_VGA_DEVICE(0x2e12, info), /* Q45_G */ \
INTEL_VGA_DEVICE(0x2e22, info), /* G45_G */ \
INTEL_VGA_DEVICE(0x2e32, info), /* G41_G */ \
INTEL_VGA_DEVICE(0x2e42, info), /* B43_G */ \
INTEL_VGA_DEVICE(0x2e92, info) /* B43_G.1 */
 
#define INTEL_PINEVIEW_IDS(info) \
INTEL_VGA_DEVICE(0xa001, info), \
INTEL_VGA_DEVICE(0xa011, info)
 
#define INTEL_IRONLAKE_D_IDS(info) \
INTEL_VGA_DEVICE(0x0042, info)
 
#define INTEL_IRONLAKE_M_IDS(info) \
INTEL_VGA_DEVICE(0x0046, info)
 
#define INTEL_SNB_D_IDS(info) \
INTEL_VGA_DEVICE(0x0102, info), \
INTEL_VGA_DEVICE(0x0112, info), \
INTEL_VGA_DEVICE(0x0122, info), \
INTEL_VGA_DEVICE(0x010A, info)
 
#define INTEL_SNB_M_IDS(info) \
INTEL_VGA_DEVICE(0x0106, info), \
INTEL_VGA_DEVICE(0x0116, info), \
INTEL_VGA_DEVICE(0x0126, info)
 
#define INTEL_IVB_M_IDS(info) \
INTEL_VGA_DEVICE(0x0156, info), /* GT1 mobile */ \
INTEL_VGA_DEVICE(0x0166, info) /* GT2 mobile */
 
#define INTEL_IVB_D_IDS(info) \
INTEL_VGA_DEVICE(0x0152, info), /* GT1 desktop */ \
INTEL_VGA_DEVICE(0x0162, info), /* GT2 desktop */ \
INTEL_VGA_DEVICE(0x015a, info), /* GT1 server */ \
INTEL_VGA_DEVICE(0x016a, info) /* GT2 server */
 
#define INTEL_IVB_Q_IDS(info) \
INTEL_QUANTA_VGA_DEVICE(info) /* Quanta transcode */
 
#define INTEL_HSW_D_IDS(info) \
INTEL_VGA_DEVICE(0x0402, info), /* GT1 desktop */ \
INTEL_VGA_DEVICE(0x0412, info), /* GT2 desktop */ \
INTEL_VGA_DEVICE(0x0422, info), /* GT3 desktop */ \
INTEL_VGA_DEVICE(0x040a, info), /* GT1 server */ \
INTEL_VGA_DEVICE(0x041a, info), /* GT2 server */ \
INTEL_VGA_DEVICE(0x042a, info), /* GT3 server */ \
INTEL_VGA_DEVICE(0x040B, info), /* GT1 reserved */ \
INTEL_VGA_DEVICE(0x041B, info), /* GT2 reserved */ \
INTEL_VGA_DEVICE(0x042B, info), /* GT3 reserved */ \
INTEL_VGA_DEVICE(0x040E, info), /* GT1 reserved */ \
INTEL_VGA_DEVICE(0x041E, info), /* GT2 reserved */ \
INTEL_VGA_DEVICE(0x042E, info), /* GT3 reserved */ \
INTEL_VGA_DEVICE(0x0C02, info), /* SDV GT1 desktop */ \
INTEL_VGA_DEVICE(0x0C12, info), /* SDV GT2 desktop */ \
INTEL_VGA_DEVICE(0x0C22, info), /* SDV GT3 desktop */ \
INTEL_VGA_DEVICE(0x0C0A, info), /* SDV GT1 server */ \
INTEL_VGA_DEVICE(0x0C1A, info), /* SDV GT2 server */ \
INTEL_VGA_DEVICE(0x0C2A, info), /* SDV GT3 server */ \
INTEL_VGA_DEVICE(0x0C0B, info), /* SDV GT1 reserved */ \
INTEL_VGA_DEVICE(0x0C1B, info), /* SDV GT2 reserved */ \
INTEL_VGA_DEVICE(0x0C2B, info), /* SDV GT3 reserved */ \
INTEL_VGA_DEVICE(0x0C0E, info), /* SDV GT1 reserved */ \
INTEL_VGA_DEVICE(0x0C1E, info), /* SDV GT2 reserved */ \
INTEL_VGA_DEVICE(0x0C2E, info), /* SDV GT3 reserved */ \
INTEL_VGA_DEVICE(0x0A02, info), /* ULT GT1 desktop */ \
INTEL_VGA_DEVICE(0x0A12, info), /* ULT GT2 desktop */ \
INTEL_VGA_DEVICE(0x0A22, info), /* ULT GT3 desktop */ \
INTEL_VGA_DEVICE(0x0A0A, info), /* ULT GT1 server */ \
INTEL_VGA_DEVICE(0x0A1A, info), /* ULT GT2 server */ \
INTEL_VGA_DEVICE(0x0A2A, info), /* ULT GT3 server */ \
INTEL_VGA_DEVICE(0x0A0B, info), /* ULT GT1 reserved */ \
INTEL_VGA_DEVICE(0x0A1B, info), /* ULT GT2 reserved */ \
INTEL_VGA_DEVICE(0x0A2B, info), /* ULT GT3 reserved */ \
INTEL_VGA_DEVICE(0x0D02, info), /* CRW GT1 desktop */ \
INTEL_VGA_DEVICE(0x0D12, info), /* CRW GT2 desktop */ \
INTEL_VGA_DEVICE(0x0D22, info), /* CRW GT3 desktop */ \
INTEL_VGA_DEVICE(0x0D0A, info), /* CRW GT1 server */ \
INTEL_VGA_DEVICE(0x0D1A, info), /* CRW GT2 server */ \
INTEL_VGA_DEVICE(0x0D2A, info), /* CRW GT3 server */ \
INTEL_VGA_DEVICE(0x0D0B, info), /* CRW GT1 reserved */ \
INTEL_VGA_DEVICE(0x0D1B, info), /* CRW GT2 reserved */ \
INTEL_VGA_DEVICE(0x0D2B, info), /* CRW GT3 reserved */ \
INTEL_VGA_DEVICE(0x0D0E, info), /* CRW GT1 reserved */ \
INTEL_VGA_DEVICE(0x0D1E, info), /* CRW GT2 reserved */ \
INTEL_VGA_DEVICE(0x0D2E, info) /* CRW GT3 reserved */ \
 
#define INTEL_HSW_M_IDS(info) \
INTEL_VGA_DEVICE(0x0406, info), /* GT1 mobile */ \
INTEL_VGA_DEVICE(0x0416, info), /* GT2 mobile */ \
INTEL_VGA_DEVICE(0x0426, info), /* GT2 mobile */ \
INTEL_VGA_DEVICE(0x0C06, info), /* SDV GT1 mobile */ \
INTEL_VGA_DEVICE(0x0C16, info), /* SDV GT2 mobile */ \
INTEL_VGA_DEVICE(0x0C26, info), /* SDV GT3 mobile */ \
INTEL_VGA_DEVICE(0x0A06, info), /* ULT GT1 mobile */ \
INTEL_VGA_DEVICE(0x0A16, info), /* ULT GT2 mobile */ \
INTEL_VGA_DEVICE(0x0A26, info), /* ULT GT3 mobile */ \
INTEL_VGA_DEVICE(0x0A0E, info), /* ULT GT1 reserved */ \
INTEL_VGA_DEVICE(0x0A1E, info), /* ULT GT2 reserved */ \
INTEL_VGA_DEVICE(0x0A2E, info), /* ULT GT3 reserved */ \
INTEL_VGA_DEVICE(0x0D06, info), /* CRW GT1 mobile */ \
INTEL_VGA_DEVICE(0x0D16, info), /* CRW GT2 mobile */ \
INTEL_VGA_DEVICE(0x0D26, info) /* CRW GT3 mobile */
 
#define INTEL_VLV_M_IDS(info) \
INTEL_VGA_DEVICE(0x0f30, info), \
INTEL_VGA_DEVICE(0x0f31, info), \
INTEL_VGA_DEVICE(0x0f32, info), \
INTEL_VGA_DEVICE(0x0f33, info), \
INTEL_VGA_DEVICE(0x0157, info)
 
#define INTEL_VLV_D_IDS(info) \
INTEL_VGA_DEVICE(0x0155, info)
 
#endif /* _I915_PCIIDS_H */
/drivers/video/Intel-2D/intel_driver.h
9,180 → 9,52
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL
 
#ifndef PCI_CHIP_I810
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I810_BRIDGE 0x7120
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
#define PCI_CHIP_I810_E_BRIDGE 0x7124
#define PCI_CHIP_I815_BRIDGE 0x1130
#endif
 
#ifndef PCI_CHIP_I830_M
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_I830_M_BRIDGE 0x3575
#endif
 
#ifndef PCI_CHIP_845_G
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_845_G_BRIDGE 0x2560
#endif
 
#ifndef PCI_CHIP_I854
#define PCI_CHIP_I854 0x358E
#define PCI_CHIP_I854_BRIDGE 0x358C
#endif
 
#ifndef PCI_CHIP_I855_GM
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I855_GM_BRIDGE 0x3580
#endif
 
#ifndef PCI_CHIP_I865_G
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I865_G_BRIDGE 0x2570
#endif
 
#ifndef PCI_CHIP_I915_G
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_I915_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I915_GM
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I915_GM_BRIDGE 0x2590
#endif
 
#ifndef PCI_CHIP_E7221_G
#define PCI_CHIP_E7221_G 0x258A
/* Same as I915_G_BRIDGE */
#define PCI_CHIP_E7221_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I945_G
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_G_BRIDGE 0x2770
#endif
 
#ifndef PCI_CHIP_I945_GM
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
#endif
 
#ifndef PCI_CHIP_I945_GME
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC
#endif
 
#ifndef PCI_CHIP_PINEVIEW_M
#define PCI_CHIP_PINEVIEW_M 0xA011
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010
#define PCI_CHIP_PINEVIEW_G 0xA001
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000
#endif
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
 
#ifndef PCI_CHIP_G35_G
#define PCI_CHIP_G35_G 0x2982
#define PCI_CHIP_G35_G_BRIDGE 0x2980
#endif
 
#ifndef PCI_CHIP_I965_Q
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_Q_BRIDGE 0x2990
#endif
 
#ifndef PCI_CHIP_I965_G
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_G_BRIDGE 0x29A0
#endif
 
#ifndef PCI_CHIP_I946_GZ
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970
#endif
 
#ifndef PCI_CHIP_I965_GM
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00
#endif
 
#ifndef PCI_CHIP_I965_GME
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10
#endif
 
#ifndef PCI_CHIP_G33_G
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_G33_G_BRIDGE 0x29C0
#endif
 
#ifndef PCI_CHIP_Q35_G
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0
#endif
 
#ifndef PCI_CHIP_Q33_G
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0
#endif
 
#ifndef PCI_CHIP_GM45_GM
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_GM45_BRIDGE 0x2A40
#endif
 
#ifndef PCI_CHIP_G45_E_G
#define PCI_CHIP_G45_E_G 0x2E02
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00
#endif
 
#ifndef PCI_CHIP_G45_G
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G45_G_BRIDGE 0x2E20
#endif
 
#ifndef PCI_CHIP_Q45_G
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10
#endif
 
#ifndef PCI_CHIP_G41_G
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_G41_G_BRIDGE 0x2E30
#endif
 
#ifndef PCI_CHIP_B43_G
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G_BRIDGE 0x2E40
#endif
 
#ifndef PCI_CHIP_B43_G1
#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90
#endif
 
#ifndef PCI_CHIP_IRONLAKE_D_G
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040
#endif
 
#ifndef PCI_CHIP_IRONLAKE_M_G
#define PCI_CHIP_IRONLAKE_M_G 0x0046
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044
#endif
 
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
 
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156
194,88 → 66,52
 
#define PCI_CHIP_HASWELL_D_GT1 0x0402
#define PCI_CHIP_HASWELL_D_GT2 0x0412
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422
#define PCI_CHIP_HASWELL_D_GT3 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
#define PCI_CHIP_HASWELL_M_GT3 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
#define PCI_CHIP_HASWELL_S_GT3 0x042A
#define PCI_CHIP_HASWELL_B_GT1 0x040B
#define PCI_CHIP_HASWELL_B_GT2 0x041B
#define PCI_CHIP_HASWELL_B_GT3 0x042B
#define PCI_CHIP_HASWELL_E_GT1 0x040E
#define PCI_CHIP_HASWELL_E_GT2 0x041E
#define PCI_CHIP_HASWELL_E_GT3 0x042E
 
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22
#define PCI_CHIP_HASWELL_ULT_D_GT3 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B
#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B
#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B
#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E
#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E
#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E
 
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
#define PCI_CHIP_VALLEYVIEW_1 0x0f31
#define PCI_CHIP_VALLEYVIEW_2 0x0f32
#define PCI_CHIP_VALLEYVIEW_3 0x0f33
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D02
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT3 0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B
#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B
#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B
#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E
#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E
#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E
 
#endif
 
#define I85X_CAPID 0x44
#define I85X_VARIANT_MASK 0x7
#define I85X_VARIANT_SHIFT 5
#define I855_GME 0x0
#define I855_GM 0x4
#define I852_GME 0x2
#define I852_GM 0x5
 
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr
#define VENDOR_ID(p) (p)->vendor_id
#define DEVICE_ID(p) (p)->device_id
#define SUBVENDOR_ID(p) (p)->subvendor_id
#define SUBSYS_ID(p) (p)->subdevice_id
#define CHIP_REVISION(p) (p)->revision
 
#define INTEL_INFO(intel) ((intel)->info)
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
#define IS_GEN4(intel) IS_GENx(intel, 4)
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
/* Some chips have specific errata (or limits) that we need to workaround. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
 
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
 
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
 
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
 
struct intel_device_info {
int gen;
};
/drivers/video/Intel-2D/intel_list.h
107,6 → 107,7
* There are no requirements for a list head, any struct list can be a list
* head.
*/
 
struct list {
struct list *next, *prev;
};
/drivers/video/Intel-2D/kgem.c
0,0 → 1,5410
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
#include <time.h>
#include <errno.h>
#include <fcntl.h>
 
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#endif
 
#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
#include <sys/sysinfo.h>
#endif
 
#include "sna_cpuid.h"
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
#define DBG_NO_HW 0
#define DBG_NO_TILING 0
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_CREATE2 1
#define DBG_NO_USERPTR 0
#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 1
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 1
#define DBG_NO_WT 0
#define DBG_DUMP 0
 
#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
 
#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif
 
 
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif
 
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is being simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
* offsets be aligned to a cacheline (64 bytes).
*/
#define UPLOAD_ALIGNMENT 128
 
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
#define LOCAL_I915_PARAM_HAS_WT 27
 
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
struct local_i915_gem_userptr {
uint64_t user_ptr;
uint64_t user_size;
uint32_t flags;
#define I915_USERPTR_READ_ONLY (1<<0)
#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
uint32_t handle;
};
 
#define UNCACHED 0
#define SNOOPED 1
#define DISPLAY 2
 
struct local_i915_gem_caching {
uint32_t handle;
uint32_t caching;
};
 
#define LOCAL_IOCTL_I915_GEM_SET_CACHING SRV_I915_GEM_SET_CACHING
 
struct local_fbinfo {
int width;
int height;
int pitch;
int tiling;
};
 
struct kgem_buffer {
struct kgem_bo base;
void *mem;
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
 
static inline int bytes(struct kgem_bo *bo)
{
return __kgem_bo_size(bo);
}
 
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
 
#ifdef DEBUG_MEMORY
static void debug_alloc(struct kgem *kgem, size_t size)
{
kgem->debug_memory.bo_allocs++;
kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
 
#ifndef NDEBUG
static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_get_tiling tiling;
 
assert(bo);
 
VG_CLEAR(tiling);
tiling.handle = bo->handle;
tiling.tiling_mode = -1;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
assert(tiling.tiling_mode == bo->tiling);
}
#else
#define assert_tiling(kgem, bo)
#endif
 
static void kgem_sna_reset(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.reset(sna);
sna->blt_state.fill_bo = 0;
}
 
static void kgem_sna_flush(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.flush(sna);
 
// if (sna->render.solid_cache.dirty)
// sna_render_flush_solid(sna);
}
 
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
struct drm_i915_gem_set_tiling set_tiling;
int ret;
 
if (DBG_NO_TILING)
return false;
 
VG_CLEAR(set_tiling);
do {
set_tiling.handle = handle;
set_tiling.tiling_mode = tiling;
set_tiling.stride = stride;
 
ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret != 0);
return ret == 0;
}
 
static bool gem_set_caching(int fd, uint32_t handle, int caching)
{
struct local_i915_gem_caching arg;
 
VG_CLEAR(arg);
arg.handle = handle;
arg.caching = caching;
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
}
 
 
 
 
 
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: not retiring per-request\n", __FUNCTION__));
return false;
}
 
if (!kgem->need_retire) {
DBG(("%s: nothing to retire\n", __FUNCTION__));
return false;
}
 
if (kgem_retire(kgem))
return true;
 
if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
DBG(("%s: not throttling\n", __FUNCTION__));
return false;
}
 
kgem_throttle(kgem);
return kgem_retire(kgem);
}
 
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
void *ptr;
 
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
assert(!bo->snoop);
assert(kgem_bo_can_map(kgem, bo));
 
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
 
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry_gtt;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry_gtt;
}
 
printf("%s: failed to retrieve GTT offset for handle=%d\n",
__FUNCTION__, bo->handle);
return NULL;
}
 
retry_mmap:
ptr = (void*)(int)mmap_arg.offset;
if (ptr == NULL) {
ErrorF("%s: failed to mmap handle=%d, %d bytes, into GTT domain\n",
__FUNCTION__, bo->handle, bytes(bo));
ptr = NULL;
}
 
return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
/* align the transfer to cachelines; fortuitously this is safe! */
if ((offset | length) & 63) {
pwrite.offset = offset & ~63;
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
} else {
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
}
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
 
bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
 
VG_CLEAR(busy);
busy.handle = handle;
busy.busy = !kgem->wedged;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
__FUNCTION__, handle, busy.busy, kgem->wedged));
 
return busy.busy;
}
 
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
__FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
__kgem_busy(kgem, bo->handle)));
assert(bo->exec == NULL);
assert(list_is_empty(&bo->vma));
 
if (bo->rq) {
if (!__kgem_busy(kgem, bo->handle)) {
__kgem_bo_clear_busy(bo);
kgem_retire(kgem);
}
} else {
assert(!bo->needs_flush);
ASSERT_IDLE(kgem, bo->handle);
}
}
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
assert(bo->refcnt);
assert(!bo->purged);
assert(bo->proxy == NULL);
ASSERT_IDLE(kgem, bo->handle);
 
assert(length <= bytes(bo));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return false;
 
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
if (bo->exec == NULL) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_NONE;
}
bo->gtt_dirty = true;
return true;
}
 
static uint32_t gem_create(int fd, int num_pages)
{
struct drm_i915_gem_create create;
 
VG_CLEAR(create);
create.handle = 0;
create.size = PAGE_SIZE * num_pages;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 
return create.handle;
}
 
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->exec == NULL);
assert(!bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = 1;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return true;
#endif
}
 
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
if (!bo->purged)
return true;
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
return madv.retained;
 
return false;
#endif
}
 
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = !madv.retained;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return false;
#endif
}
 
static void gem_close(int fd, uint32_t handle)
{
struct drm_gem_close close;
 
VG_CLEAR(close);
close.handle = handle;
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
 
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
#else
unsigned int v = 0;
 
while (word >>= 1)
v++;
 
return v;
#endif
}
 
constant inline static int cache_bucket(int num_pages)
{
return __fls(num_pages);
}
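/* cache_bucket() keys the bo caches by the highest set bit of the page count,
 * e.g. objects of 4-7 pages share bucket 2 and 8-15 pages share bucket 3. */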
 
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
int handle, int num_pages)
{
assert(num_pages);
memset(bo, 0, sizeof(*bo));
 
bo->refcnt = 1;
bo->handle = handle;
bo->target_handle = -1;
num_pages(bo) = num_pages;
bucket(bo) = cache_bucket(num_pages);
bo->reusable = true;
bo->domain = DOMAIN_CPU;
list_init(&bo->request);
list_init(&bo->list);
list_init(&bo->vma);
 
return bo;
}
 
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
struct kgem_bo *bo;
 
if (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
} else {
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
}
 
return __kgem_bo_init(bo, handle, num_pages);
}
 
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
struct kgem_request *rq;
 
rq = __kgem_freed_request;
if (rq) {
__kgem_freed_request = *(struct kgem_request **)rq;
} else {
rq = malloc(sizeof(*rq));
if (rq == NULL)
rq = &kgem->static_request;
}
 
list_init(&rq->buffers);
rq->bo = NULL;
rq->ring = 0;
 
return rq;
}
 
static void __kgem_request_free(struct kgem_request *rq)
{
_list_del(&rq->list);
*(struct kgem_request **)rq = __kgem_freed_request;
__kgem_freed_request = rq;
}
 
static struct list *inactive(struct kgem *kgem, int num_pages)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->inactive[cache_bucket(num_pages)];
}
 
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->active[cache_bucket(num_pages)][tiling];
}
 
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
/* XXX assume that only future chipsets are unknown and follow
* the post gen2 PCI layout.
*/
return 0;
}
 
static size_t
total_ram_size(void)
{
uint32_t data[9];
size_t size = 0;
 
asm volatile("int $0x40"
: "=a" (size)
: "a" (18),"b"(20), "c" (data)
: "memory");
 
return size != -1 ? size : 0;
}
 
static unsigned
cpu_cache_size__cpuid4(void)
{
/* Deterministic Cache Parameters (Function 04h):
* When EAX is initialized to a value of 4, the CPUID instruction
* returns deterministic cache information in the EAX, EBX, ECX
* and EDX registers. This function requires ECX be initialized
* with an index which indicates which cache to return information
* about. The OS is expected to call this function (CPUID.4) with
* ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
* The order in which the caches are returned is not specified
* and may change at Intel's discretion.
*
* Calculating the Cache Size in bytes:
* = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
*/
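/* Example with illustrative values: a 12-way cache with 1 partition,
 * 64-byte lines and 8192 sets gives 12 * 1 * 64 * 8192 = 6 MiB. */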
 
unsigned int eax, ebx, ecx, edx;
unsigned int llc_size = 0;
int cnt = 0;
 
if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
return 0;
 
do {
unsigned associativity, line_partitions, line_size, sets;
 
__cpuid_count(4, cnt++, eax, ebx, ecx, edx);
 
if ((eax & 0x1f) == 0)
break;
 
associativity = ((ebx >> 22) & 0x3ff) + 1;
line_partitions = ((ebx >> 12) & 0x3ff) + 1;
line_size = (ebx & 0xfff) + 1;
sets = ecx + 1;
 
llc_size = associativity * line_partitions * line_size * sets;
} while (1);
 
return llc_size;
}
 
static int gem_param(struct kgem *kgem, int name)
{
drm_i915_getparam_t gp;
int v = -1; /* No param uses the sign bit, reserve it for errors */
 
VG_CLEAR(gp);
gp.param = name;
gp.value = &v;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
return -1;
 
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
return v;
}
 
static bool test_has_execbuffer2(struct kgem *kgem)
{
return 1;
}
 
static bool test_has_no_reloc(struct kgem *kgem)
{
if (DBG_NO_FAST_RELOC)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
 
static bool test_has_handle_lut(struct kgem *kgem)
{
if (DBG_NO_HANDLE_LUT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
 
static bool test_has_wt(struct kgem *kgem)
{
if (DBG_NO_WT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_WT) > 0;
}
 
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
bool detected = false;
int ret;
 
if (DBG_NO_SEMAPHORES)
return false;
 
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
if (ret != -1)
return ret > 0;
 
return detected;
}
 
static bool __kgem_throttle(struct kgem *kgem)
{
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
return false;
 
return errno == EIO;
}
 
static bool is_hw_supported(struct kgem *kgem,
struct pci_device *dev)
{
if (DBG_NO_HW)
return false;
 
if (!test_has_execbuffer2(kgem))
return false;
 
if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
return kgem->has_blt;
 
/* Although pre-855gm the GMCH is fubar, it works mostly. So
* let the user decide through "NoAccel" whether or not to risk
* hw acceleration.
*/
 
if (kgem->gen == 060 && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
 
if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
return kgem->has_blt;
 
return true;
}
 
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
if (kgem->gen < 040) {
if (DBG_NO_RELAXED_FENCING)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
} else
return true;
}
 
static bool test_has_llc(struct kgem *kgem)
{
int has_llc = -1;
 
if (DBG_NO_LLC)
return false;
 
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
}
 
return has_llc;
}
 
static bool test_has_caching(struct kgem *kgem)
{
uint32_t handle;
bool ret;
 
if (DBG_NO_CACHE_LEVEL)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
handle = gem_create(kgem->fd, 1);
if (handle == 0)
return false;
 
ret = gem_set_caching(kgem->fd, handle, UNCACHED);
gem_close(kgem->fd, handle);
return ret;
}
 
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
uint32_t handle;
void *ptr;
 
if (DBG_NO_USERPTR)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
if (posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE))
return false;
 
handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
gem_close(kgem->fd, handle);
free(ptr);
 
return handle != 0;
#else
return false;
#endif
}
 
static bool test_has_create2(struct kgem *kgem)
{
#if defined(USE_CREATE2)
struct local_i915_gem_create2 args;
 
if (DBG_NO_CREATE2)
return false;
 
memset(&args, 0, sizeof(args));
args.size = PAGE_SIZE;
args.caching = DISPLAY;
if (drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
gem_close(kgem->fd, args.handle);
 
return args.handle != 0;
#else
return false;
#endif
}
 
static bool test_has_secure_batches(struct kgem *kgem)
{
if (DBG_NO_SECURE_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}
 
static bool test_has_pinned_batches(struct kgem *kgem)
{
if (DBG_NO_PINNED_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
}
 
 
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
int count[2] = { 2, 1 };
int size[2] = { 1, 2 };
int n, i;
 
if (kgem->wedged)
return true;
 
for (n = 0; n < ARRAY_SIZE(count); n++) {
for (i = 0; i < count[n]; i++) {
struct drm_i915_gem_pin pin;
struct kgem_bo *bo;
 
VG_CLEAR(pin);
 
pin.handle = gem_create(kgem->fd, size[n]);
if (pin.handle == 0)
goto err;
 
DBG(("%s: new handle=%d, num_pages=%d\n",
__FUNCTION__, pin.handle, size[n]));
 
bo = __kgem_bo_alloc(pin.handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, pin.handle);
goto err;
}
 
pin.alignment = 0;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
gem_close(kgem->fd, pin.handle);
goto err;
}
bo->presumed_offset = pin.offset;
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
}
 
return true;
 
err:
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
 
/* For simplicity populate the lists with a single unpinned bo */
for (n = 0; n < ARRAY_SIZE(count); n++) {
struct kgem_bo *bo;
uint32_t handle;
 
handle = gem_create(kgem->fd, size[n]);
if (handle == 0)
break;
 
bo = __kgem_bo_alloc(handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, handle);
break;
}
 
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
return false;
}
 
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
unsigned half_gpu_max;
unsigned int i, j;
 
DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
 
memset(kgem, 0, sizeof(*kgem));
 
kgem->fd = fd;
kgem->gen = gen;
 
list_init(&kgem->requests[0]);
list_init(&kgem->requests[1]);
list_init(&kgem->batch_buffers);
list_init(&kgem->active_buffers);
list_init(&kgem->flushing);
list_init(&kgem->large);
list_init(&kgem->large_inactive);
list_init(&kgem->snoop);
list_init(&kgem->scanout);
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
list_init(&kgem->pinned_batches[i]);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
list_init(&kgem->active[i][j]);
}
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
list_init(&kgem->vma[i].inactive[j]);
}
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
 
kgem->has_relaxed_delta =
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));
 
kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));
 
kgem->has_llc = test_has_llc(kgem);
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
kgem->has_llc));
 
kgem->has_wt = test_has_wt(kgem);
DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
kgem->has_wt));
 
kgem->has_caching = test_has_caching(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_caching));
 
kgem->has_userptr = test_has_userptr(kgem);
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
 
kgem->has_create2 = test_has_create2(kgem);
kgem->has_create2 = 0;
DBG(("%s: has create2? %d\n", __FUNCTION__,
kgem->has_create2));
 
kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
 
kgem->has_handle_lut = test_has_handle_lut(kgem);
kgem->has_handle_lut = 0;
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
 
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));
 
kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
 
kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
 
kgem->has_pinned_batches = test_has_pinned_batches(kgem);
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
kgem->has_pinned_batches));
 
if (!is_hw_supported(kgem, dev)) {
printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
kgem->wedged = 1;
} else if (__kgem_throttle(kgem)) {
printf("Detected a hung GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;
 
if (!kgem_init_pinned_batches(kgem) && gen == 020) {
printf("Unable to reserve memory for GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
 
kgem->min_alignment = 4;
if (gen < 040)
kgem->min_alignment = 64;
 
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
__FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));
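/* i.e. half the last-level cache expressed in 4 KiB pages:
 * cpu_cache_size() >> 13 == (size / 2) / 4096. */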
 
kgem->next_request = __kgem_request_alloc(kgem);
 
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
kgem->has_llc, kgem->has_caching, kgem->has_userptr));
 
VG_CLEAR(aperture);
aperture.aper_size = 0;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;
 
DBG(("%s: aperture size %lld, available now %lld\n",
__FUNCTION__,
(long long)aperture.aper_size,
(long long)aperture.aper_available_size));
 
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
}
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
 
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
kgem->aperture_mappable = aperture.aper_size;
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
 
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
kgem->buffer_size = 1 << __fls(kgem->buffer_size);
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
assert(kgem->buffer_size);
 
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
kgem->max_gpu_size = MAX_CACHE_SIZE;
 
totalram = total_ram_size();
if (totalram == 0) {
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
__FUNCTION__));
totalram = kgem->aperture_total;
}
DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
 
kgem->max_cpu_size = kgem->max_object_size;
 
half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;
 
if (kgem->has_llc)
kgem->max_upload_tile_size = kgem->max_copy_tile_size;
else
kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
if (kgem->max_upload_tile_size > half_gpu_max)
kgem->max_upload_tile_size = half_gpu_max;
if (kgem->max_upload_tile_size > kgem->aperture_high/2)
kgem->max_upload_tile_size = kgem->aperture_high/2;
if (kgem->max_upload_tile_size > kgem->aperture_low)
kgem->max_upload_tile_size = kgem->aperture_low;
if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
kgem->max_upload_tile_size = 16*PAGE_SIZE;
 
kgem->large_object_size = MAX_CACHE_SIZE;
if (kgem->large_object_size > half_gpu_max)
kgem->large_object_size = half_gpu_max;
if (kgem->max_copy_tile_size > kgem->aperture_high/2)
kgem->max_copy_tile_size = kgem->aperture_high/2;
if (kgem->max_copy_tile_size > kgem->aperture_low)
kgem->max_copy_tile_size = kgem->aperture_low;
if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
kgem->max_copy_tile_size = 16*PAGE_SIZE;
 
if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
if (kgem->large_object_size > kgem->max_cpu_size)
kgem->large_object_size = kgem->max_cpu_size;
} else
kgem->max_cpu_size = 0;
if (DBG_NO_CPU)
kgem->max_cpu_size = 0;
 
DBG(("%s: maximum object size=%d\n",
__FUNCTION__, kgem->max_object_size));
DBG(("%s: large object thresold=%d\n",
__FUNCTION__, kgem->large_object_size));
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
__FUNCTION__,
kgem->max_gpu_size, kgem->max_cpu_size,
kgem->max_upload_tile_size, kgem->max_copy_tile_size));
 
/* Convert the aperture thresholds to pages */
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;
 
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
 
kgem->batch_flags_base = 0;
if (kgem->has_no_reloc)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
 
/* XXX hopefully a good approximation */
uint32_t kgem_get_unique_id(struct kgem *kgem)
{
uint32_t id;
id = ++kgem->unique_id;
if (id == 0)
id = ++kgem->unique_id;
return id;
}
 
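/* Minimum pitch alignment for a new surface: PRIME-shared buffers need a
 * 256-byte pitch, scanouts a 64-byte pitch, everything else the
 * per-generation minimum probed at init time.
 */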
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_PRIME)
return 256;
if (flags & CREATE_SCANOUT)
return 64;
return kgem->min_alignment;
}
 
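/* Report the fence tile geometry for this generation: gen2 uses 128x16
 * (2KiB) tiles, gen3 512x8 (4KiB); later generations use 512x8 for
 * X-tiling and 128x32 for Y-tiling, with untiled surfaces treated as 1x1.
 */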
void kgem_get_tile_size(struct kgem *kgem, int tiling,
int *tile_width, int *tile_height, int *tile_size)
{
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
*tile_width = 128;
*tile_height = 16;
*tile_size = 2048;
} else {
*tile_width = 512;
*tile_height = 8;
*tile_size = 4096;
}
} else {
*tile_width = 1;
*tile_height = 1;
*tile_size = 1;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
*tile_width = 1;
*tile_height = 1;
*tile_size = 1;
break;
case I915_TILING_X:
*tile_width = 512;
*tile_height = 8;
*tile_size = 4096;
break;
case I915_TILING_Y:
*tile_width = 128;
*tile_height = 32;
*tile_size = 4096;
break;
}
}
 
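/* Compute the allocation size (and pitch) for a 2D surface: the pitch is
 * rounded up to the tile width and the height to the tile height. From
 * gen4 onwards any page-aligned size is fine; older generations also need
 * a power-of-two pitch for tiled surfaces and, without relaxed fencing, a
 * power-of-two fence region.
 */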
uint32_t kgem_surface_size(struct kgem *kgem,
bool relaxed_fencing,
unsigned flags,
uint32_t width,
uint32_t height,
uint32_t bpp,
uint32_t tiling,
uint32_t *pitch)
{
uint32_t tile_width, tile_height;
uint32_t size;
 
assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
assert(bpp >= 8);
 
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
tile_width = 128;
tile_height = 32;
} else {
tile_width = 512;
tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;
 
/* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
tile_height = 64;
break;
}
 
*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);
 
/* If it is too wide for the blitter, don't even bother. */
if (tiling != I915_TILING_NONE) {
if (*pitch > 8192)
return 0;
 
for (size = tile_width; size < *pitch; size <<= 1)
;
*pitch = size;
} else {
if (*pitch >= 32768)
return 0;
}
 
size = *pitch * height;
if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);
 
/* We need to allocate a power-of-two fence region for a tiled buffer. */
if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
while (tile_width < size)
tile_width *= 2;
return tile_width;
}
 
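/* Round the surface height up to a whole number of tile rows. */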
static uint32_t kgem_aligned_height(struct kgem *kgem,
uint32_t height, uint32_t tiling)
{
uint32_t tile_height;
 
if (kgem->gen <= 030) {
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1;
} else switch (tiling) {
/* XXX align to an even tile row */
default:
case I915_TILING_NONE:
tile_height = 1;
break;
case I915_TILING_X:
tile_height = 16;
break;
case I915_TILING_Y:
tile_height = 64;
break;
}
 
return ALIGN(height, tile_height);
}
 
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_exec_object2 *exec;
 
DBG(("%s: handle=%d, index=%d\n",
__FUNCTION__, bo->handle, kgem->nexec));
 
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
 
kgem->aperture += num_pages(bo);
 
return exec;
}
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
}
 
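/* Terminate the batch with MI_BATCH_BUFFER_END, padding with MI_NOOP so
 * the batch stays an even number of dwords long.
 */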
static uint32_t kgem_end_batch(struct kgem *kgem)
{
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
if (kgem->nbatch & 1)
kgem->batch[kgem->nbatch++] = MI_NOOP;
 
return kgem->nbatch;
}
 
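/* Patch relocations that point back into the batch buffer itself: now that
 * the batch bo is known, rewrite their target handle, presumed offset and
 * the batch dwords they cover. Only the first 256 self-relocations are
 * recorded individually; any overflow is found by rescanning for the ~0U
 * sentinel.
 */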
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;
 
assert(kgem->nreloc__self <= 256);
if (kgem->nreloc__self == 0)
return;
 
for (n = 0; n < kgem->nreloc__self; n++) {
int i = kgem->reloc__self[n];
assert(kgem->reloc[i].target_handle == ~0U);
kgem->reloc[i].target_handle = bo->target_handle;
kgem->reloc[i].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
kgem->reloc[i].delta + bo->presumed_offset;
}
 
if (n == 256) {
for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
}
}
}
}
 
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_bo_binding *b;
 
b = bo->binding.next;
while (b) {
struct kgem_bo_binding *next = b->next;
free (b);
b = next;
}
}
 
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
int type = IS_CPU_MAP(bo->map);
 
assert(!IS_USER_MAP(bo->map));
 
DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
__FUNCTION__, type ? "CPU" : "GTT",
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
list_del(&bo->vma);
kgem->vma[type].count--;
}
}
 
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->refcnt == 0);
assert(bo->proxy == NULL);
assert(bo->exec == NULL);
assert(!bo->snoop || bo->rq == NULL);
 
#ifdef DEBUG_MEMORY
kgem->debug_memory.bo_allocs--;
kgem->debug_memory.bo_bytes -= bytes(bo);
#endif
 
kgem_bo_binding_free(kgem, bo);
 
if (IS_USER_MAP(bo->map)) {
assert(bo->rq == NULL);
assert(!__kgem_busy(kgem, bo->handle));
assert(MAP(bo->map) != bo || bo->io || bo->flush);
if (!(bo->io || bo->flush)) {
DBG(("%s: freeing snooped base\n", __FUNCTION__));
assert(bo != MAP(bo->map));
free(MAP(bo->map));
}
bo->map = NULL;
}
if (bo->map)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
assert(bo->map == NULL);
 
_list_del(&bo->list);
_list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
 
if (!bo->io) {
*(struct kgem_bo **)bo = __kgem_freed_bo;
__kgem_freed_bo = bo;
} else
free(bo);
}
 
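/* Retire an idle, reusable bo into the inactive cache. Oversized objects
 * go onto the large_inactive list; a mapped bo keeps its VMA only while it
 * remains useful (a CPU map, or a GTT map that is still mappable).
 */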
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(bo->domain != DOMAIN_GPU);
assert(!bo->proxy);
assert(!bo->io);
assert(!bo->scanout);
assert(!bo->snoop);
assert(!bo->flush);
assert(!bo->needs_flush);
assert(list_is_empty(&bo->vma));
assert_tiling(kgem, bo);
ASSERT_IDLE(kgem, bo->handle);
 
kgem->need_expire = true;
 
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
list_move(&bo->list, &kgem->large_inactive);
return;
}
 
assert(bo->flush == false);
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
(!type && !__kgem_bo_is_mappable(kgem, bo))) {
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
}
if (bo->map) {
list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
kgem->vma[type].count++;
}
}
}
 
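/* An io (upload) buffer cannot be cached directly: transfer its handle to
 * a plain bo before returning it to the caches, or mark it non-reusable if
 * the allocation fails.
 */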
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
struct kgem_bo *base;
 
if (!bo->io)
return bo;
 
assert(!bo->snoop);
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
__FUNCTION__, bo->handle));
/* transfer the handle to a minimum bo */
memcpy(base, bo, sizeof(*base));
base->io = false;
list_init(&base->list);
list_replace(&bo->request, &base->request);
list_replace(&bo->vma, &base->vma);
free(bo);
bo = base;
} else
bo->reusable = false;
 
return bo;
}
 
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (bo->map) {
assert(!list_is_empty(&bo->vma));
list_del(&bo->vma);
kgem->vma[IS_CPU_MAP(bo->map)].count--;
}
}
 
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq != NULL);
if (bo->rq == (void *)kgem)
list_del(&bo->request);
assert(list_is_empty(&bo->vma));
}
 
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
 
DBG(("%s: size=%d, offset=%d, parent used=%d\n",
__FUNCTION__, bo->size.bytes, bo->delta, io->used));
 
if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
io->used = bo->delta;
}
 
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->scanout);
assert(bo->delta);
assert(!bo->flush);
assert(!bo->snoop);
assert(!bo->io);
 
if (bo->purged) {
DBG(("%s: discarding purged scanout - external name?\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
return;
}
 
DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
__FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
if (bo->rq)
list_move_tail(&bo->list, &kgem->scanout);
else
list_move(&bo->list, &kgem->scanout);
}
 
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->reusable);
assert(!bo->flush);
assert(!bo->needs_flush);
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
 
if (num_pages(bo) > kgem->max_cpu_size >> 13) {
DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
__FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
kgem_bo_free(kgem, bo);
return;
}
 
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
 
DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
list_add(&bo->list, &kgem->snoop);
}
 
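/* Look for a cached snooped (CPU-coherent) bo of at least num_pages,
 * retiring outstanding work if the cache is empty. A candidate more than
 * twice the requested size is only remembered as a fallback in case no
 * tighter fit is found.
 */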
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
 
DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
 
if ((kgem->has_caching | kgem->has_userptr) == 0)
return NULL;
 
if (list_is_empty(&kgem->snoop)) {
DBG(("%s: inactive and cache empty\n", __FUNCTION__));
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
}
 
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
assert(!bo->scanout);
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
 
if (num_pages > num_pages(bo))
continue;
 
if (num_pages(bo) > 2*num_pages) {
if (first == NULL)
first = bo;
continue;
}
 
list_del(&bo->list);
bo->pitch = 0;
bo->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
return bo;
}
 
if (first) {
list_del(&first->list);
first->pitch = 0;
first->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, first->handle, num_pages(first)));
return first;
}
 
return NULL;
}
 
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
{
if (kgem->nexec != 1 || bo->exec == NULL)
return;
 
DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
__FUNCTION__, bo->handle));
 
assert(bo->exec == &kgem->exec[0]);
assert(kgem->exec[0].handle == bo->handle);
assert(RQ(bo->rq) == kgem->next_request);
 
bo->refcnt++;
kgem_reset(kgem);
bo->refcnt--;
}
 
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged || !bo->reusable);
assert(bo->proxy == NULL);
assert_tiling(kgem, bo);
 
bo->binding.offset = 0;
 
if (DBG_NO_CACHE)
goto destroy;
 
if (bo->snoop && !bo->flush) {
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(bo->reusable);
assert(list_is_empty(&bo->list));
if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
if (bo->rq == NULL)
kgem_bo_move_to_snoop(kgem, bo);
return;
}
if (!IS_USER_MAP(bo->map))
bo->flush = false;
 
if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
return;
}
 
if (bo->io)
bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
goto destroy;
}
 
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
 
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->flush == false);
assert(bo->snoop == false);
assert(bo->io == false);
assert(bo->scanout == false);
 
kgem_bo_undo(kgem, bo);
assert(bo->refcnt == 0);
 
if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
if (bo->rq) {
struct list *cache;
 
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
if (bucket(bo) < NUM_CACHE_BUCKETS)
cache = &kgem->active[bucket(bo)][bo->tiling];
else
cache = &kgem->large;
list_add(&bo->list, cache);
return;
}
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
 
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
 
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;
 
DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}
 
kgem_bo_move_to_inactive(kgem, bo);
return;
 
destroy:
if (!bo->exec)
kgem_bo_free(kgem, bo);
}
 
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
__kgem_bo_destroy(kgem, bo);
}
 
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
while (!list_is_empty(&bo->base.vma)) {
struct kgem_bo *cached;
 
cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
assert(cached->proxy == &bo->base);
list_del(&cached->vma);
 
assert(*(struct kgem_bo **)cached->map == cached);
*(struct kgem_bo **)cached->map = NULL;
cached->map = NULL;
 
kgem_bo_destroy(kgem, cached);
}
}
 
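/* Retire upload buffers whose requests have completed, releasing any
 * proxies still mapped into them.
 */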
static bool kgem_retire__buffers(struct kgem *kgem)
{
bool retired = false;
 
while (!list_is_empty(&kgem->active_buffers)) {
struct kgem_buffer *bo =
list_last_entry(&kgem->active_buffers,
struct kgem_buffer,
base.list);
 
if (bo->base.rq)
break;
 
DBG(("%s: releasing upload cache for handle=%d? %d\n",
__FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
list_del(&bo->base.list);
kgem_buffer_release(kgem, bo);
kgem_bo_unref(kgem, &bo->base);
retired = true;
}
 
return retired;
}
 
static bool kgem_retire__flushing(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
bool retired = false;
 
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
assert(bo->rq == (void *)kgem);
assert(bo->exec == NULL);
 
if (__kgem_busy(kgem, bo->handle))
break;
 
__kgem_bo_clear_busy(bo);
 
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else
kgem_bo_free(kgem, bo);
}
#if HAS_DEBUG_FULL
{
int count = 0;
list_for_each_entry(bo, &kgem->flushing, request)
count++;
ErrorF("%s: %d bo on flushing list\n", __FUNCTION__, count);
}
#endif
 
kgem->need_retire |= !list_is_empty(&kgem->flushing);
 
return retired;
}
 
 
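/* A request has completed: walk its buffer list, moving bos that still
 * need a flush onto the flushing list and returning the rest to the snoop,
 * scanout or inactive caches (or freeing them), then recycle the batch bo
 * and the request itself.
 */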
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
bool retired = false;
 
DBG(("%s: request %d complete\n",
__FUNCTION__, rq->bo->handle));
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
assert(RQ(bo->rq) == rq);
assert(bo->exec == NULL);
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
 
list_del(&bo->request);
 
if (bo->needs_flush)
bo->needs_flush = __kgem_busy(kgem, bo->handle);
if (bo->needs_flush) {
DBG(("%s: moving %d to flushing\n",
__FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
continue;
}
 
bo->domain = DOMAIN_NONE;
bo->rq = NULL;
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
assert(rq->bo->rq == NULL);
assert(list_is_empty(&rq->bo->request));
 
if (--rq->bo->refcnt == 0) {
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, rq->bo->handle));
kgem_bo_free(kgem, rq->bo);
}
}
 
__kgem_request_free(rq);
return retired;
}
 
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
bool retired = false;
 
while (!list_is_empty(&kgem->requests[ring])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list);
if (__kgem_busy(kgem, rq->bo->handle))
break;
 
retired |= __kgem_retire_rq(kgem, rq);
}
 
#if HAS_DEBUG_FULL
{
struct kgem_bo *bo;
int count = 0;
 
list_for_each_entry(bo, &kgem->requests[ring], request)
count++;
 
bo = NULL;
if (!list_is_empty(&kgem->requests[ring]))
bo = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list)->bo;
 
ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n",
__FUNCTION__, ring, count, bo ? bo->handle : 0);
}
#endif
 
return retired;
}
 
static bool kgem_retire__requests(struct kgem *kgem)
{
bool retired = false;
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
retired |= kgem_retire__requests_ring(kgem, n);
kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
}
 
return retired;
}
 
bool kgem_retire(struct kgem *kgem)
{
bool retired = false;
 
DBG(("%s\n", __FUNCTION__));
 
kgem->need_retire = false;
 
retired |= kgem_retire__flushing(kgem);
retired |= kgem_retire__requests(kgem);
retired |= kgem_retire__buffers(kgem);
 
DBG(("%s -- retired=%d, need_retire=%d\n",
__FUNCTION__, retired, kgem->need_retire));
 
kgem->retire(kgem);
 
return retired;
}
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
struct kgem_request *rq;
 
assert(!list_is_empty(&kgem->requests[ring]));
 
rq = list_last_entry(&kgem->requests[ring],
struct kgem_request, list);
if (__kgem_busy(kgem, rq->bo->handle)) {
DBG(("%s: last requests handle=%d still busy\n",
__FUNCTION__, rq->bo->handle));
return false;
}
 
DBG(("%s: ring=%d idle (handle=%d)\n",
__FUNCTION__, ring, rq->bo->handle));
 
kgem_retire__requests_ring(kgem, ring);
assert(list_is_empty(&kgem->requests[ring]));
return true;
}
 
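/* The batch has been submitted: record the offsets chosen by the kernel,
 * move the bos into the GPU domain and queue the request for later
 * retiring. If the fallback static_request was used (allocation failure),
 * sync immediately instead of tracking it.
 */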
static void kgem_commit(struct kgem *kgem)
{
struct kgem_request *rq = kgem->next_request;
struct kgem_bo *bo, *next;
 
list_for_each_entry_safe(bo, next, &rq->buffers, request) {
assert(next->request.prev == &bo->request);
 
DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
__FUNCTION__, bo->handle, bo->proxy != NULL,
bo->gpu_dirty, bo->needs_flush, bo->snoop,
(unsigned)bo->exec->offset));
 
assert(bo->exec);
assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
 
bo->presumed_offset = bo->exec->offset;
bo->exec = NULL;
bo->target_handle = -1;
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
kgem_bo_free(kgem, bo);
continue;
}
 
bo->binding.offset = 0;
bo->domain = DOMAIN_GPU;
bo->gpu_dirty = false;
 
if (bo->proxy) {
/* proxies are not used for domain tracking */
bo->exec = NULL;
__kgem_bo_clear_busy(bo);
}
 
kgem->scanout_busy |= bo->scanout;
}
 
if (rq == &kgem->static_request) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
}
 
kgem_retire(kgem);
assert(list_is_empty(&rq->buffers));
 
assert(rq->bo->map == NULL);
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
} else {
list_add_tail(&rq->list, &kgem->requests[rq->ring]);
kgem->need_throttle = kgem->need_retire = 1;
}
 
kgem->next_request = NULL;
}
 
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
while (!list_is_empty(head))
kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
}
 
static void kgem_close_inactive(struct kgem *kgem)
{
unsigned int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
kgem_close_list(kgem, &kgem->inactive[i]);
}
 
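/* Before submission, finish the partially filled upload buffers: keep
 * mmapped buffers with room to spare on the active list, shrink written
 * buffers into a tighter bo where possible (rewriting the affected
 * relocations and exec entry), and otherwise upload their contents via
 * gem_write.
 */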
static void kgem_finish_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo, *next;
 
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? "cpu" : "gtt" : "no"));
 
assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
continue;
}
 
if (!bo->write) {
assert(bo->base.exec || bo->base.refcnt > 1);
goto decouple;
}
 
if (bo->mmapped) {
int used;
 
assert(!bo->need_io);
 
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
goto decouple;
}
 
if (!bo->used) {
/* Unless we replace the handle in the execbuffer,
* this bo will become active. So decouple it
* from the buffer list and track it in the normal
* manner.
*/
goto decouple;
}
 
assert(bo->need_io);
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);
 
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
unsigned alloc = NUM_PAGES(bo->used);
 
shrink = search_snoop_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
void *map;
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
map = kgem_bo_map__cpu(kgem, shrink);
if (map) {
kgem_bo_sync__cpu(kgem, shrink);
memcpy(map, bo->mem, bo->used);
 
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.gpu_dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.gpu_dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
 
shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.gpu_dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.gpu_dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
}
 
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;
 
decouple:
DBG(("%s: releasing handle=%d\n",
__FUNCTION__, bo->base.handle));
list_del(&bo->base.list);
kgem_bo_unref(kgem, &bo->base);
}
}
 
static void kgem_cleanup(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
while (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
bo->exec = NULL;
bo->gpu_dirty = false;
__kgem_bo_clear_busy(bo);
if (bo->refcnt == 0)
kgem_bo_free(kgem, bo);
}
 
__kgem_request_free(rq);
}
}
 
kgem_close_inactive(kgem);
}
 
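/* Upload the batch commands (and any trailing surface state) into the
 * batch bo, writing the two halves separately when they do not share
 * pages.
 */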
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
int ret;
 
ASSERT_IDLE(kgem, handle);
 
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
 
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
}
 
/* Disjoint surface/batch, upload separately */
ret = gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
if (ret)
return ret;
 
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
}
 
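/* Abandon the batch under construction: release or recycle every bo in the
 * pending request, clear the batch/reloc/exec bookkeeping and start a
 * fresh request.
 */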
void kgem_reset(struct kgem *kgem)
{
if (kgem->next_request) {
struct kgem_request *rq = kgem->next_request;
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo =
list_first_entry(&rq->buffers,
struct kgem_bo,
request);
list_del(&bo->request);
 
assert(RQ(bo->rq) == rq);
 
bo->binding.offset = 0;
bo->exec = NULL;
bo->target_handle = -1;
bo->gpu_dirty = false;
 
if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
} else
__kgem_bo_clear_busy(bo);
 
if (bo->refcnt || bo->rq)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
if (rq != &kgem->static_request) {
list_init(&rq->list);
__kgem_request_free(rq);
}
}
 
kgem->nfence = 0;
kgem->nexec = 0;
kgem->nreloc = 0;
kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
kgem->batch_flags = kgem->batch_flags_base;
 
kgem->next_request = __kgem_request_alloc(kgem);
 
kgem_sna_reset(kgem);
}
 
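/* Surface state is written downwards from the end of the batch; when
 * relaxed delta relocations are supported, close the unused gap between
 * the commands and the surface state (fixing up the affected relocations)
 * so the upload fits in fewer pages. Returns the upload size in bytes.
 */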
static int compact_batch_surface(struct kgem *kgem)
{
int size, shrink, n;
 
if (!kgem->has_relaxed_delta)
return kgem->batch_size;
 
/* See if we can pack the contents into one or two pages */
n = ALIGN(kgem->batch_size, 1024);
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
 
shrink = n - size;
if (shrink) {
DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
 
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
 
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
kgem->reloc[n].offset -= shrink;
}
}
 
return size * sizeof(uint32_t);
}
 
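/* Pick a bo to hold the batch: prefer one of the pre-pinned 4KiB/16KiB
 * batch buffers if it is idle (retiring its last request if necessary); on
 * gen2 without pinned batches, stall until one is free. Otherwise fall
 * back to a fresh linear allocation.
 */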
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;
 
if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
struct kgem_bo,
list);
if (!bo->rq) {
out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_4096;
}
}
 
if (size <= 16384) {
bo = list_first_entry(&kgem->pinned_batches[1],
struct kgem_bo,
list);
if (!bo->rq) {
out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_16384;
}
}
 
if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);
 
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
struct kgem_bo,
list);
list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
 
DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
return NULL;
}
 
kgem_retire(kgem);
assert(bo->rq == NULL);
return kgem_bo_reference(bo);
}
 
return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
 
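/* Close and submit the current batch: finish the upload buffers, pick and
 * fill a batch bo, then call execbuffer (retrying on EBUSY). On failure
 * the GPU is marked wedged and the caches are cleaned up before the state
 * is reset for the next batch.
 */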
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
int size;
 
assert(!DBG_NO_HW);
assert(!kgem->wedged);
 
assert(kgem->nbatch);
assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
assert(kgem->nbatch <= kgem->surface);
 
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
 
DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, kgem->batch_flags,
batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
 
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
assert(kgem->nfence <= kgem->fence_max);
 
kgem_finish_buffers(kgem);
 
#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif
 
rq = kgem->next_request;
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;
 
assert(!rq->bo->needs_flush);
 
i = kgem->nexec++;
kgem->exec[i].handle = handle;
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
 
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
 
kgem_fixup_self_relocs(kgem, rq->bo);
 
if (kgem_batch_write(kgem, handle, size) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret, retry = 3;
 
memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.flags = kgem->ring | kgem->batch_flags;
 
 
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
while (ret == -1 && errno == EBUSY && retry--) {
__kgem_throttle(kgem);
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
if (DEBUG_SYNC && ret == 0) {
struct drm_i915_gem_set_domain set_domain;
 
VG_CLEAR(set_domain);
set_domain.handle = handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
if (ret == -1) {
// DBG(("%s: GPU hang detected [%d]\n",
// __FUNCTION__, errno));
kgem_throttle(kgem);
kgem->wedged = true;
 
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
 
for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request) {
if (bo->handle == kgem->exec[i].handle) {
found = bo;
break;
}
}
ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
i,
kgem->exec[i].handle,
(int)kgem->exec[i].offset,
found ? kgem_bo_size(found) : -1,
found ? found->tiling : -1,
(int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
found ? found->snoop : -1,
found ? found->purged : -1);
}
for (i = 0; i < kgem->nreloc; i++) {
ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
i,
(int)kgem->reloc[i].offset,
kgem->reloc[i].target_handle,
kgem->reloc[i].delta,
kgem->reloc[i].read_domains,
kgem->reloc[i].write_domain,
(int)kgem->reloc[i].presumed_offset);
}
 
if (DEBUG_SYNC) {
int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd != -1) {
write(fd, kgem->batch, batch_end*sizeof(uint32_t));
close(fd);
}
 
FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
#endif
}
}
 
kgem_commit(kgem);
}
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem_reset(kgem);
 
assert(kgem->next_request != NULL);
}
 
void kgem_throttle(struct kgem *kgem)
{
kgem->need_throttle = 0;
if (kgem->wedged)
return;
 
kgem->wedged = __kgem_throttle(kgem);
if (kgem->wedged) {
printf("Detected a hung GPU, disabling acceleration.\n");
printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
}
}
 
void kgem_purge_cache(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
if (!kgem_bo_is_retained(kgem, bo)) {
DBG(("%s: purging %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
}
 
kgem->need_purge = false;
}
 
 
void kgem_clean_large_cache(struct kgem *kgem)
{
while (!list_is_empty(&kgem->large_inactive)) {
kgem_bo_free(kgem,
list_first_entry(&kgem->large_inactive,
struct kgem_bo, list));
}
}
 
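/* Periodic cache maintenance: free deferred bo/request allocations, age
 * the snoop and inactive caches, expiring bos that have sat unused past
 * their time limit (recently mapped bos are kept a little longer).
 * Returns true while there is still work left for a future pass.
 */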
bool kgem_expire_cache(struct kgem *kgem)
{
time_t now, expire;
struct kgem_bo *bo;
unsigned int size = 0, count = 0;
bool idle;
unsigned int i;
 
time(&now);
 
while (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
while (__kgem_freed_request) {
struct kgem_request *rq = __kgem_freed_request;
__kgem_freed_request = *(struct kgem_request **)rq;
free(rq);
}
 
kgem_clean_large_cache(kgem);
 
expire = 0;
list_for_each_entry(bo, &kgem->snoop, list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME/2;
break;
}
 
bo->delta = now;
}
if (expire) {
while (!list_is_empty(&kgem->snoop)) {
bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
 
if (bo->delta > expire)
break;
 
kgem_bo_free(kgem, bo);
}
}
#ifdef DEBUG_MEMORY
{
long snoop_size = 0;
int snoop_count = 0;
list_for_each_entry(bo, &kgem->snoop, list)
snoop_count++, snoop_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
__FUNCTION__, snoop_count, snoop_size);
}
#endif
 
kgem_retire(kgem);
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem->expire(kgem);
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
expire = 0;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
idle &= list_is_empty(&kgem->inactive[i]);
list_for_each_entry(bo, &kgem->inactive[i], list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME;
break;
}
 
bo->delta = now;
}
}
if (idle) {
DBG(("%s: idle\n", __FUNCTION__));
kgem->need_expire = false;
return false;
}
if (expire == 0)
return true;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
struct list preserve;
 
list_init(&preserve);
while (!list_is_empty(&kgem->inactive[i])) {
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
 
if (bo->delta > expire) {
idle = false;
break;
}
 
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
idle = false;
list_move_tail(&bo->list, &preserve);
} else {
count++;
size += bytes(bo);
kgem_bo_free(kgem, bo);
DBG(("%s: expiring %d\n",
__FUNCTION__, bo->handle));
}
}
if (!list_is_empty(&preserve)) {
preserve.prev->next = kgem->inactive[i].next;
kgem->inactive[i].next->prev = preserve.prev;
kgem->inactive[i].next = preserve.next;
preserve.next->prev = &kgem->inactive[i];
}
}
 
#ifdef DEBUG_MEMORY
{
long inactive_size = 0;
int inactive_count = 0;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_for_each_entry(bo, &kgem->inactive[i], list)
inactive_count++, inactive_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
__FUNCTION__, inactive_count, inactive_size);
}
#endif
 
DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
__FUNCTION__, count, size, idle));
 
kgem->need_expire = !idle;
return !idle;
(void)count; /* only referenced by DBG(); keep unused-variable warnings quiet */
(void)size;
}
 
void kgem_cleanup_cache(struct kgem *kgem)
{
unsigned int i;
int n;
 
/* sync to the most recent request */
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
if (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
struct drm_i915_gem_set_domain set_domain;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
 
DBG(("%s: sync on cleanup\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
(void)drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN,
&set_domain);
}
}
 
kgem_retire(kgem);
kgem_cleanup(kgem);
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
while (!list_is_empty(&kgem->inactive[i]))
kgem_bo_free(kgem,
list_last_entry(&kgem->inactive[i],
struct kgem_bo, list));
}
 
kgem_clean_large_cache(kgem);
 
while (!list_is_empty(&kgem->snoop))
kgem_bo_free(kgem,
list_last_entry(&kgem->snoop,
struct kgem_bo, list));
 
while (__kgem_freed_bo) {
struct kgem_bo *bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
kgem->need_purge = false;
kgem->need_expire = false;
}
 
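/* Search the caches for a linear (untiled) bo of at least num_pages. Large
 * requests use the dedicated large lists; otherwise the active or inactive
 * buckets are scanned, preferring an existing CPU/GTT mapping when one was
 * requested, with a near-miss remembered as a fallback.
 */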
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;
 
DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
__FUNCTION__, num_pages, flags, use_active,
num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
MAX_CACHE_SIZE / PAGE_SIZE));
 
assert(num_pages);
 
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
DBG(("%s: searching large buffers\n", __FUNCTION__));
retry_large:
cache = use_active ? &kgem->large : &kgem->large_inactive;
list_for_each_entry_safe(bo, first, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo))
goto discard;
 
if (bo->tiling != I915_TILING_NONE) {
if (use_active)
goto discard;
 
if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
goto discard;
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
goto discard;
 
list_del(&bo->list);
if (bo->rq == (void *)kgem)
list_del(&bo->request);
 
bo->delta = 0;
assert_tiling(kgem, bo);
return bo;
 
discard:
if (!use_active)
kgem_bo_free(kgem, bo);
}
 
if (use_active) {
use_active = false;
goto retry_large;
}
 
if (__kgem_throttle_retire(kgem, flags))
goto retry_large;
 
return NULL;
}
 
if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: inactive and cache bucket empty\n",
__FUNCTION__));
 
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: can not retire\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
DBG(("%s: active cache bucket empty\n", __FUNCTION__));
return NULL;
}
 
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: active cache bucket still empty after retire\n",
__FUNCTION__));
return NULL;
}
}
 
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
DBG(("%s: searching for inactive %s map\n",
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), num_pages));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling &&
!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
assert_tiling(kgem, bo);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (flags & CREATE_EXACT)
return NULL;
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
return NULL;
}
 
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
list_for_each_entry(bo, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo))
continue;
 
if (use_active &&
kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
continue;
 
if (first)
continue;
 
if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}
 
if (bo->map) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;
 
first = bo;
continue;
}
}
 
if (use_active)
kgem_bo_remove_from_active(kgem, bo);
else
kgem_bo_remove_from_inactive(kgem, bo);
 
assert(bo->tiling == I915_TILING_NONE);
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
assert_tiling(kgem, bo);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (first) {
assert(first->tiling == I915_TILING_NONE);
 
if (use_active)
kgem_bo_remove_from_active(kgem, first);
else
kgem_bo_remove_from_inactive(kgem, first);
 
first->pitch = 0;
first->delta = 0;
DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}
 
return NULL;
}
 
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
struct kgem_bo *bo;
uint32_t handle;
 
DBG(("%s(%d)\n", __FUNCTION__, size));
assert(size);
 
if (flags & CREATE_GTT_MAP && kgem->has_llc) {
flags &= ~CREATE_GTT_MAP;
flags |= CREATE_CPU_MAP;
}
 
size = NUM_PAGES(size);
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
if (bo) {
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_CACHED)
return NULL;
 
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
bo = __kgem_bo_alloc(handle, size);
if (bo == NULL) {
gem_close(kgem->fd, handle);
return NULL;
}
 
debug_alloc__bo(kgem, bo);
return bo;
}
 
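/* Pre-gen4 fences cover a power-of-two region: round the object size up to
 * the next power of two above the per-generation minimum (512KiB on gen2,
 * 1MiB on gen3).
 */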
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
unsigned int size;
 
assert(bo->tiling);
assert_tiling(kgem, bo);
assert(kgem->gen < 040);
 
if (kgem->gen < 030)
size = 512 * 1024;
else
size = 1024 * 1024;
while (size < bytes(bo))
size *= 2;
 
return size;
}
 
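/* Allocate (or reuse) a 2D surface. The search order is: the scanout cache
 * for CREATE_SCANOUT, the large lists for oversized objects, inactive bos
 * that already carry a suitable mapping, then progressively larger active
 * and inactive buckets, before finally creating a new object.
 */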
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags)
{
struct list *cache;
struct kgem_bo *bo;
uint32_t pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);
 
if (tiling < 0)
exact = true, tiling = -tiling;
 
 
DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling, exact,
!!(flags & CREATE_INACTIVE),
!!(flags & CREATE_CPU_MAP),
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT),
!!(flags & CREATE_PRIME),
!!(flags & CREATE_TEMPORARY)));
 
size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
size /= PAGE_SIZE;
bucket = cache_bucket(size);
 
if (flags & CREATE_SCANOUT) {
struct kgem_bo *last = NULL;
 
list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->flush);
assert_tiling(kgem, bo);
 
if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (flags & CREATE_INACTIVE && bo->rq) {
last = bo;
continue;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
 
if (last) {
list_del(&last->list);
 
last->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
last->pitch, last->tiling, last->handle, last->unique_id));
assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
assert_tiling(kgem, last);
last->refcnt = 1;
return last;
}
 
bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch);
if (bo)
return bo;
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
 
if (flags & CREATE_INACTIVE)
goto large_inactive;
 
tiled_height = kgem_aligned_height(kgem, height, tiling);
 
list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert_tiling(kgem, bo);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch || bo->tiling != tiling) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
bo->tiling = tiling;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
bo->flush = true;
return bo;
}
 
large_inactive:
__kgem_throttle_retire(kgem, flags);
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert_tiling(kgem, bo);
 
if (size > num_pages(bo))
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
list_del(&bo->list);
 
assert(bo->domain != DOMAIN_GPU);
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->delta = 0;
DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
 
goto create;
}
 
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (kgem->has_llc && tiling == I915_TILING_NONE)
for_cpu = 1;
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
cache = &kgem->vma[for_cpu].inactive[bucket];
do {
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
assert_tiling(kgem, bo);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
DBG(("inactive vma with wrong tiling: %d < %d\n",
bo->tiling, tiling));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
assert(bo->tiling == tiling);
bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
bo->domain = DOMAIN_NONE;
 
kgem_bo_remove_from_inactive(kgem, bo);
 
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
if (list_is_empty(&kgem->active[bucket][tiling]) &&
list_is_empty(&kgem->inactive[bucket]))
flags &= ~CREATE_CACHED;
 
goto create;
}
}
 
if (flags & CREATE_INACTIVE)
goto skip_active_search;
 
/* Best active match */
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
retry = 3;
search_again:
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->active[bucket][tiling];
if (tiling) {
tiled_height = kgem_aligned_height(kgem, height, tiling);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert(!bo->scanout);
assert_tiling(kgem, bo);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch) {
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
} else {
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert_tiling(kgem, bo);
 
if (num_pages(bo) < size)
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->pitch = pitch;
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}
 
if (--retry && exact) {
if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;
 
cache = &kgem->active[bucket][i];
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);
 
if (num_pages(bo) < size)
continue;
 
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = tiling;
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}
}
 
bucket++;
goto search_again;
}
 
if (!exact) { /* allow an active near-miss? */
i = tiling;
while (--i >= 0) {
tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
cache = active(kgem, tiled_height / PAGE_SIZE, i);
tiled_height = kgem_aligned_height(kgem, height, i);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);
 
if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
} else
bo->pitch = pitch;
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}
}
 
skip_active_search:
bucket = cache_bucket(size);
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3)
retry = 3;
search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->pitch = pitch;
bo->tiling = tiling;
 
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
assert(bo->pitch);
DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_INACTIVE &&
!list_is_empty(&kgem->active[bucket][tiling]) &&
__kgem_throttle_retire(kgem, flags)) {
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
if (--retry) {
bucket++;
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
create:
if (flags & CREATE_CACHED)
return NULL;
 
if (bucket >= NUM_CACHE_BUCKETS)
size = ALIGN(size, 1024);
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
bo = __kgem_bo_alloc(handle, size);
if (!bo) {
gem_close(kgem->fd, handle);
return NULL;
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}
 
bo->unique_id = kgem_get_unique_id(kgem);
if (tiling == I915_TILING_NONE ||
gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
bo->tiling = tiling;
bo->pitch = pitch;
} else {
if (flags & CREATE_EXACT) {
if (bo->pitch != pitch || bo->tiling != tiling) {
kgem_bo_free(kgem, bo);
return NULL;
}
}
}
 
assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
assert_tiling(kgem, bo);
 
debug_alloc__bo(kgem, bo);
 
DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id,
size, num_pages(bo), bucket(bo)));
return bo;
}
 
#if 0
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags)
{
struct kgem_bo *bo;
int stride, size;
 
if (DBG_NO_CPU)
return NULL;
 
DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
 
if (kgem->has_llc) {
bo = kgem_create_2d(kgem, width, height, bpp,
I915_TILING_NONE, flags);
if (bo == NULL)
return bo;
 
assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
return bo;
}
 
assert(width > 0 && height > 0);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
size = stride * ALIGN(height, 2);
assert(size >= PAGE_SIZE);
 
DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));
 
bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
if (bo) {
assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);
assert(bo->snoop);
bo->refcnt = 1;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_caching) {
bo = kgem_create_linear(kgem, size, flags);
if (bo == NULL)
return NULL;
 
assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);
 
if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
bo->snoop = true;
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_userptr) {
void *ptr;
 
/* XXX */
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
return NULL;
 
bo = kgem_create_map(kgem, ptr, size, false);
if (bo == NULL) {
free(ptr);
return NULL;
}
 
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
return NULL;
}
#endif
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, proxy? %d\n",
__FUNCTION__, bo->handle, bo->proxy != NULL));
 
if (bo->proxy) {
_list_del(&bo->vma);
_list_del(&bo->request);
if (bo->io && bo->exec == NULL)
_kgem_bo_delete_buffer(kgem, bo);
kgem_bo_unref(kgem, bo->proxy);
kgem_bo_binding_free(kgem, bo);
free(bo);
return;
}
 
__kgem_bo_destroy(kgem, bo);
}
 
static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->rq);
assert(bo->exec == NULL);
assert(bo->needs_flush);
 
/* The kernel will emit a flush *and* update its own flushing lists. */
if (!__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
DBG(("%s: handle=%d, busy?=%d\n",
__FUNCTION__, bo->handle, bo->rq != NULL));
}
 
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
if (!bo->needs_flush)
return;
 
/* If the kernel fails to emit the flush, then it will be forced when
* we assume direct access. And as the usual failure is EIO, we do
* not actually care.
*/
assert(bo->exec == NULL);
if (bo->rq)
__kgem_flush(kgem, bo);
 
/* Whatever actually happens, we can regard the GTT write domain
* as being flushed.
*/
bo->gtt_dirty = false;
bo->needs_flush = false;
bo->domain = DOMAIN_NONE;
}
 
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
 
bool kgem_check_bo(struct kgem *kgem, ...)
{
va_list ap;
struct kgem_bo *bo;
int num_exec = 0;
int num_pages = 0;
bool flush = false;
 
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
while (bo->proxy)
bo = bo->proxy;
if (bo->exec)
continue;
 
if (needs_semaphore(kgem, bo))
return false;
 
num_pages += num_pages(bo);
num_exec++;
 
flush |= bo->flush;
}
va_end(ap);
 
DBG(("%s: num_pages=+%d, num_exec=+%d\n",
__FUNCTION__, num_pages, num_exec));
 
if (!num_pages)
return true;
 
if (kgem_flush(kgem, flush))
return false;
 
if (kgem->aperture > kgem->aperture_low &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
}
 
if (num_pages + kgem->aperture > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
return false;
}
 
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
return false;
}
 
return true;
}
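
/* Illustrative caller-side sketch (an assumption about the usual call
 * pattern, not code from this file): render paths probe with
 * kgem_check_bo() before emitting commands that reference the bos and
 * flush the batch when the check fails. The variadic bo list is
 * NULL-terminated.
 *
 *	if (!kgem_check_bo(kgem, dst_bo, src_bo, NULL)) {
 *		_kgem_submit(kgem);
 *		_kgem_set_mode(kgem, KGEM_BLT);
 *	}
 */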
 
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint32_t delta)
{
int index;
 
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
 
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
/* The framebuffer bo created by kgem_init_fb() uses the magic handle -2;
 * it needs no relocation entry, only to be tracked in the batch and
 * marked dirty on writes.
 */
if (bo != NULL && bo->handle == -2) {
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);

if (read_write_domain & 0x7fff && !bo->gpu_dirty)
__kgem_bo_mark_dirty(bo);

return 0;
}
 
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(bo->refcnt);
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
__FUNCTION__, bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
}
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty)
__kgem_bo_mark_dirty(bo);
 
bo = bo->proxy;
assert(bo->refcnt);
}
assert(bo->refcnt);
 
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);
 
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
kgem->nfence++;
}
bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
}
 
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
 
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
 
return delta;
}
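
/* Sketch of how a relocation is typically emitted (a hypothetical blit
 * setup, not taken from this file): the caller packs the read domains
 * into the upper 16 bits and the write domain into the lower bits, and
 * stores the returned presumed offset straight into the batch.
 *
 *	uint32_t *b = kgem->batch + kgem->nbatch;
 *	b[0] = cmd;					// command dword (assumed)
 *	b[1] = kgem_add_reloc(kgem, kgem->nbatch + 1, dst_bo,
 *			      I915_GEM_DOMAIN_RENDER << 16 |
 *			      I915_GEM_DOMAIN_RENDER |
 *			      KGEM_RELOC_FENCED,
 *			      0);
 *	kgem->nbatch += 2;
 */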
 
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
 
DBG(("%s: type=%d, count=%d (bucket: %d)\n",
__FUNCTION__, type, kgem->vma[type].count, bucket));
if (kgem->vma[type].count <= 0)
return;
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
/* vma are limited on a per-process basis to around 64k.
* This includes all malloc arenas as well as other file
* mappings. In order to be fair and not hog the cache,
* and more importantly not to exhaust that limit and to
* start failing mappings, we keep our own number of open
* vma to within a conservative value.
*/
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
 
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
j++) {
struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
if (!list_is_empty(head))
bo = list_last_entry(head, struct kgem_bo, vma);
}
if (bo == NULL)
break;
 
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
assert(IS_CPU_MAP(bo->map) == type);
assert(bo->map);
assert(bo->rq == NULL);
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
 
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: freeing unpurgeable old mapping\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
}
}
}
 
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert_tiling(kgem, bo);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
return kgem_bo_map__cpu(kgem, bo);
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert(bo->exec == NULL);
assert_tiling(kgem, bo);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
(kgem->has_llc || bo->domain == DOMAIN_CPU)) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
ptr = kgem_bo_map__cpu(kgem, bo);
if (ptr)
kgem_bo_sync__cpu(kgem, bo);
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
/* XXX use PROT_READ to avoid the write flush? */
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
bo->gtt_dirty = true;
}
}
 
return ptr;
}
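
/* Minimal usage sketch (an assumption about the typical caller): map the
 * bo for direct CPU access and let kgem_bo_map() choose between a CPU
 * mapping (LLC or CPU-domain bos) and a GTT mapping, synchronising the
 * domain as a side effect.
 *
 *	void *dst = kgem_bo_map(kgem, bo);
 *	if (dst != NULL)
 *		memcpy(dst, pixels, kgem_bo_size(bo));	// pixels: caller data
 */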
 
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert_tiling(kgem, bo);
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(bytes(bo) <= kgem->aperture_mappable / 4);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->map)
return MAP(bo->map);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
return bo->map = __kgem_bo_map__gtt(kgem, bo);
}
 
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
 
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
 
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
}
 
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n",
__FUNCTION__, bo->handle, bytes(bo));
return NULL;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
 
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
 
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(bo->refcnt);
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
int err = errno;
 
assert(err != EINVAL);
 
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
}
 
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), err);
return NULL;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
if (bo->map && bo->domain == DOMAIN_CPU) {
DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle));
kgem_bo_release_map(kgem, bo);
}
if (bo->map == NULL) {
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
}
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(!bo->scanout);
kgem_bo_submit(kgem, bo);
 
/* SHM pixmaps use proxies for subpage offsets */
assert(!bo->purged);
while (bo->proxy)
bo = bo->proxy;
assert(!bo->purged);
 
if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
__FUNCTION__, bo->handle,
bo->needs_flush, bo->domain,
__kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_CPU;
set_domain.write_domain = I915_GEM_DOMAIN_CPU;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_CPU;
}
}
}
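
/* Sketch of the CPU-write protocol (the same pairing kgem_bo_map() uses
 * above for LLC/CPU-domain bos): obtain a CPU mapping, then move the bo
 * into the CPU domain before writing so later GPU reads are coherent.
 *
 *	void *ptr = kgem_bo_map__cpu(kgem, bo);
 *	if (ptr != NULL) {
 *		kgem_bo_sync__cpu(kgem, bo);
 *		memcpy(ptr, data, len);		// data/len: caller supplied
 *	}
 */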
 
void kgem_clear_dirty(struct kgem *kgem)
{
struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
 
list_for_each_entry(bo, buffers, request) {
if (!bo->gpu_dirty)
break;
 
bo->gpu_dirty = false;
}
}
 
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length)
{
struct kgem_bo *bo;
 
DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
__FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
offset, length, target->io));
 
bo = __kgem_bo_alloc(target->handle, length);
if (bo == NULL)
return NULL;
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->reusable = false;
bo->size.bytes = length;
 
bo->io = target->io && target->proxy == NULL;
bo->gpu_dirty = target->gpu_dirty;
bo->tiling = target->tiling;
bo->pitch = target->pitch;
bo->flush = target->flush;
bo->snoop = target->snoop;
 
assert(!bo->scanout);
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
 
if (target->exec) {
list_move_tail(&bo->request, &kgem->next_request->buffers);
bo->exec = &_kgem_dummy_exec;
}
bo->rq = target->rq;
 
return bo;
}
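
/* Proxies are how the upload-buffer code (kgem_create_buffer(), currently
 * compiled out below) hands out sub-ranges of a single bo: the proxy
 * shares the target's handle and contributes its own delta, which
 * kgem_add_reloc() folds into the relocation. A sketch of the idea:
 *
 *	struct kgem_bo *part = kgem_create_proxy(kgem, upload_bo, 4096, 1024);
 *	// "part" now aliases bytes 4096..5119 of upload_bo
 */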
 
#if 0
static struct kgem_buffer *
buffer_alloc(void)
{
struct kgem_buffer *bo;
 
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
 
bo->mem = NULL;
bo->need_io = false;
bo->mmapped = true;
 
return bo;
}
 
static struct kgem_buffer *
buffer_alloc_with_data(int num_pages)
{
struct kgem_buffer *bo;
 
bo = malloc(sizeof(*bo) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
if (bo == NULL)
return NULL;
 
bo->mem = (void *)ALIGN((uintptr_t)bo + sizeof(*bo), UPLOAD_ALIGNMENT);
bo->mmapped = false;
return bo;
}
 
static inline bool
use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
{
if ((flags & KGEM_BUFFER_WRITE) == 0)
return kgem->gen >= 030;
 
return true;
}
 
static void
init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
{
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
 
assert(old->proxy == NULL);
 
memcpy(&bo->base, old, sizeof(*old));
if (old->rq)
list_replace(&old->request, &bo->base.request);
else
list_init(&bo->base.request);
list_replace(&old->vma, &bo->base.vma);
list_init(&bo->base.list);
free(old);
 
assert(bo->base.tiling == I915_TILING_NONE);
 
bo->base.refcnt = 1;
}
 
static struct kgem_buffer *
search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
struct kgem_buffer *bo;
struct kgem_bo *old;
 
old = search_snoop_cache(kgem, alloc, 0);
if (old) {
if (!old->io) {
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
init_buffer_from_bo(bo, old);
} else {
bo = (struct kgem_buffer *)old;
bo->base.refcnt = 1;
}
 
DBG(("%s: created CPU handle=%d for buffer, size %d\n",
__FUNCTION__, bo->base.handle, num_pages(&bo->base)));
 
assert(bo->base.snoop);
assert(bo->base.tiling == I915_TILING_NONE);
assert(num_pages(&bo->base) >= alloc);
assert(bo->mmapped == true);
assert(bo->need_io == false);
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem == NULL) {
bo->base.refcnt = 0;
kgem_bo_free(kgem, &bo->base);
bo = NULL;
}
 
return bo;
}
 
return NULL;
}
 
static struct kgem_buffer *
create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
struct kgem_buffer *bo;
uint32_t handle;
 
if (kgem->has_llc) {
struct kgem_bo *old;
 
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
old = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
if (old) {
init_buffer_from_bo(bo, old);
} else {
handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
return NULL;
}
 
debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
__FUNCTION__, bo->base.handle, alloc));
}
 
assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->need_io == false);
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem != NULL)
return bo;
 
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}
 
if (kgem->has_caching) {
struct kgem_bo *old;
 
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
old = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
if (old) {
init_buffer_from_bo(bo, old);
} else {
handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
return NULL;
}
 
debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created CPU handle=%d for buffer, size %d\n",
__FUNCTION__, bo->base.handle, alloc));
}
 
assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->need_io == false);
 
if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
goto free_caching;
 
bo->base.snoop = true;
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem == NULL)
goto free_caching;
 
return bo;
 
free_caching:
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}
 
if (kgem->has_userptr) {
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
free(bo);
return NULL;
}
 
handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
if (handle == 0) {
free(bo->mem);
free(bo);
return NULL;
}
 
debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created snoop handle=%d for buffer\n",
__FUNCTION__, bo->base.handle));
 
assert(bo->mmapped == true);
assert(bo->need_io == false);
 
bo->base.refcnt = 1;
bo->base.snoop = true;
bo->base.map = MAKE_USER_MAP(bo->mem);
 
return bo;
}
 
return NULL;
}
 
struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
uint32_t size, uint32_t flags,
void **ret)
{
struct kgem_buffer *bo;
unsigned offset, alloc;
struct kgem_bo *old;
 
DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
__FUNCTION__, size, flags,
!!(flags & KGEM_BUFFER_WRITE),
!!(flags & KGEM_BUFFER_INPLACE),
!!(flags & KGEM_BUFFER_LAST)));
assert(size);
/* we should never be asked to create anything TOO large */
assert(size <= kgem->max_object_size);
 
#if !DBG_NO_UPLOAD_CACHE
list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
/* We can reuse any write buffer which we can fit */
if (flags == KGEM_BUFFER_LAST &&
bo->write == KGEM_BUFFER_WRITE &&
bo->base.refcnt == 1 && !bo->mmapped &&
size <= bytes(&bo->base)) {
DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
__FUNCTION__, size, bo->used, bytes(&bo->base)));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
kgem_buffer_release(kgem, bo);
bo->need_io = 0;
bo->write = 0;
offset = 0;
bo->used = size;
goto done;
}
 
if (flags & KGEM_BUFFER_WRITE) {
if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
(((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
!bo->base.snoop)) {
DBG(("%s: skip write %x buffer, need %x\n",
__FUNCTION__, bo->write, flags));
continue;
}
assert(bo->mmapped || bo->need_io);
} else {
if (bo->write & KGEM_BUFFER_WRITE) {
DBG(("%s: skip write %x buffer, need %x\n",
__FUNCTION__, bo->write, flags));
continue;
}
}
 
if (bo->used + size <= bytes(&bo->base)) {
DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
__FUNCTION__, bo->used, size, bytes(&bo->base)));
offset = bo->used;
bo->used += size;
goto done;
}
}
 
if (flags & KGEM_BUFFER_WRITE) {
list_for_each_entry(bo, &kgem->active_buffers, base.list) {
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
assert(bo->mmapped);
assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop);
 
if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
DBG(("%s: skip write %x buffer, need %x\n",
__FUNCTION__, bo->write, flags));
continue;
}
 
if (bo->used + size <= bytes(&bo->base)) {
DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
__FUNCTION__, bo->used, size, bytes(&bo->base)));
offset = bo->used;
bo->used += size;
list_move(&bo->base.list, &kgem->batch_buffers);
goto done;
}
}
}
#endif
 
#if !DBG_NO_MAP_UPLOAD
/* Be a little more generous and hope to hold fewer mmappings */
alloc = ALIGN(2*size, kgem->buffer_size);
if (alloc > MAX_CACHE_SIZE)
alloc = ALIGN(size, kgem->buffer_size);
if (alloc > MAX_CACHE_SIZE)
alloc = PAGE_ALIGN(size);
assert(alloc);
 
if (alloc > kgem->aperture_mappable / 4)
flags &= ~KGEM_BUFFER_INPLACE;
alloc /= PAGE_SIZE;
 
if (kgem->has_llc &&
(flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
bo = buffer_alloc();
if (bo == NULL)
goto skip_llc;
 
old = NULL;
if ((flags & KGEM_BUFFER_WRITE) == 0)
old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
if (old == NULL)
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
if (old == NULL)
old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
if (old) {
DBG(("%s: found LLC handle=%d for buffer\n",
__FUNCTION__, old->handle));
 
init_buffer_from_bo(bo, old);
} else {
uint32_t handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
goto skip_llc;
}
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created LLC handle=%d for buffer\n",
__FUNCTION__, bo->base.handle));
 
debug_alloc(kgem, alloc);
}
 
assert(bo->mmapped);
assert(!bo->need_io);
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem) {
if (flags & KGEM_BUFFER_WRITE)
kgem_bo_sync__cpu(kgem, &bo->base);
flags &= ~KGEM_BUFFER_INPLACE;
goto init;
} else {
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}
}
skip_llc:
 
if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
/* The issue with using a GTT upload buffer is that we may
* cause eviction-stalls in order to free up some GTT space.
* An is-mappable? ioctl, or some per-page magic in the kernel,
* could help us detect when we are about to block.
*
* XXX This is especially noticeable on memory constrained
* devices like gen2 or with relatively slow gpu like i3.
*/
DBG(("%s: searching for an inactive GTT map for upload\n",
__FUNCTION__));
old = search_linear_cache(kgem, alloc,
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
#if HAVE_I915_GEM_BUFFER_INFO
if (old) {
struct drm_i915_gem_buffer_info info;
 
/* An example of such a non-blocking ioctl might work */
 
VG_CLEAR(info);
info.handle = old->handle;
if (drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_BUFFER_INFO,
&info) == 0) {
old->presumed_offset = info.addr;
if ((info.flags & I915_GEM_MAPPABLE) == 0) {
kgem_bo_move_to_inactive(kgem, old);
old = NULL;
}
}
}
#endif
if (old == NULL)
old = search_linear_cache(kgem, NUM_PAGES(size),
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
if (old == NULL) {
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
if (old && !__kgem_bo_is_mappable(kgem, old)) {
_kgem_bo_destroy(kgem, old);
old = NULL;
}
}
if (old) {
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
assert(__kgem_bo_is_mappable(kgem, old));
assert(!old->snoop);
assert(old->rq == NULL);
 
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
init_buffer_from_bo(bo, old);
assert(num_pages(&bo->base) >= NUM_PAGES(size));
 
assert(bo->mmapped);
assert(bo->base.refcnt == 1);
 
bo->mem = kgem_bo_map(kgem, &bo->base);
if (bo->mem) {
if (IS_CPU_MAP(bo->base.map))
flags &= ~KGEM_BUFFER_INPLACE;
goto init;
} else {
bo->base.refcnt = 0;
kgem_bo_free(kgem, &bo->base);
}
}
}
#else
flags &= ~KGEM_BUFFER_INPLACE;
#endif
/* Be more parsimonious with pwrite/pread/cacheable buffers */
if ((flags & KGEM_BUFFER_INPLACE) == 0)
alloc = NUM_PAGES(size);
 
if (use_snoopable_buffer(kgem, flags)) {
bo = search_snoopable_buffer(kgem, alloc);
if (bo) {
if (flags & KGEM_BUFFER_WRITE)
kgem_bo_sync__cpu(kgem, &bo->base);
flags &= ~KGEM_BUFFER_INPLACE;
goto init;
}
 
if ((flags & KGEM_BUFFER_INPLACE) == 0) {
bo = create_snoopable_buffer(kgem, alloc);
if (bo)
goto init;
}
}
 
flags &= ~KGEM_BUFFER_INPLACE;
 
old = NULL;
if ((flags & KGEM_BUFFER_WRITE) == 0)
old = search_linear_cache(kgem, alloc, 0);
if (old == NULL)
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
if (old) {
DBG(("%s: reusing ordinary handle %d for io\n",
__FUNCTION__, old->handle));
bo = buffer_alloc_with_data(num_pages(old));
if (bo == NULL)
return NULL;
 
init_buffer_from_bo(bo, old);
bo->need_io = flags & KGEM_BUFFER_WRITE;
} else {
unsigned hint;
 
if (use_snoopable_buffer(kgem, flags)) {
bo = create_snoopable_buffer(kgem, alloc);
if (bo)
goto init;
}
 
bo = buffer_alloc();
if (bo == NULL)
return NULL;
 
hint = CREATE_INACTIVE;
if (flags & KGEM_BUFFER_WRITE)
hint |= CREATE_CPU_MAP;
old = search_linear_cache(kgem, alloc, hint);
if (old) {
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
 
init_buffer_from_bo(bo, old);
} else {
uint32_t handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
return NULL;
}
 
DBG(("%s: created handle=%d for buffer\n",
__FUNCTION__, handle));
 
__kgem_bo_init(&bo->base, handle, alloc);
debug_alloc(kgem, alloc * PAGE_SIZE);
}
 
assert(bo->mmapped);
assert(!bo->need_io);
assert(bo->base.refcnt == 1);
 
if (flags & KGEM_BUFFER_WRITE) {
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem != NULL) {
kgem_bo_sync__cpu(kgem, &bo->base);
goto init;
}
}
 
DBG(("%s: falling back to new pwrite buffer\n", __FUNCTION__));
old = &bo->base;
bo = buffer_alloc_with_data(num_pages(old));
if (bo == NULL) {
old->refcnt = 0;
kgem_bo_free(kgem, old);
return NULL;
}
 
init_buffer_from_bo(bo, old);
 
assert(bo->mem);
assert(!bo->mmapped);
assert(bo->base.refcnt == 1);
 
bo->need_io = flags & KGEM_BUFFER_WRITE;
}
init:
bo->base.io = true;
assert(bo->base.refcnt == 1);
assert(num_pages(&bo->base) >= NUM_PAGES(size));
assert(!bo->need_io || !bo->base.needs_flush);
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
assert(bo->mem);
assert(!bo->mmapped || bo->base.map != NULL);
 
bo->used = size;
bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
offset = 0;
 
assert(list_is_empty(&bo->base.list));
list_add(&bo->base.list, &kgem->batch_buffers);
 
DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
__FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));
 
done:
bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
assert(bo->mem);
*ret = (char *)bo->mem + offset;
return kgem_create_proxy(kgem, &bo->base, offset, size);
}
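
/* Usage sketch (this function is compiled out in this port; the pattern
 * mirrors kgem_upload_source_image() further down): request a write-mapped
 * upload buffer, fill it through the returned pointer and use the proxy
 * bo as the source of a blit.
 *
 *	void *ptr;
 *	struct kgem_bo *src = kgem_create_buffer(kgem, len,
 *						 KGEM_BUFFER_WRITE_INPLACE,
 *						 &ptr);
 *	if (src) {
 *		memcpy(ptr, data, len);		// data/len: caller supplied
 *		// ... emit blit reading from src ...
 *		kgem_bo_destroy(kgem, src);
 *	}
 */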
 
bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
{
struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
return bo->write & KGEM_BUFFER_WRITE_INPLACE;
}
 
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret)
{
struct kgem_bo *bo;
int stride;
 
assert(width > 0 && height > 0);
assert(ret != NULL);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
 
DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));
 
bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
if (bo == NULL) {
DBG(("%s: allocation failure for upload buffer\n",
__FUNCTION__));
return NULL;
}
assert(*ret != NULL);
assert(bo->proxy != NULL);
 
if (height & 1) {
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
int min;
 
assert(io->used);
 
/* Having padded this surface to ensure that accesses to
* the last pair of rows are valid, remove the padding so
* that it can be allocated to other pixmaps.
*/
min = bo->delta + height * stride;
min = ALIGN(min, UPLOAD_ALIGNMENT);
if (io->used != min) {
DBG(("%s: trimming buffer from %d to %d\n",
__FUNCTION__, io->used, min));
io->used = min;
}
bo->size.bytes -= stride;
}
 
bo->map = MAKE_CPU_MAP(*ret);
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
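
/* kgem_upload_source_image() just below is the in-tree user of this
 * helper: it allocates the 2D upload buffer and copies the caller's
 * pixels into it with memcpy_blt().
 */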
 
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
const void *data,
const BoxRec *box,
int stride, int bpp)
{
int width = box->x2 - box->x1;
int height = box->y2 - box->y1;
struct kgem_bo *bo;
void *dst;
 
if (!kgem_can_create_2d(kgem, width, height, bpp))
return NULL;
 
DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
__FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));
 
assert(data);
assert(width > 0);
assert(height > 0);
assert(stride);
assert(bpp);
 
bo = kgem_create_buffer_2d(kgem,
width, height, bpp,
KGEM_BUFFER_WRITE_INPLACE, &dst);
if (bo)
memcpy_blt(data, dst, bpp,
stride, bo->pitch,
box->x1, box->y1,
0, 0,
width, height);
 
return bo;
}
 
void kgem_proxy_bo_attach(struct kgem_bo *bo,
struct kgem_bo **ptr)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->map == NULL || IS_CPU_MAP(bo->map));
assert(bo->proxy);
list_add(&bo->vma, &bo->proxy->vma);
bo->map = ptr;
*ptr = kgem_bo_reference(bo);
}
 
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
struct kgem_buffer *bo;
uint32_t offset = _bo->delta, length = _bo->size.bytes;
 
/* We expect the caller to have already submitted the batch */
assert(_bo->io);
assert(_bo->exec == NULL);
assert(_bo->rq == NULL);
assert(_bo->proxy);
 
_bo = _bo->proxy;
assert(_bo->proxy == NULL);
assert(_bo->exec == NULL);
 
bo = (struct kgem_buffer *)_bo;
 
DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
offset, length, bo->base.snoop));
 
if (bo->mmapped) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
__FUNCTION__,
bo->base.needs_flush,
bo->base.domain,
__kgem_busy(kgem, bo->base.handle)));
 
assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc);
 
VG_CLEAR(set_domain);
set_domain.handle = bo->base.handle;
set_domain.write_domain = 0;
set_domain.read_domains =
IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
 
if (drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
return;
} else {
if (gem_read(kgem->fd,
bo->base.handle, (char *)bo->mem+offset,
offset, length))
return;
}
kgem_bo_retire(kgem, &bo->base);
bo->base.domain = DOMAIN_NONE;
}
#endif
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b && b->offset; b = b->next)
if (format == b->format)
return b->offset;
 
return 0;
}
 
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b; b = b->next) {
if (b->offset)
continue;
 
b->offset = offset;
b->format = format;
 
if (b->next)
b->next->offset = 0;
 
return;
}
 
b = malloc(sizeof(*b));
if (b) {
b->next = bo->binding.next;
b->format = format;
b->offset = offset;
bo->binding.next = b;
}
}
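
/* These two helpers cache a surface-state binding offset per pixel format
 * on the bo, so a render backend can reuse a previously emitted surface
 * state instead of emitting a new one for every operation. A hedged
 * sketch of the intended use (emit_surface_state() is a hypothetical
 * backend helper):
 *
 *	offset = kgem_bo_get_binding(bo, format);
 *	if (offset == 0) {
 *		offset = emit_surface_state(kgem, bo, format);
 *		kgem_bo_set_binding(bo, format, offset);
 *	}
 */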
 
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
bo = __kgem_bo_alloc(-2, size);
if (!bo) {
return 0;
}
 
bo->domain = DOMAIN_GTT;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = fb->pitch;
bo->tiling = I915_TILING_NONE;
bo->scanout = 1;
fb->fb_bo = bo;
 
// printf("fb width %d height %d pitch %d bo %p\n",
// fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
};
 
 
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
bo = fb->fb_bo;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
fb->fb_bo = bo;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
if((size != bo->size.pages.count) ||
(fb->pitch != bo->pitch))
{
bo->size.pages.count = size;
bo->pitch = fb->pitch;
 
printf("fb width %d height %d pitch %d bo %p\n",
fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
}
 
return 0;
};
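
/* Minimal sketch of how the framebuffer helpers are meant to be driven
 * (an assumption; the SRV_FBINFO ioctl fills in struct sna_fb):
 *
 *	struct sna_fb fb;
 *	if (kgem_init_fb(kgem, &fb)) {
 *		// ... render to fb.fb_bo ...
 *	}
 *	// after a mode switch, refresh the cached pitch/size:
 *	kgem_update_fb(kgem, &fb);
 */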
 
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_destroy(kgem, bo);
kgem_bo_free(kgem, bo);
}
 
 
void kgem_close_batches(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
};
 
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
int pitch, int height)
{
struct kgem_bo *bo;
int size;
 
size = pitch * height / PAGE_SIZE;
 
bo = __kgem_bo_alloc(handle, size);
if(bo == NULL)
return NULL;
 
bo->domain = DOMAIN_GTT;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = I915_TILING_X;
bo->scanout = 0;
 
return bo;
}
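
/* Sketch (an assumption about intended use): wrap a GEM handle created
 * elsewhere, e.g. a shared or scanout surface handed to the driver, so
 * the 2D paths can treat it as an X-tiled kgem_bo.
 *
 *	bo = kgem_bo_from_handle(kgem, handle, pitch, height);
 *	if (bo != NULL) {
 *		// ... blit to/from bo ...
 *	}
 */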
/drivers/video/Intel-2D/kgem.h
28,12 → 28,10
#ifndef KGEM_H
#define KGEM_H
 
#define HAS_DEBUG_FULL 1
 
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdio.h>
 
#include <i915_drm.h>
 
40,12 → 38,24
#include "compiler.h"
#include "intel_list.h"
 
#undef DBG
#include <limits.h>
#if !defined(MAXSHORT) || !defined(MINSHORT) || \
!defined(MAXINT) || !defined(MININT)
/*
* Some implementations #define these through <math.h>, so preclude
* #include'ing it later.
*/
 
#if HAS_DEBUG_FULL
#define DBG(x) printf x
#else
#define DBG(x)
#include <math.h>
#undef MAXSHORT
#define MAXSHORT SHRT_MAX
#undef MINSHORT
#define MINSHORT SHRT_MIN
#undef MAXINT
#define MAXINT INT_MAX
#undef MININT
#define MININT INT_MIN
 
#endif
 
struct kgem_bo {
64,6 → 74,7
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
struct kgem_bo_binding {
struct kgem_bo_binding *next;
90,7 → 101,8
uint32_t pitch : 18; /* max 128k */
uint32_t tiling : 2;
uint32_t reusable : 1;
uint32_t dirty : 1;
uint32_t gpu_dirty : 1;
uint32_t gtt_dirty : 1;
uint32_t domain : 2;
uint32_t needs_flush : 1;
uint32_t snoop : 1;
173,6 → 185,7
uint32_t scanout_busy:1;
uint32_t busy:1;
 
uint32_t has_create2 :1;
uint32_t has_userptr :1;
uint32_t has_blt :1;
uint32_t has_relaxed_fencing :1;
180,8 → 193,9
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
uint32_t has_pinned_batches :1;
uint32_t has_cacheing :1;
uint32_t has_caching :1;
uint32_t has_llc :1;
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
 
200,10 → 214,23
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
 
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[4096];
#if 0
void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
#endif
 
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[384] page_aligned;
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;
 
#ifdef DEBUG_MEMORY
struct {
213,9 → 240,11
#endif
};
 
#define KGEM_MAX_DEFERRED_VBO 16
 
#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED 4
#define KGEM_EXEC_RESERVED 1
#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO)
#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO)
 
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
233,6 → 262,8
bool read_only);
 
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size);
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo);
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags);
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
239,6 → 270,7
struct kgem_bo *target,
int offset, int length);
 
void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr);
 
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
248,6 → 280,8
#define KGEM_CAN_CREATE_LARGE 0x4
#define KGEM_CAN_CREATE_GTT 0x8
 
uint32_t kgem_get_unique_id(struct kgem *kgem);
 
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
281,7 → 315,6
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_retire(struct kgem *kgem);
 
304,6 → 337,11
return kgem_ring_is_idle(kgem, kgem->ring);
}
 
static inline bool __kgem_ring_empty(struct kgem *kgem)
{
return list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]);
}
 
void _kgem_submit(struct kgem *kgem);
static inline void kgem_submit(struct kgem *kgem)
{
325,21 → 363,8
_kgem_submit(kgem);
}
 
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo);
 
if (!bo->needs_flush)
return;
 
/* If the kernel fails to emit the flush, then it will be forced when
* we assume direct access. And as the usual failure is EIO, we do
* not actually care.
*/
__kgem_flush(kgem, bo);
}
 
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
{
assert(bo->refcnt);
367,6 → 392,9
kgem_submit(kgem);
#endif
 
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
_kgem_submit(kgem);
 
if (kgem->mode == mode)
return;
 
419,6 → 447,11
 
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
if (kgem->nreloc) {
unsigned mode = kgem->mode;
_kgem_submit(kgem);
_kgem_set_mode(kgem, mode);
}
 
return kgem->batch + kgem->nbatch;
}
512,6 → 545,12
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
if (kgem->has_llc && bo->tiling == I915_TILING_NONE)
return true;
 
if (!bo->presumed_offset)
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 
544,7 → 583,7
if (kgem_bo_mapped(kgem, bo))
return true;
 
if (!bo->tiling && kgem->has_llc)
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
553,6 → 592,22
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
struct kgem_bo *bo,
bool write)
{
if (bo->purged || (bo->scanout && write))
return false;
 
if (kgem->has_llc)
return true;
 
if (bo->domain != DOMAIN_CPU)
return false;
 
return !write || bo->exec == NULL;
}
 
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
assert(bo->refcnt);
561,6 → 616,8
return bo->snoop;
}
 
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo);
 
bool __kgem_busy(struct kgem *kgem, int handle);
 
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
570,10 → 627,12
 
inline static void __kgem_bo_clear_busy(struct kgem_bo *bo)
{
bo->needs_flush = false;
bo->rq = NULL;
list_del(&bo->request);
bo->rq = NULL;
 
bo->domain = DOMAIN_NONE;
bo->needs_flush = false;
bo->gtt_dirty = false;
}
 
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
584,8 → 643,6
return bo->rq;
}
 
/*
 
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
604,8 → 661,24
return kgem_bo_is_busy(bo);
}
 
*/
static inline bool kgem_bo_is_render(struct kgem_bo *bo)
{
DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__,
bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq)));
assert(bo->refcnt);
return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER;
}
 
static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo)
{
while (bo->proxy) {
bo->flush = true;
bo = bo->proxy;
}
bo->flush = true;
bo->reusable = false;
}
 
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
{
if (bo == NULL)
612,7 → 685,7
return false;
 
assert(bo->refcnt);
return bo->dirty;
return bo->gpu_dirty;
}
 
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
632,7 → 705,7
bo->handle, bo->proxy != NULL));
 
bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
bo->needs_flush = bo->dirty = true;
bo->needs_flush = bo->gpu_dirty = true;
list_move(&bo->request, &RQ(bo->rq)->buffers);
}
 
643,7 → 716,7
assert(bo->exec);
assert(bo->rq);
 
if (bo->dirty)
if (bo->gpu_dirty)
return;
 
__kgem_bo_mark_dirty(bo);
672,6 → 745,9
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
 
void kgem_clean_scanout_cache(struct kgem *kgem);
void kgem_clean_large_cache(struct kgem *kgem);
 
#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
/drivers/video/Intel-2D/kgem_debug.c
37,22 → 37,6
 
#include "kgem_debug.h"
 
#include <kos32sys.h>
 
/*
void
ErrorF(const char *f, ...)
{
va_list args;
 
va_start(args, f);
VErrorF(f, args);
va_end(args);
}
*/
 
#define ErrorF printf
 
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset)
{
388,8 → 372,6
 
static int (*decode_3d(int gen))(struct kgem*, uint32_t)
{
return kgem_gen6_decode_3d;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_decode_3d;
401,18 → 383,12
return kgem_gen4_decode_3d;
} else if (gen >= 030) {
return kgem_gen3_decode_3d;
} else if (gen >= 020) {
return kgem_gen2_decode_3d;
}
assert(0);
*/
}
 
static void (*finish_state(int gen))(struct kgem*)
{
 
return kgem_gen6_finish_state;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_finish_state;
424,11 → 400,8
return kgem_gen4_finish_state;
} else if (gen >= 030) {
return kgem_gen3_finish_state;
} else if (gen >= 020) {
return kgem_gen2_finish_state;
}
assert(0);
*/
}
 
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
/drivers/video/Intel-2D/kgem_debug_gen3.c
0,0 → 1,1599
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen3_render.h"
 
#include "kgem_debug.h"
 
enum type {
T_FLOAT32,
T_FLOAT16,
};
 
static struct state {
struct vertex_buffer {
int handle;
void *base;
const char *ptr;
int pitch;
 
struct kgem_bo *current;
} vb;
struct vertex_elements {
int offset;
bool valid;
enum type type;
int size;
uint8_t swizzle[4];
} ve[33];
int num_ve;
} state;
 
static float int_as_float(int i)
{
union {
float f;
int i;
} x;
x.i = i;
return x.f;
}
 
static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
uint32_t offset)
{
uint32_t handle;
struct kgem_bo *bo = NULL;
void *base, *ptr;
int i;
 
offset *= sizeof(uint32_t);
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == offset)
break;
assert(i < kgem->nreloc);
handle = kgem->reloc[i].target_handle;
 
if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
}
ptr = (char *)base + kgem->reloc[i].delta;
 
state.vb.current = bo;
state.vb.base = base;
state.vb.ptr = ptr;
}
 
static void gen3_update_vertex_buffer_pitch(struct kgem *kgem,
uint32_t offset)
{
state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f;
state.vb.pitch *= sizeof(uint32_t);
}
 
static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data)
{
state.ve[1].valid = 1;
 
switch ((data >> 6) & 7) {
case 1:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 3;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 1;
state.ve[1].swizzle[3] = 3;
break;
case 2:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 4;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 1;
state.ve[1].swizzle[3] = 1;
break;
case 3:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 2;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 2;
state.ve[1].swizzle[3] = 3;
break;
case 4:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 3;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 3;
state.ve[1].swizzle[3] = 1;
break;
}
 
state.ve[2].valid = 0;
state.ve[3].valid = 0;
}
 
static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data)
{
int id;
for (id = 0; id < 8; id++) {
uint32_t fmt = (data >> (id*4)) & 0xf;
int width;
 
state.ve[id+4].valid = fmt != 0xf;
 
width = 0;
switch (fmt) {
case 0:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 2;
break;
case 1:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 3;
break;
case 2:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 4;
break;
case 3:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 1;
break;
case 4:
state.ve[id+4].type = T_FLOAT16;
width = state.ve[id+4].size = 2;
break;
case 5:
state.ve[id+4].type = T_FLOAT16;
width = state.ve[id+4].size = 4;
break;
}
 
state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2;
state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2;
state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2;
state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2;
}
}
 
static void gen3_update_vertex_elements_offsets(struct kgem *kgem)
{
int i, offset;
 
for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) {
if (!state.ve[i].valid)
continue;
 
state.ve[i].offset = offset;
offset += 4 * state.ve[i].size;
state.num_ve = i;
}
}
 
static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max)
{
int c;
 
ErrorF("(");
for (c = 0; c < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%f", f[c]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < max-1)
ErrorF(", ");
}
ErrorF(")");
}
 
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
switch (ve->type) {
case T_FLOAT32:
vertices_float32_out(ve, ptr, ve->size);
break;
case T_FLOAT16:
//vertices_float16_out(ve, ptr, ve->size);
break;
}
}
 
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
const struct vertex_buffer *vb = &state.vb;
int i = 1;
 
do {
const struct vertex_elements *ve = &state.ve[i];
const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
 
if (!ve->valid)
continue;
 
ve_out(ve, ptr);
 
while (++i <= state.num_ve && !state.ve[i].valid)
;
 
if (i <= state.num_ve)
ErrorF(", ");
} while (i <= state.num_ve);
}
 
static int inline_vertex_out(struct kgem *kgem, void *base)
{
const struct vertex_buffer *vb = &state.vb;
int i = 1;
 
do {
const struct vertex_elements *ve = &state.ve[i];
const void *ptr = (char *)base + ve->offset;
 
if (!ve->valid)
continue;
 
ve_out(ve, ptr);
 
while (++i <= state.num_ve && !state.ve[i].valid)
;
 
if (i <= state.num_ve)
ErrorF(", ");
} while (i <= state.num_ve);
 
return vb->pitch;
}
 
static int
gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset)
{
uint32_t *data = kgem->batch + offset;
uint32_t opcode;
 
opcode = (data[0] & 0x00f80000) >> 19;
 
switch (opcode) {
case 0x11:
kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
return 1;
case 0x10:
kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n",
data[0]&1?"enabled":"disabled");
return 1;
case 0x01:
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
return 1;
case 0x0a:
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n");
return 1;
case 0x05:
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
return 1;
}
 
kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
opcode);
assert(0);
return 1;
}
 
/** Sets the string dstname to describe the destination of the PS instruction */
static void
gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
{
uint32_t a0 = data[i];
int dst_nr = (a0 >> 14) & 0xf;
char dstmask[8];
const char *sat;
 
if (do_mask) {
if (((a0 >> 10) & 0xf) == 0xf) {
dstmask[0] = 0;
} else {
int dstmask_index = 0;
 
dstmask[dstmask_index++] = '.';
if (a0 & (1 << 10))
dstmask[dstmask_index++] = 'x';
if (a0 & (1 << 11))
dstmask[dstmask_index++] = 'y';
if (a0 & (1 << 12))
dstmask[dstmask_index++] = 'z';
if (a0 & (1 << 13))
dstmask[dstmask_index++] = 'w';
dstmask[dstmask_index++] = 0;
}
 
if (a0 & (1 << 22))
sat = ".sat";
else
sat = "";
} else {
dstmask[0] = 0;
sat = "";
}
 
switch ((a0 >> 19) & 0x7) {
case 0:
assert(dst_nr <= 15);
sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
break;
case 4:
assert(dst_nr == 0);
sprintf(dstname, "oC%s%s", dstmask, sat);
break;
case 5:
assert(dst_nr == 0);
sprintf(dstname, "oD%s%s", dstmask, sat);
break;
case 6:
assert(dst_nr <= 3);
sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
break;
default:
sprintf(dstname, "RESERVED");
break;
}
}
 
static const char *
gen3_get_channel_swizzle(uint32_t select)
{
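/* The low three bits select the source channel or a constant; bit 3
 * negates the selection. */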
switch (select & 0x7) {
case 0:
return (select & 8) ? "-x" : "x";
case 1:
return (select & 8) ? "-y" : "y";
case 2:
return (select & 8) ? "-z" : "z";
case 3:
return (select & 8) ? "-w" : "w";
case 4:
return (select & 8) ? "-0" : "0";
case 5:
return (select & 8) ? "-1" : "1";
default:
return (select & 8) ? "-bad" : "bad";
}
}
 
static void
gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
{
switch (src_type) {
case 0:
sprintf(name, "R%d", src_nr);
assert(src_nr <= 15);
break;
case 1:
if (src_nr < 8)
sprintf(name, "T%d", src_nr);
else if (src_nr == 8)
sprintf(name, "DIFFUSE");
else if (src_nr == 9)
sprintf(name, "SPECULAR");
else if (src_nr == 10)
sprintf(name, "FOG");
else {
assert(0);
sprintf(name, "RESERVED");
}
break;
case 2:
sprintf(name, "C%d", src_nr);
assert(src_nr <= 31);
break;
case 4:
sprintf(name, "oC");
assert(src_nr == 0);
break;
case 5:
sprintf(name, "oD");
assert(src_nr == 0);
break;
case 6:
sprintf(name, "U%d", src_nr);
assert(src_nr <= 3);
break;
default:
sprintf(name, "RESERVED");
assert(0);
break;
}
}
 
static void
gen3_get_instruction_src0(uint32_t *data, int i, char *srcname)
{
uint32_t a0 = data[i];
uint32_t a1 = data[i + 1];
int src_nr = (a0 >> 2) & 0x1f;
const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf);
const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf);
const char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf);
const char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf);
char swizzle[100];
 
gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
if (strcmp(swizzle, ".xyzw") != 0)
strcat(srcname, swizzle);
}
 
static void
gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
{
uint32_t a1 = data[i + 1];
uint32_t a2 = data[i + 2];
int src_nr = (a1 >> 8) & 0x1f;
const char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf);
const char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf);
const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf);
const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf);
char swizzle[100];
 
gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
if (strcmp(swizzle, ".xyzw") != 0)
strcat(srcname, swizzle);
}
 
static void
gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
{
uint32_t a2 = data[i + 2];
int src_nr = (a2 >> 16) & 0x1f;
const char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf);
const char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf);
const char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf);
const char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf);
char swizzle[100];
 
gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
if (strcmp(swizzle, ".xyzw") != 0)
strcat(srcname, swizzle);
}
 
static void
gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
{
switch (src_type) {
case 0:
sprintf(name, "R%d", src_nr);
assert(src_nr <= 15);
break;
case 1:
if (src_nr < 8)
sprintf(name, "T%d", src_nr);
else if (src_nr == 8)
sprintf(name, "DIFFUSE");
else if (src_nr == 9)
sprintf(name, "SPECULAR");
else if (src_nr == 10)
sprintf(name, "FOG");
else {
assert(0);
sprintf(name, "RESERVED");
}
break;
case 4:
sprintf(name, "oC");
assert(src_nr == 0);
break;
case 5:
sprintf(name, "oD");
assert(src_nr == 0);
break;
default:
assert(0);
sprintf(name, "RESERVED");
break;
}
}
 
static void
gen3_decode_alu1(uint32_t *data, uint32_t offset,
int i, char *instr_prefix, const char *op_name)
{
char dst[100], src0[100];
 
gen3_get_instruction_dst(data, i, dst, 1);
gen3_get_instruction_src0(data, i, src0);
 
kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix,
op_name, dst, src0);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
 
static void
gen3_decode_alu2(uint32_t *data, uint32_t offset,
int i, char *instr_prefix, const char *op_name)
{
char dst[100], src0[100], src1[100];
 
gen3_get_instruction_dst(data, i, dst, 1);
gen3_get_instruction_src0(data, i, src0);
gen3_get_instruction_src1(data, i, src1);
 
kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
op_name, dst, src0, src1);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
 
static void
gen3_decode_alu3(uint32_t *data, uint32_t offset,
int i, char *instr_prefix, const char *op_name)
{
char dst[100], src0[100], src1[100], src2[100];
 
gen3_get_instruction_dst(data, i, dst, 1);
gen3_get_instruction_src0(data, i, src0);
gen3_get_instruction_src1(data, i, src1);
gen3_get_instruction_src2(data, i, src2);
 
kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
op_name, dst, src0, src1, src2);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
 
static void
gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
const char *tex_name)
{
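/* Texture instructions are three dwords: t0 holds the destination and the
 * sampler index (low nibble), t1 the address operand type and number. */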
uint32_t t0 = data[i];
uint32_t t1 = data[i + 1];
char dst_name[100];
char addr_name[100];
int sampler_nr;
 
gen3_get_instruction_dst(data, i, dst_name, 0);
gen3_get_instruction_addr((t1 >> 24) & 0x7,
(t1 >> 17) & 0xf,
addr_name);
sampler_nr = t0 & 0xf;
 
kgem_debug_print(data, offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
tex_name, dst_name, sampler_nr, addr_name);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
 
static void
gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
{
uint32_t d0 = data[i];
const char *sampletype;
int dcl_nr = (d0 >> 14) & 0xf;
const char *dcl_x = d0 & (1 << 10) ? "x" : "";
const char *dcl_y = d0 & (1 << 11) ? "y" : "";
const char *dcl_z = d0 & (1 << 12) ? "z" : "";
const char *dcl_w = d0 & (1 << 13) ? "w" : "";
char dcl_mask[10];
 
switch ((d0 >> 19) & 0x3) {
case 1:
sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
assert(strcmp(dcl_mask, ".") != 0);
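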
 
assert(dcl_nr <= 10);
if (dcl_nr < 8) {
if (strcmp(dcl_mask, ".x") != 0 &&
strcmp(dcl_mask, ".xy") != 0 &&
strcmp(dcl_mask, ".xz") != 0 &&
strcmp(dcl_mask, ".w") != 0 &&
strcmp(dcl_mask, ".xyzw") != 0) {
assert(0);
}
kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
dcl_nr, dcl_mask);
} else {
if (strcmp(dcl_mask, ".xz") == 0)
assert(0);
else if (strcmp(dcl_mask, ".xw") == 0)
assert(0);
else if (strcmp(dcl_mask, ".xzw") == 0)
assert(0);
 
if (dcl_nr == 8) {
kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
dcl_mask);
} else if (dcl_nr == 9) {
kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
dcl_mask);
} else if (dcl_nr == 10) {
kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
dcl_mask);
}
}
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
break;
case 3:
switch ((d0 >> 22) & 0x3) {
case 0:
sampletype = "2D";
break;
case 1:
sampletype = "CUBE";
break;
case 2:
sampletype = "3D";
break;
default:
sampletype = "RESERVED";
break;
}
assert(dcl_nr <= 15);
kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
dcl_nr, sampletype);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
break;
default:
kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
}
 
static void
gen3_decode_instruction(uint32_t *data, uint32_t offset,
int i, char *instr_prefix)
{
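/* Each pixel shader instruction is three dwords; the opcode is in bits
 * 28:24 of the first dword. */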
switch ((data[i] >> 24) & 0x1f) {
case 0x0:
kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
break;
case 0x01:
gen3_decode_alu2(data, offset, i, instr_prefix, "ADD");
break;
case 0x02:
gen3_decode_alu1(data, offset, i, instr_prefix, "MOV");
break;
case 0x03:
gen3_decode_alu2(data, offset, i, instr_prefix, "MUL");
break;
case 0x04:
gen3_decode_alu3(data, offset, i, instr_prefix, "MAD");
break;
case 0x05:
gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD");
break;
case 0x06:
gen3_decode_alu2(data, offset, i, instr_prefix, "DP3");
break;
case 0x07:
gen3_decode_alu2(data, offset, i, instr_prefix, "DP4");
break;
case 0x08:
gen3_decode_alu1(data, offset, i, instr_prefix, "FRC");
break;
case 0x09:
gen3_decode_alu1(data, offset, i, instr_prefix, "RCP");
break;
case 0x0a:
gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ");
break;
case 0x0b:
gen3_decode_alu1(data, offset, i, instr_prefix, "EXP");
break;
case 0x0c:
gen3_decode_alu1(data, offset, i, instr_prefix, "LOG");
break;
case 0x0d:
gen3_decode_alu2(data, offset, i, instr_prefix, "CMP");
break;
case 0x0e:
gen3_decode_alu2(data, offset, i, instr_prefix, "MIN");
break;
case 0x0f:
gen3_decode_alu2(data, offset, i, instr_prefix, "MAX");
break;
case 0x10:
gen3_decode_alu1(data, offset, i, instr_prefix, "FLR");
break;
case 0x11:
gen3_decode_alu1(data, offset, i, instr_prefix, "MOD");
break;
case 0x12:
gen3_decode_alu1(data, offset, i, instr_prefix, "TRC");
break;
case 0x13:
gen3_decode_alu2(data, offset, i, instr_prefix, "SGE");
break;
case 0x14:
gen3_decode_alu2(data, offset, i, instr_prefix, "SLT");
break;
case 0x15:
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD");
break;
case 0x16:
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP");
break;
case 0x17:
gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB");
break;
case 0x19:
gen3_decode_dcl(data, offset, i, instr_prefix);
break;
default:
kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
break;
}
}
 
static const char *
gen3_decode_compare_func(uint32_t op)
{
switch (op&0x7) {
case 0: return "always";
case 1: return "never";
case 2: return "less";
case 3: return "equal";
case 4: return "lequal";
case 5: return "greater";
case 6: return "notequal";
case 7: return "gequal";
}
return "";
}
 
static const char *
gen3_decode_stencil_op(uint32_t op)
{
switch (op&0x7) {
case 0: return "keep";
case 1: return "zero";
case 2: return "replace";
case 3: return "incr_sat";
case 4: return "decr_sat";
case 5: return "greater";
case 6: return "incr";
case 7: return "decr";
}
return "";
}
 
#if 0
/* part of MODES_4 */
static const char *
gen3_decode_logic_op(uint32_t op)
{
switch (op&0xf) {
case 0: return "clear";
case 1: return "nor";
case 2: return "and_inv";
case 3: return "copy_inv";
case 4: return "and_rvrse";
case 5: return "inv";
case 6: return "xor";
case 7: return "nand";
case 8: return "and";
case 9: return "equiv";
case 10: return "noop";
case 11: return "or_inv";
case 12: return "copy";
case 13: return "or_rvrse";
case 14: return "or";
case 15: return "set";
}
return "";
}
#endif
 
static const char *
gen3_decode_blend_fact(uint32_t op)
{
switch (op&0xf) {
case 1: return "zero";
case 2: return "one";
case 3: return "src_colr";
case 4: return "inv_src_colr";
case 5: return "src_alpha";
case 6: return "inv_src_alpha";
case 7: return "dst_alpha";
case 8: return "inv_dst_alpha";
case 9: return "dst_colr";
case 10: return "inv_dst_colr";
case 11: return "src_alpha_sat";
case 12: return "cnst_colr";
case 13: return "inv_cnst_colr";
case 14: return "cnst_alpha";
case 15: return "inv_const_alpha";
}
return "";
}
 
static const char *
decode_tex_coord_mode(uint32_t mode)
{
switch (mode&0x7) {
case 0: return "wrap";
case 1: return "mirror";
case 2: return "clamp_edge";
case 3: return "cube";
case 4: return "clamp_border";
case 5: return "mirror_once";
}
return "";
}
 
static const char *
gen3_decode_sample_filter(uint32_t mode)
{
switch (mode&0x7) {
case 0: return "nearest";
case 1: return "linear";
case 2: return "anisotropic";
case 3: return "4x4_1";
case 4: return "4x4_2";
case 5: return "4x4_flat";
case 6: return "6x5_mono";
}
return "";
}
 
static int
gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
{
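/* Bits 4..12 of the header form a mask selecting which of the S0-S8 state
 * dwords follow, in ascending order. */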
const uint32_t *data = kgem->batch + offset;
int len, i, word;
 
kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
for (word = 0; word <= 8; word++) {
if (data[0] & (1 << (4 + word))) {
switch (word) {
case 0:
kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
gen3_update_vertex_buffer_addr(kgem, offset + i);
break;
case 1:
kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
(data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
gen3_update_vertex_buffer_pitch(kgem, offset + i);
break;
case 2:
{
char buf[200];
int len = 0;
int tex_num;
for (tex_num = 0; tex_num < 8; tex_num++) {
switch((data[i]>>tex_num*4)&0xf) {
case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break;
case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break;
case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break;
case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break;
case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break;
case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break;
case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break;
}
}
kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
gen3_update_vertex_texcoords(kgem, data[i]);
}
break;
case 3:
kgem_debug_print(data, offset, i, "S3: not documented\n");
break;
case 4:
{
const char *cullmode = "";
const char *vfmt_xyzw = "";
switch((data[i]>>13)&0x3) {
case 0: cullmode = "both"; break;
case 1: cullmode = "none"; break;
case 2: cullmode = "cw"; break;
case 3: cullmode = "ccw"; break;
}
switch(data[i] & (7<<6 | 1<<2)) {
case 1<<6: vfmt_xyzw = "XYZ,"; break;
case 2<<6: vfmt_xyzw = "XYZW,"; break;
case 3<<6: vfmt_xyzw = "XY,"; break;
case 4<<6: vfmt_xyzw = "XYW,"; break;
case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
}
kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
"%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s%s%s "
"%s%s%s\n",
(data[i]>>23)&0x1ff,
((data[i]>>19)&0xf) / 2.0,
data[i]&(0xf<<15)?" flatshade=":"",
data[i]&(1<<18)?"Alpha,":"",
data[i]&(1<<17)?"Fog,":"",
data[i]&(1<<16)?"Specular,":"",
data[i]&(1<<15)?"Color,":"",
cullmode,
data[i]&(1<<12)?"PointWidth,":"",
data[i]&(1<<11)?"SpecFog,":"",
data[i]&(1<<10)?"Color,":"",
data[i]&(1<<9)?"DepthOfs,":"",
vfmt_xyzw,
data[i]&(1<<9)?"FogParam,":"",
data[i]&(1<<5)?"force default diffuse, ":"",
data[i]&(1<<4)?"force default specular, ":"",
data[i]&(1<<3)?"local depth ofs enable, ":"",
data[i]&(1<<1)?"point sprite enable, ":"",
data[i]&(1<<0)?"line AA enable, ":"");
gen3_update_vertex_elements(kgem, data[i]);
break;
}
case 5:
{
kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
"%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
"stencil_fail=%s, stencil_pass_z_fail=%s, "
"stencil_pass_z_pass=%s, %s%s%s%s\n",
data[i]&(0xf<<28)?" write_disable=":"",
data[i]&(1<<31)?"Alpha,":"",
data[i]&(1<<30)?"Red,":"",
data[i]&(1<<29)?"Green,":"",
data[i]&(1<<28)?"Blue,":"",
data[i]&(1<<27)?" force default point size,":"",
data[i]&(1<<26)?" last pixel enable,":"",
data[i]&(1<<25)?" global depth ofs enable,":"",
data[i]&(1<<24)?" fog enable,":"",
(data[i]>>16)&0xff,
gen3_decode_compare_func(data[i]>>13),
gen3_decode_stencil_op(data[i]>>10),
gen3_decode_stencil_op(data[i]>>7),
gen3_decode_stencil_op(data[i]>>4),
data[i]&(1<<3)?"stencil write enable, ":"",
data[i]&(1<<2)?"stencil test enable, ":"",
data[i]&(1<<1)?"color dither enable, ":"",
data[i]&(1<<0)?"logicop enable, ":"");
}
break;
case 6:
kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
"depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
"%s%stristrip_provoking_vertex=%i\n",
data[i]&(1<<31)?"alpha test enable, ":"",
gen3_decode_compare_func(data[i]>>28),
(data[i]>>20)&0xff,
gen3_decode_compare_func(data[i]>>16),
data[i]&(1<<15)?"cbuf blend enable, ":"",
gen3_decode_blend_fact(data[i]>>8),
gen3_decode_blend_fact(data[i]>>4),
data[i]&(1<<3)?"depth write enable, ":"",
data[i]&(1<<2)?"cbuf write enable, ":"",
data[i]&(0x3));
break;
case 7:
kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
break;
}
i++;
}
}
 
assert(len == i);
return len;
}
 
static int
gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
{
uint32_t *data = kgem->batch + offset;
unsigned int len, i, c, idx, word, map, sampler, instr;
const char *format, *zformat, *type;
uint32_t opcode;
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes_3d_1d[] = {
{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
{ 0x8f, 2, 16, "3DSTATE_MAP_PALETTE_LOAD_32" },
{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
}, *opcode_3d_1d;
 
opcode = (data[0] & 0x00ff0000) >> 16;
 
switch (opcode) {
case 0x07:
/* This instruction is unusual: a length of 0 means just 1 DWORD instead
 * of 2. The spec marks a 0 length as unsupported in one place and as
 * required in another, but 0-length LOAD_INDIRECTs appear to cause no
 * harm in practice.
 */
kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
len = (data[0] & 0x000000ff) + 1;
i = 1;
if (data[0] & (0x01 << 8)) {
kgem_debug_print(data, offset, i++, "SIS.0\n");
kgem_debug_print(data, offset, i++, "SIS.1\n");
}
if (data[0] & (0x02 << 8)) {
kgem_debug_print(data, offset, i++, "DIS.0\n");
}
if (data[0] & (0x04 << 8)) {
kgem_debug_print(data, offset, i++, "SSB.0\n");
kgem_debug_print(data, offset, i++, "SSB.1\n");
}
if (data[0] & (0x08 << 8)) {
kgem_debug_print(data, offset, i++, "MSB.0\n");
kgem_debug_print(data, offset, i++, "MSB.1\n");
}
if (data[0] & (0x10 << 8)) {
kgem_debug_print(data, offset, i++, "PSP.0\n");
kgem_debug_print(data, offset, i++, "PSP.1\n");
}
if (data[0] & (0x20 << 8)) {
kgem_debug_print(data, offset, i++, "PSC.0\n");
kgem_debug_print(data, offset, i++, "PSC.1\n");
}
assert(len == i);
return len;
case 0x04:
return gen3_decode_load_state_immediate_1(kgem, offset);
case 0x03:
kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
for (word = 6; word <= 14; word++) {
if (data[0] & (1 << word)) {
if (word == 6)
kgem_debug_print(data, offset, i++, "TBCF\n");
else if (word >= 7 && word <= 10) {
kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
} else if (word >= 11 && word <= 14) {
kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
word - 11,
data[i]&0xfffffffe,
data[i]&1?"use fence":"");
i++;
kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
word - 11,
data[i]>>21, (data[i]>>10)&0x3ff,
data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
i++;
kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
word - 11,
((data[i]>>21) + 1)*4);
i++;
kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
}
}
}
assert(len == i);
return len;
case 0x00:
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
len = (data[0] & 0x0000003f) + 2;
kgem_debug_print(data, offset, 1, "mask\n");
 
i = 2;
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
int width, height, pitch, dword;
struct drm_i915_gem_relocation_entry *reloc;
const char *tiling;
 
reloc = kgem_debug_get_reloc_entry(kgem, &data[i] - kgem->batch);
assert(reloc->target_handle);
 
dword = data[i];
kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s, handle=%d\n", map,
dword&(1<<31)?"untrusted surface, ":"",
dword&(1<<1)?"vertical line stride enable, ":"",
dword&(1<<0)?"vertical ofs enable, ":"",
reloc->target_handle);
 
dword = data[i];
width = ((dword >> 10) & ((1 << 11) - 1))+1;
height = ((dword >> 21) & ((1 << 11) - 1))+1;
 
tiling = "none";
if (dword & (1 << 2))
tiling = "fenced";
else if (dword & (1 << 1))
tiling = dword & (1 << 0) ? "Y" : "X";
type = " BAD";
format = " (invalid)";
switch ((dword>>7) & 0x7) {
case 1:
type = "8";
switch ((dword>>3) & 0xf) {
case 0: format = "I"; break;
case 1: format = "L"; break;
case 4: format = "A"; break;
case 5: format = " mono"; break;
}
break;
case 2:
type = "16";
switch ((dword>>3) & 0xf) {
case 0: format = " rgb565"; break;
case 1: format = " argb1555"; break;
case 2: format = " argb4444"; break;
case 3: format = " ay88"; break;
case 5: format = " 88dvdu"; break;
case 6: format = " bump655"; break;
case 7: format = "I"; break;
case 8: format = "L"; break;
case 9: format = "A"; break;
}
break;
case 3:
type = "32";
switch ((dword>>3) & 0xf) {
case 0: format = " argb8888"; break;
case 1: format = " abgr8888"; break;
case 2: format = " xrgb8888"; break;
case 3: format = " xbgr8888"; break;
case 4: format = " qwvu8888"; break;
case 5: format = " axvu8888"; break;
case 6: format = " lxvu8888"; break;
case 7: format = " xlvu8888"; break;
case 8: format = " argb2101010"; break;
case 9: format = " abgr2101010"; break;
case 10: format = " awvu2101010"; break;
case 11: format = " gr1616"; break;
case 12: format = " vu1616"; break;
case 13: format = " xI824"; break;
case 14: format = " xA824"; break;
case 15: format = " xL824"; break;
}
break;
case 5:
type = "422";
switch ((dword>>3) & 0xf) {
case 0: format = " yuv_swapy"; break;
case 1: format = " yuv"; break;
case 2: format = " yuv_swapuv"; break;
case 3: format = " yuv_swapuvy"; break;
}
break;
case 6:
type = "compressed";
switch ((dword>>3) & 0x7) {
case 0: format = " dxt1"; break;
case 1: format = " dxt2_3"; break;
case 2: format = " dxt4_5"; break;
case 3: format = " fxt1"; break;
case 4: format = " dxt1_rb"; break;
}
break;
case 7:
type = "4b indexed";
switch ((dword>>3) & 0xf) {
case 7: format = " argb8888"; break;
}
break;
default:
format = "BAD";
break;
}
dword = data[i];
kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
map, width, height, type, format, tiling,
dword&(1<<9)?" palette select":"");
 
dword = data[i];
pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
map, pitch,
(dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
dword&(1<<8)?"miplayout legacy":"miplayout right");
}
}
assert(len == i);
return len;
case 0x06:
kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
len = (data[0] & 0x000000ff) + 2;
 
i = 2;
for (c = 0; c <= 31; c++) {
if (data[1] & (1 << c)) {
kgem_debug_print(data, offset, i, "C%d.X = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.W = %f\n",
c, int_as_float(data[i]));
i++;
}
}
assert(len == i);
return len;
case 0x05:
kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
len = (data[0] & 0x000000ff) + 2;
assert(((len-1) % 3) == 0);
assert(len <= 370);
i = 1;
for (instr = 0; instr < (len - 1) / 3; instr++) {
char instr_prefix[10];
 
sprintf(instr_prefix, "PS%03d", instr);
gen3_decode_instruction(data, offset, i, instr_prefix);
i += 3;
}
return len;
case 0x01:
kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
kgem_debug_print(data, offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
i = 2;
for (sampler = 0; sampler <= 15; sampler++) {
if (data[1] & (1 << sampler)) {
uint32_t dword;
const char *mip_filter = "";
dword = data[i];
switch ((dword>>20)&0x3) {
case 0: mip_filter = "none"; break;
case 1: mip_filter = "nearest"; break;
case 3: mip_filter = "linear"; break;
}
kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
"base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
"lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
dword&(1<<31)?" reverse gamma,":"",
dword&(1<<30)?" packed2planar,":"",
dword&(1<<29)?" colorspace conversion,":"",
(dword>>22)&0x1f,
mip_filter,
gen3_decode_sample_filter(dword>>17),
gen3_decode_sample_filter(dword>>14),
((dword>>5)&0x1ff)/(0x10*1.0),
dword&(1<<4)?" shadow,":"",
dword&(1<<3)?4:2,
gen3_decode_compare_func(dword));
dword = data[i];
kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
"tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
sampler, ((dword>>24)&0xff)/(0x10*1.0),
dword&(1<<17)?" kill pixel enable,":"",
decode_tex_coord_mode(dword>>12),
decode_tex_coord_mode(dword>>9),
decode_tex_coord_mode(dword>>6),
dword&(1<<5)?" normalized coords,":"",
(dword>>1)&0xf,
dword&(1<<0)?" deinterlacer,":"");
kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
sampler);
}
}
assert(len == i);
return len;
case 0x85:
len = (data[0] & 0x0000000f) + 2;
assert(len == 2);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_DEST_BUFFER_VARIABLES\n");
 
switch ((data[1] >> 8) & 0xf) {
case 0x0: format = "g8"; break;
case 0x1: format = "x1r5g5b5"; break;
case 0x2: format = "r5g6b5"; break;
case 0x3: format = "a8r8g8b8"; break;
case 0x4: format = "ycrcb_swapy"; break;
case 0x5: format = "ycrcb_normal"; break;
case 0x6: format = "ycrcb_swapuv"; break;
case 0x7: format = "ycrcb_swapuvy"; break;
case 0x8: format = "a4r4g4b4"; break;
case 0x9: format = "a1r5g5b5"; break;
case 0xa: format = "a2r10g10b10"; break;
default: format = "BAD"; break;
}
switch ((data[1] >> 2) & 0x3) {
case 0x0: zformat = "u16"; break;
case 0x1: zformat = "f16"; break;
case 0x2: zformat = "u24x8"; break;
default: zformat = "BAD"; break;
}
kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
format, zformat,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;
 
case 0x8e:
{
const char *name, *tiling;
 
len = (data[0] & 0x0000000f) + 2;
assert(len == 3);
 
switch((data[1] >> 24) & 0x7) {
case 0x3: name = "color"; break;
case 0x7: name = "depth"; break;
default: name = "unknown"; break;
}
 
tiling = "none";
if (data[1] & (1 << 23))
tiling = "fenced";
else if (data[1] & (1 << 22))
tiling = data[1] & (1 << 21) ? "Y" : "X";
 
kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
 
kgem_debug_print(data, offset, 2, "address\n");
return len;
}
case 0x81:
len = (data[0] & 0x0000000f) + 2;
assert(len == 3);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_SCISSOR_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "(%d,%d)\n",
data[1] & 0xffff, data[1] >> 16);
kgem_debug_print(data, offset, 2, "(%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);
 
return len;
case 0x80:
len = (data[0] & 0x0000000f) + 2;
assert(len == 5);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "%s\n",
data[1]&(1<<30)?"depth ofs disabled ":"");
kgem_debug_print(data, offset, 2, "(%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);
kgem_debug_print(data, offset, 3, "(%d,%d)\n",
data[3] & 0xffff, data[3] >> 16);
kgem_debug_print(data, offset, 4, "(%d,%d)\n",
(int16_t)(data[4] & 0xffff),
(int16_t)(data[4] >> 16));
 
return len;
case 0x9c:
len = (data[0] & 0x0000000f) + 2;
assert(len == 7);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_CLEAR_PARAMETERS\n");
kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
data[1]&(1<<2)?"color,":"",
data[1]&(1<<1)?"depth,":"",
data[1]&(1<<0)?"stencil,":"");
kgem_debug_print(data, offset, 2, "clear color\n");
kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
kgem_debug_print(data, offset, 5, "depth value %f\n",
int_as_float(data[5]));
kgem_debug_print(data, offset, 6, "clear stencil\n");
return len;
}
 
for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
opcode_3d_1d = &opcodes_3d_1d[idx];
if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = (data[0] & 0xf) + 2;
kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);
 
return len;
}
}
 
kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
assert(0);
return 1;
}
 
#define VERTEX_OUT(fmt, ...) do { \
kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
i++; \
} while (0)
 
static int
gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
{
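/* Bit 23 of the header selects indirect vertices when set and inline
 * vertex data when clear; random-access indirect draws are not decoded
 * and hit the assert below. */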
uint32_t *data = kgem->batch + offset;
char immediate = (data[0] & (1 << 23)) == 0;
unsigned int len, i, ret;
const char *primtype;
unsigned int vertex = 0;
 
switch ((data[0] >> 18) & 0xf) {
case 0x0: primtype = "TRILIST"; break;
case 0x1: primtype = "TRISTRIP"; break;
case 0x2: primtype = "TRISTRIP_REVERSE"; break;
case 0x3: primtype = "TRIFAN"; break;
case 0x4: primtype = "POLYGON"; break;
case 0x5: primtype = "LINELIST"; break;
case 0x6: primtype = "LINESTRIP"; break;
case 0x7: primtype = "RECTLIST"; break;
case 0x8: primtype = "POINTLIST"; break;
case 0x9: primtype = "DIB"; break;
case 0xa: primtype = "CLEAR_RECT"; assert(0); break;
default: primtype = "unknown"; break;
}
 
gen3_update_vertex_elements_offsets(kgem);
 
/* XXX: 3DPRIM_DIB not supported */
if (immediate) {
len = (data[0] & 0x0003ffff) + 2;
kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
for (i = 1; i < len; ) {
ErrorF(" [%d]: ", vertex);
i += inline_vertex_out(kgem, data + i) / sizeof(uint32_t);
ErrorF("\n");
vertex++;
}
 
ret = len;
} else {
/* indirect vertices */
len = data[0] & 0x0000ffff; /* index count */
if (data[0] & (1 << 17)) {
/* random vertex access */
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
assert(0);
if (len == 0) {
/* vertex indices continue until 0xffff is found */
} else {
/* fixed size vertex index buffer */
}
ret = (len + 1) / 2 + 1;
goto out;
} else {
/* sequential vertex access */
vertex = data[1] & 0xffff;
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE sequential indirect %s, %d starting from "
"%d\n", primtype, len, vertex);
kgem_debug_print(data, offset, 1, " start\n");
for (i = 0; i < len; i++) {
ErrorF(" [%d]: ", vertex);
indirect_vertex_out(kgem, vertex++);
ErrorF("\n");
}
ret = 2;
goto out;
}
}
 
out:
return ret;
}
 
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
{
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
};
uint32_t *data = kgem->batch + offset;
uint32_t opcode;
unsigned int idx;
 
opcode = (data[0] & 0x1f000000) >> 24;
 
switch (opcode) {
case 0x1f:
return gen3_decode_3d_primitive(kgem, offset);
case 0x1d:
return gen3_decode_3d_1d(kgem, offset);
case 0x1c:
return gen3_decode_3d_1c(kgem, offset);
}
 
for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) {
if (opcode == opcodes[idx].opcode) {
unsigned int len = 1, i;
 
kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name);
if (opcodes[idx].max_len > 1) {
len = (data[0] & 0xff) + 2;
assert(len >= opcodes[idx].min_len &&
len <= opcodes[idx].max_len);
}
 
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);
return len;
}
}
 
kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
return 1;
}
 
 
void kgem_gen3_finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
/drivers/video/Intel-2D/kgem_debug_gen4.c
0,0 → 1,687
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen4_render.h"
 
#include "kgem_debug.h"
 
static struct state {
struct vertex_buffer {
int handle;
void *base;
const char *ptr;
int pitch;
 
struct kgem_bo *current;
} vb[33];
struct vertex_elements {
int buffer;
int offset;
bool valid;
uint32_t type;
uint8_t swizzle[4];
} ve[33];
int num_ve;
 
struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
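/* Resolve the buffer address dword through the relocation table, map the
 * target bo and remember its base, pointer and pitch so the vertices can
 * be dumped when a primitive is decoded. */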
uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
struct kgem_bo *bo = NULL;
void *base, *ptr;
int i;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == reloc)
break;
assert(i < kgem->nreloc);
reloc = kgem->reloc[i].target_handle;
 
if (reloc == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
}
ptr = (char *)base + kgem->reloc[i].delta;
 
i = data[0] >> 27;
 
state.vb[i].current = bo;
state.vb[i].base = base;
state.vb[i].ptr = ptr;
state.vb[i].pitch = data[0] & 0x7ff;
}
 
static uint32_t
get_ve_component(uint32_t data, int component)
{
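/* Component controls are packed four bits apiece from bit 16 upwards,
 * with component 0 in the highest nibble. */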
return (data >> (16 + (3 - component) * 4)) & 0x7;
}
 
static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 27;
state.ve[id].valid = !!(data[0] & (1 << 26));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
int c;
 
ErrorF("(");
for (c = 0; c < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%d", v[c]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
for (; c < 4; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("1.0"); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
ErrorF(")");
}
 
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
int c, o;
 
ErrorF("(");
for (c = o = 0; c < 4 && o < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%f", f[o++]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
for (; c < 4; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("1.0"); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
ErrorF(")");
}
 
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
switch (ve->type) {
case GEN4_SURFACEFORMAT_R32_FLOAT:
vertices_float_out(ve, ptr, 1);
break;
case GEN4_SURFACEFORMAT_R32G32_FLOAT:
vertices_float_out(ve, ptr, 2);
break;
case GEN4_SURFACEFORMAT_R32G32B32_FLOAT:
vertices_float_out(ve, ptr, 3);
break;
case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT:
vertices_float_out(ve, ptr, 4);
break;
case GEN4_SURFACEFORMAT_R16_SINT:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN4_SURFACEFORMAT_R16G16_SINT:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN4_SURFACEFORMAT_R16G16B16A16_SINT:
vertices_sint16_out(ve, ptr, 4);
break;
case GEN4_SURFACEFORMAT_R16_SSCALED:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN4_SURFACEFORMAT_R16G16_SSCALED:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED:
vertices_sint16_out(ve, ptr, 4);
break;
}
}
 
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
int i = 0;
 
do {
const struct vertex_elements *ve = &state.ve[i];
const struct vertex_buffer *vb = &state.vb[ve->buffer];
const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
 
if (!ve->valid)
continue;
 
ve_out(ve, ptr);
 
while (++i <= state.num_ve && !state.ve[i].valid)
;
 
if (i <= state.num_ve)
ErrorF(", ");
} while (i <= state.num_ve);
}
 
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
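/* Dump the vertices of a sequential 3DPRIMITIVE: dword 1 is the vertex
 * count and dword 2 the starting vertex; indexed draws are not handled. */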
int n;
 
assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
 
for (n = 0; n < data[1]; n++) {
int v = data[2] + n;
ErrorF(" [%d:%d] = ", n, v);
indirect_vertex_out(kgem, v);
ErrorF("\n");
}
}
 
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state base address 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state base not updated\n",
name);
}
 
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] == 1)
kgem_debug_print(data, offset, index,
"%s state upper bound disabled\n", name);
else if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state upper bound 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state upper bound not updated\n",
name);
}
 
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
switch (surfacetype) {
case 0: return "1D";
case 1: return "2D";
case 2: return "3D";
case 3: return "CUBE";
case 4: return "BUFFER";
case 7: return "NULL";
default: return "unknown";
}
}
 
static const char *
get_965_depthformat(unsigned int depthformat)
{
switch (depthformat) {
case 0: return "s8_z24float";
case 1: return "z32float";
case 2: return "z24s8";
case 5: return "z16";
default: return "unknown";
}
}
 
static const char *
get_965_element_component(uint32_t data, int component)
{
uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
 
switch (component_control) {
case 0:
return "nostore";
case 1:
switch (component) {
case 0: return "X";
case 1: return "Y";
case 2: return "Z";
case 3: return "W";
default: return "fail";
}
case 2:
return "0.0";
case 3:
return "1.0";
case 4:
return "0x1";
case 5:
return "VID";
default:
return "fail";
}
}
 
static const char *
get_965_prim_type(uint32_t data)
{
uint32_t primtype = (data >> 10) & 0x1f;
 
switch (primtype) {
case 0x01: return "point list";
case 0x02: return "line list";
case 0x03: return "line strip";
case 0x04: return "tri list";
case 0x05: return "tri strip";
case 0x06: return "tri fan";
case 0x07: return "quad list";
case 0x08: return "quad strip";
case 0x09: return "line list adj";
case 0x0a: return "line strip adj";
case 0x0b: return "tri list adj";
case 0x0c: return "tri strip adj";
case 0x0d: return "tri strip reverse";
case 0x0e: return "polygon";
case 0x0f: return "rect list";
case 0x10: return "line loop";
case 0x11: return "point list bf";
case 0x12: return "line strip cont";
case 0x13: return "line strip bf";
case 0x14: return "line strip cont bf";
case 0x15: return "tri fan no stipple";
default: return "fail";
}
}
 
#if 0
struct reloc {
struct kgem_bo *bo;
void *base;
};
 
static void *
get_reloc(struct kgem *kgem,
void *base, const uint32_t *reloc,
struct reloc *r)
{
uint32_t delta = *reloc;
 
memset(r, 0, sizeof(*r));
 
if (base == 0) {
uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
struct kgem_bo *bo = NULL;
int i;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == handle)
break;
assert(i < kgem->nreloc);
handle = kgem->reloc[i].target_handle;
delta = kgem->reloc[i].delta;
 
if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
r->bo = bo;
r->base = base;
}
}
 
return (char *)base + delta;
}
#endif
 
int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
{
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6000, 3, 3, "URB_FENCE" },
{ 0x6001, 2, 2, "CS_URB_FENCE" },
{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2, "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
{ 0x7805, 3, 3, "3DSTATE_URB" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *desc1 = NULL;
 
len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6000:
assert(len == 3);
 
kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
(data[0] >> 13) & 1 ? "cs " : "",
(data[0] >> 12) & 1 ? "vfe " : "",
(data[0] >> 11) & 1 ? "sf " : "",
(data[0] >> 10) & 1 ? "clip " : "",
(data[0] >> 9) & 1 ? "gs " : "",
(data[0] >> 8) & 1 ? "vs " : "");
kgem_debug_print(data, offset, 1,
"vs fence: %d, gs_fence: %d, clip_fence: %d\n",
data[1] & 0x3ff,
(data[1] >> 10) & 0x3ff,
(data[1] >> 20) & 0x3ff);
kgem_debug_print(data, offset, 2,
"sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
data[2] & 0x3ff,
(data[2] >> 10) & 0x3ff,
(data[2] >> 20) & 0x7ff);
return len;
 
case 0x6001:
kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
(data[1] >> 4) & 0x1f,
(((data[1] >> 4) & 0x1f) + 1) * 64,
data[1] & 0x7);
return len;
case 0x6002:
kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
(data[0] >> 8) & 1 ? "valid" : "invalid");
kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
return len;
case 0x6101:
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 6);
 
state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "media");
 
state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "media");
 
return len;
 
case 0x7801:
assert(len == 6);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_BINDING_TABLE_POINTERS\n");
kgem_debug_print(data, offset, 1, "VS binding table\n");
kgem_debug_print(data, offset, 2, "GS binding table\n");
kgem_debug_print(data, offset, 3, "CLIP binding table\n");
kgem_debug_print(data, offset, 4, "SF binding table\n");
kgem_debug_print(data, offset, 5, "WM binding table\n");
 
return len;
 
case 0x7808:
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
 
for (i = 1; i < len;) {
gen4_update_vertex_buffer(kgem, data + i);
 
kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 27,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;
 
case 0x7809:
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
 
memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
for (i = 1; i < len;) {
gen4_update_vertex_elements(kgem, (i - 1)/2, data + i);
 
kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 27,
data[i] & (1 << 26) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
"dst offset 0x%02x bytes\n",
get_965_element_component(data[i], 0),
get_965_element_component(data[i], 1),
get_965_element_component(data[i], 2),
get_965_element_component(data[i], 3),
(data[i] & 0xff) * 4);
i++;
}
state.num_ve = (len - 1) / 2; /* XXX? */
return len;
 
case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;
 
case 0x7900:
assert(len == 4);
kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
data[1] & 0xffff,
(data[1] >> 16) & 0xffff);
kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
data[2] & 0xffff,
(data[2] >> 16) & 0xffff);
kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
(int)data[3] & 0xffff,
((int)data[3] >> 16) & 0xffff);
return len;
 
case 0x7905:
assert(len == 7);
kgem_debug_print(data, offset, 0,
"3DSTATE_DEPTH_BUFFER\n");
kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
get_965_surfacetype(data[1] >> 29),
get_965_depthformat((data[1] >> 18) & 0x7),
(data[1] & 0x0001ffff) + 1,
data[1] & (1 << 27) ? "" : "not ",
(data[1] & (1 << 22)) != 0,
(data[1] & (1 << 21)) != 0);
kgem_debug_print(data, offset, 2, "depth offset\n");
kgem_debug_print(data, offset, 3, "%dx%d\n",
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
kgem_debug_print(data, offset, 4, "volume depth\n");
kgem_debug_print(data, offset, 5, "\n");
kgem_debug_print(data, offset, 6, "\n");
return len;
 
case 0x7a00:
assert(len == 4 || len == 5);
switch ((data[1] >> 14) & 0x3) {
case 0: desc1 = "no write"; break;
case 1: desc1 = "qword write"; break;
case 2: desc1 = "PS_DEPTH_COUNT write"; break;
case 3: desc1 = "TIMESTAMP write"; break;
}
kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
kgem_debug_print(data, offset, 1,
"%s, %scs stall, %stlb invalidate, "
"%ssync gfdt, %sdepth stall, %sRC write flush, "
"%sinst flush, %sTC flush\n",
desc1,
data[1] & (1 << 20) ? "" : "no ",
data[1] & (1 << 18) ? "" : "no ",
data[1] & (1 << 17) ? "" : "no ",
data[1] & (1 << 13) ? "" : "no ",
data[1] & (1 << 12) ? "" : "no ",
data[1] & (1 << 11) ? "" : "no ",
data[1] & (1 << 10) ? "" : "no ");
if (len == 5) {
kgem_debug_print(data, offset, 2, "destination address\n");
kgem_debug_print(data, offset, 3, "immediate dword low\n");
kgem_debug_print(data, offset, 4, "immediate dword high\n");
} else {
for (i = 2; i < len; i++) {
kgem_debug_print(data, offset, i, "\n");
}
}
return len;
 
case 0x7b00:
assert(len == 6);
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE: %s %s\n",
get_965_prim_type(data[0]),
(data[0] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 1, "vertex count\n");
kgem_debug_print(data, offset, 2, "start vertex\n");
kgem_debug_print(data, offset, 3, "instance count\n");
kgem_debug_print(data, offset, 4, "start instance\n");
kgem_debug_print(data, offset, 5, "index bias\n");
primitive_out(kgem, data);
return len;
}
 
/* For the rest, just dump the bytes */
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode)
break;
 
assert(i < ARRAY_SIZE(opcodes));
 
len = 1;
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
if (opcodes[i].max_len > 1) {
len = (data[0] & 0xff) + 2;
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}
 
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);
 
return len;
}
 
void kgem_gen4_finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
/drivers/video/Intel-2D/kgem_debug_gen5.c
0,0 → 1,662
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen5_render.h"
 
#include "kgem_debug.h"
 
static struct state {
struct vertex_buffer {
int handle;
void *base;
int size;
const char *ptr;
int pitch;
 
struct kgem_bo *current;
} vb[17];
struct vertex_elements {
int buffer;
int offset;
bool valid;
uint32_t type;
uint8_t swizzle[4];
} ve[17];
int num_ve;
 
struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
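/* Like the gen4 variant, but uses the kgem_debug reloc helpers and also
 * records the buffer size so indirect_vertex_out can bounds-check reads. */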
struct drm_i915_gem_relocation_entry *reloc;
struct kgem_bo *bo = NULL;
void *base, *ptr;
int i, size;
 
reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch);
if (reloc->target_handle == -1) {
base = kgem->batch;
size = kgem->nbatch * sizeof(uint32_t);
} else {
bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc);
base = kgem_bo_map__debug(kgem, bo);
size = kgem_bo_size(bo);
}
ptr = (char *)base + reloc->delta;
 
i = data[0] >> 27;
 
state.vb[i].handle = reloc->target_handle;
state.vb[i].current = bo;
state.vb[i].base = base;
state.vb[i].ptr = ptr;
state.vb[i].pitch = data[0] & 0x7ff;
state.vb[i].size = size;
}
 
static uint32_t
get_ve_component(uint32_t data, int component)
{
return (data >> (16 + (3 - component) * 4)) & 0x7;
}
 
static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 27;
state.ve[id].valid = !!(data[0] & (1 << 26));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
int c, o;
 
ErrorF("(");
for (c = o = 0; c < 4 && o < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%d", v[o++]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (o < max)
ErrorF(", ");
}
ErrorF(")");
}
 
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
int c, o;
 
ErrorF("(");
for (c = o = 0; c < 4 && o < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%f", f[o++]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (o < max)
ErrorF(", ");
}
ErrorF(")");
}
 
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
switch (ve->type) {
case GEN5_SURFACEFORMAT_R32_FLOAT:
vertices_float_out(ve, ptr, 1);
break;
case GEN5_SURFACEFORMAT_R32G32_FLOAT:
vertices_float_out(ve, ptr, 2);
break;
case GEN5_SURFACEFORMAT_R32G32B32_FLOAT:
vertices_float_out(ve, ptr, 3);
break;
case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT:
vertices_float_out(ve, ptr, 4);
break;
case GEN5_SURFACEFORMAT_R16_SINT:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN5_SURFACEFORMAT_R16G16_SINT:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN5_SURFACEFORMAT_R16G16B16A16_SINT:
vertices_sint16_out(ve, ptr, 4);
break;
case GEN5_SURFACEFORMAT_R16_SSCALED:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN5_SURFACEFORMAT_R16G16_SSCALED:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED:
vertices_sint16_out(ve, ptr, 4);
break;
}
}
 
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
int i = 1;
 
do {
const struct vertex_elements *ve = &state.ve[i];
const struct vertex_buffer *vb = &state.vb[ve->buffer];
const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
 
if (!ve->valid)
continue;
 
assert(vb->pitch);
assert(ve->offset + v*vb->pitch < vb->size);
 
ve_out(ve, ptr);
 
while (++i <= state.num_ve && !state.ve[i].valid)
;
 
if (i <= state.num_ve)
ErrorF(", ");
} while (i <= state.num_ve);
}
 
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
int n;
 
assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
 
for (n = 0; n < data[1]; n++) {
int v = data[2] + n;
ErrorF(" [%d:%d] = ", n, v);
indirect_vertex_out(kgem, v);
ErrorF("\n");
}
}
 
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state base address 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state base not updated\n",
name);
}
 
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] == 1)
kgem_debug_print(data, offset, index,
"%s state upper bound disabled\n", name);
else if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state upper bound 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state upper bound not updated\n",
name);
}
 
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
switch (surfacetype) {
case 0: return "1D";
case 1: return "2D";
case 2: return "3D";
case 3: return "CUBE";
case 4: return "BUFFER";
case 7: return "NULL";
default: return "unknown";
}
}
 
static const char *
get_965_depthformat(unsigned int depthformat)
{
switch (depthformat) {
case 0: return "s8_z24float";
case 1: return "z32float";
case 2: return "z24s8";
case 5: return "z16";
default: return "unknown";
}
}
 
static const char *
get_965_element_component(uint32_t data, int component)
{
uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
 
switch (component_control) {
case 0:
return "nostore";
case 1:
switch (component) {
case 0: return "X";
case 1: return "Y";
case 2: return "Z";
case 3: return "W";
default: return "fail";
}
case 2:
return "0.0";
case 3:
return "1.0";
case 4:
return "0x1";
case 5:
return "VID";
default:
return "fail";
}
}
 
static const char *
get_965_prim_type(uint32_t data)
{
uint32_t primtype = (data >> 10) & 0x1f;
 
switch (primtype) {
case 0x01: return "point list";
case 0x02: return "line list";
case 0x03: return "line strip";
case 0x04: return "tri list";
case 0x05: return "tri strip";
case 0x06: return "tri fan";
case 0x07: return "quad list";
case 0x08: return "quad strip";
case 0x09: return "line list adj";
case 0x0a: return "line strip adj";
case 0x0b: return "tri list adj";
case 0x0c: return "tri strip adj";
case 0x0d: return "tri strip reverse";
case 0x0e: return "polygon";
case 0x0f: return "rect list";
case 0x10: return "line loop";
case 0x11: return "point list bf";
case 0x12: return "line strip cont";
case 0x13: return "line strip bf";
case 0x14: return "line strip cont bf";
case 0x15: return "tri fan no stipple";
default: return "fail";
}
}
 
#if 0
struct reloc {
struct kgem_bo *bo;
void *base;
};
 
static void *
get_reloc(struct kgem *kgem,
void *base, const uint32_t *reloc,
struct reloc *r)
{
uint32_t delta = *reloc;
 
memset(r, 0, sizeof(*r));
 
if (base == 0) {
uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
struct kgem_bo *bo = NULL;
int i;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == handle)
break;
assert(i < kgem->nreloc);
handle = kgem->reloc[i].target_handle;
delta = kgem->reloc[i].delta;
 
if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map(kgem, bo, PROT_READ);
r->bo = bo;
r->base = base;
}
}
 
return (char *)base + delta;
}
#endif
 
int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
{
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6000, 3, 3, "URB_FENCE" },
{ 0x6001, 2, 2, "CS_URB_FENCE" },
{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2, "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
{ 0x7805, 3, 3, "3DSTATE_URB" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *desc1 = NULL;
 
len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6000:
assert(len == 3);
 
kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
(data[0] >> 13) & 1 ? "cs " : "",
(data[0] >> 12) & 1 ? "vfe " : "",
(data[0] >> 11) & 1 ? "sf " : "",
(data[0] >> 10) & 1 ? "clip " : "",
(data[0] >> 9) & 1 ? "gs " : "",
(data[0] >> 8) & 1 ? "vs " : "");
kgem_debug_print(data, offset, 1,
"vs fence: %d, gs_fence: %d, clip_fence: %d\n",
data[1] & 0x3ff,
(data[1] >> 10) & 0x3ff,
(data[1] >> 20) & 0x3ff);
kgem_debug_print(data, offset, 2,
"sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
data[2] & 0x3ff,
(data[2] >> 10) & 0x3ff,
(data[2] >> 20) & 0x7ff);
return len;
 
case 0x6001:
kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
(data[1] >> 4) & 0x1f,
(((data[1] >> 4) & 0x1f) + 1) * 64,
data[1] & 0x7);
return len;
case 0x6002:
kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
(data[0] >> 8) & 1 ? "valid" : "invalid");
kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
return len;
case 0x6101:
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 8);
 
state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "media");
state_base_out(data, offset, i++, "instruction");
 
state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "media");
state_max_out(data, offset, i++, "instruction");
 
return len;
 
case 0x7801:
assert(len == 6);
 
kgem_debug_print(data, offset, 0,
"3DSTATE_BINDING_TABLE_POINTERS\n");
kgem_debug_print(data, offset, 1, "VS binding table\n");
kgem_debug_print(data, offset, 2, "GS binding table\n");
kgem_debug_print(data, offset, 3, "CLIP binding table\n");
kgem_debug_print(data, offset, 4, "SF binding table\n");
kgem_debug_print(data, offset, 5, "WM binding table\n");
 
return len;
 
case 0x7808:
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
 
for (i = 1; i < len;) {
gen5_update_vertex_buffer(kgem, data + i);
 
kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 27,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;
 
case 0x7809:
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
 
memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
for (i = 1; i < len;) {
gen5_update_vertex_elements(kgem, (i - 1)/2, data + i);
 
kgem_debug_print(data, offset, i,
"buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 27,
data[i] & (1 << 26) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n",
get_965_element_component(data[i], 0),
get_965_element_component(data[i], 1),
get_965_element_component(data[i], 2),
get_965_element_component(data[i], 3));
i++;
}
state.num_ve = (len - 1) / 2; /* XXX? */
return len;
 
case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;
 
case 0x7900:
assert(len == 4);
kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
data[1] & 0xffff,
(data[1] >> 16) & 0xffff);
kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
data[2] & 0xffff,
(data[2] >> 16) & 0xffff);
kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
(int)data[3] & 0xffff,
((int)data[3] >> 16) & 0xffff);
return len;
 
case 0x7905:
assert(len == 7);
kgem_debug_print(data, offset, 0,
"3DSTATE_DEPTH_BUFFER\n");
kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n",
get_965_surfacetype(data[1] >> 29),
get_965_depthformat((data[1] >> 18) & 0x7),
(data[1] & 0x0001ffff) + 1,
data[1] & (1 << 27) ? "" : "not ",
(data[1] & (1 << 22)) != 0,
(data[1] & (1 << 21)) != 0);
kgem_debug_print(data, offset, 2, "depth offset\n");
kgem_debug_print(data, offset, 3, "%dx%d\n",
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
kgem_debug_print(data, offset, 4, "volume depth\n");
kgem_debug_print(data, offset, 5, "\n");
kgem_debug_print(data, offset, 6, "\n");
return len;
 
case 0x7a00:
assert(len == 4 || len == 5);
switch ((data[1] >> 14) & 0x3) {
case 0: desc1 = "no write"; break;
case 1: desc1 = "qword write"; break;
case 2: desc1 = "PS_DEPTH_COUNT write"; break;
case 3: desc1 = "TIMESTAMP write"; break;
}
kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
kgem_debug_print(data, offset, 1,
"%s, %scs stall, %stlb invalidate, "
"%ssync gfdt, %sdepth stall, %sRC write flush, "
"%sinst flush, %sTC flush\n",
desc1,
data[1] & (1 << 20) ? "" : "no ",
data[1] & (1 << 18) ? "" : "no ",
data[1] & (1 << 17) ? "" : "no ",
data[1] & (1 << 13) ? "" : "no ",
data[1] & (1 << 12) ? "" : "no ",
data[1] & (1 << 11) ? "" : "no ",
data[1] & (1 << 10) ? "" : "no ");
if (len == 5) {
kgem_debug_print(data, offset, 2, "destination address\n");
kgem_debug_print(data, offset, 3, "immediate dword low\n");
kgem_debug_print(data, offset, 4, "immediate dword high\n");
} else {
for (i = 2; i < len; i++) {
kgem_debug_print(data, offset, i, "\n");
}
}
return len;
 
case 0x7b00:
assert(len == 6);
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE: %s %s\n",
get_965_prim_type(data[0]),
(data[0] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 1, "vertex count\n");
kgem_debug_print(data, offset, 2, "start vertex\n");
kgem_debug_print(data, offset, 3, "instance count\n");
kgem_debug_print(data, offset, 4, "start instance\n");
kgem_debug_print(data, offset, 5, "index bias\n");
primitive_out(kgem, data);
return len;
}
 
/* For the rest, just dump the bytes */
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode)
break;
 
assert(i < ARRAY_SIZE(opcodes));
 
len = 1;
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
if (opcodes[i].max_len > 1) {
len = (data[0] & 0xff) + 2;
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}
 
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);
 
return len;
}
 
void kgem_gen5_finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
/drivers/video/Intel-2D/kgem_debug_gen6.c
39,8 → 39,6
 
#include "kgem_debug.h"
 
#define ErrorF printf
 
static struct state {
struct vertex_buffer {
int handle;
/drivers/video/Intel-2D/kgem_debug_gen7.c
0,0 → 1,715
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
#include "gen7_render.h"
 
#include "kgem_debug.h"
 
static struct state {
struct vertex_buffer {
int handle;
void *base;
const char *ptr;
int pitch;
 
struct kgem_bo *current;
} vb[33];
struct vertex_elements {
int buffer;
int offset;
bool valid;
uint32_t type;
uint8_t swizzle[4];
} ve[33];
int num_ve;
 
struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
struct kgem_bo *bo = NULL;
void *base, *ptr;
int i;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == reloc)
break;
assert(i < kgem->nreloc);
reloc = kgem->reloc[i].target_handle;
 
if (reloc == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
}
ptr = (char *)base + kgem->reloc[i].delta;
 
i = data[0] >> 26;
 
state.vb[i].current = bo;
state.vb[i].base = base;
state.vb[i].ptr = ptr;
state.vb[i].pitch = data[0] & 0x7ff;
}
 
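/* Record a CPU mapping of the dynamic state buffer named by
 * STATE_BASE_ADDRESS (only when its modify-enable bit is set) so that
 * sampler and blend state can be decoded later. */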
static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
{
uint32_t reloc = sizeof(uint32_t) * offset;
struct kgem_bo *bo = NULL;
void *base, *ptr;
int i;
 
if ((kgem->batch[offset] & 1) == 0)
return;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == reloc)
break;
if(i < kgem->nreloc) {
reloc = kgem->reloc[i].target_handle;
 
if (reloc == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == reloc)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
}
ptr = (char *)base + (kgem->reloc[i].delta & ~1);
} else {
bo = NULL;
base = NULL;
ptr = NULL;
}
 
state.dynamic_state.current = bo;
state.dynamic_state.base = base;
state.dynamic_state.ptr = ptr;
}
 
static uint32_t
get_ve_component(uint32_t data, int component)
{
return (data >> (16 + (3 - component) * 4)) & 0x7;
}
 
static void gen7_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 26;
state.ve[id].valid = !!(data[0] & (1 << 25));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
static void gen7_update_sf_state(struct kgem *kgem, uint32_t *data)
{
state.num_ve = 1 + ((data[1] >> 22) & 0x3f);
}
 
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
int c;
 
ErrorF("(");
for (c = 0; c < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%d", v[c]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
for (; c < 4; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("1.0"); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
ErrorF(")");
}
 
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
int c, o;
 
ErrorF("(");
for (c = o = 0; c < 4 && o < max; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("%f", f[o++]); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
for (; c < 4; c++) {
switch (ve->swizzle[c]) {
case 0: ErrorF("#"); break;
case 1: ErrorF("1.0"); break;
case 2: ErrorF("0.0"); break;
case 3: ErrorF("1.0"); break;
case 4: ErrorF("0x1"); break;
case 5: break;
default: ErrorF("?");
}
if (c < 3)
ErrorF(", ");
}
ErrorF(")");
}
 
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
switch (ve->type) {
case GEN7_SURFACEFORMAT_R32_FLOAT:
vertices_float_out(ve, ptr, 1);
break;
case GEN7_SURFACEFORMAT_R32G32_FLOAT:
vertices_float_out(ve, ptr, 2);
break;
case GEN7_SURFACEFORMAT_R32G32B32_FLOAT:
vertices_float_out(ve, ptr, 3);
break;
case GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT:
vertices_float_out(ve, ptr, 4);
break;
case GEN7_SURFACEFORMAT_R16_SINT:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN7_SURFACEFORMAT_R16G16_SINT:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN7_SURFACEFORMAT_R16G16B16A16_SINT:
vertices_sint16_out(ve, ptr, 4);
break;
case GEN7_SURFACEFORMAT_R16_SSCALED:
vertices_sint16_out(ve, ptr, 1);
break;
case GEN7_SURFACEFORMAT_R16G16_SSCALED:
vertices_sint16_out(ve, ptr, 2);
break;
case GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED:
vertices_sint16_out(ve, ptr, 4);
break;
}
}
 
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
int i = 1;
 
do {
const struct vertex_elements *ve = &state.ve[i];
const struct vertex_buffer *vb = &state.vb[ve->buffer];
const void *ptr = vb->ptr + v * vb->pitch + ve->offset;
 
if (!ve->valid)
continue;
 
ve_out(ve, ptr);
 
while (++i <= state.num_ve && !state.ve[i].valid)
;
 
if (i <= state.num_ve)
ErrorF(", ");
} while (i <= state.num_ve);
}
 
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
int n;
 
assert((data[0] & (1<<15)) == 0); /* XXX index buffers */
 
for (n = 0; n < data[2]; n++) {
int v = data[3] + n;
ErrorF(" [%d:%d] = ", n, v);
indirect_vertex_out(kgem, v);
ErrorF("\n");
}
}
 
static void finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
 
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state base address 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state base not updated\n",
name);
}
 
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
const char *name)
{
if (data[index] == 1)
kgem_debug_print(data, offset, index,
"%s state upper bound disabled\n", name);
else if (data[index] & 1)
kgem_debug_print(data, offset, index,
"%s state upper bound 0x%08x\n",
name, data[index] & ~1);
else
kgem_debug_print(data, offset, index,
"%s state upper bound not updated\n",
name);
}
 
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
switch (surfacetype) {
case 0: return "1D";
case 1: return "2D";
case 2: return "3D";
case 3: return "CUBE";
case 4: return "BUFFER";
case 7: return "NULL";
default: return "unknown";
}
}
 
static const char *
get_965_depthformat(unsigned int depthformat)
{
switch (depthformat) {
case 0: return "s8_z24float";
case 1: return "z32float";
case 2: return "z24s8";
case 5: return "z16";
default: return "unknown";
}
}
 
static const char *
get_element_component(uint32_t data, int component)
{
uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7;
 
switch (component_control) {
case 0:
return "nostore";
case 1:
switch (component) {
case 0: return "X";
case 1: return "Y";
case 2: return "Z";
case 3: return "W";
default: return "fail";
}
case 2:
return "0.0";
case 3:
return "1.0";
case 4:
return "0x1";
case 5:
return "VID";
default:
return "fail";
}
}
 
static const char *
get_prim_type(uint32_t data)
{
uint32_t primtype = data & 0x1f;
 
switch (primtype) {
case 0x01: return "point list";
case 0x02: return "line list";
case 0x03: return "line strip";
case 0x04: return "tri list";
case 0x05: return "tri strip";
case 0x06: return "tri fan";
case 0x07: return "quad list";
case 0x08: return "quad strip";
case 0x09: return "line list adj";
case 0x0a: return "line strip adj";
case 0x0b: return "tri list adj";
case 0x0c: return "tri strip adj";
case 0x0d: return "tri strip reverse";
case 0x0e: return "polygon";
case 0x0f: return "rect list";
case 0x10: return "line loop";
case 0x11: return "point list bf";
case 0x12: return "line strip cont";
case 0x13: return "line strip bf";
case 0x14: return "line strip cont bf";
case 0x15: return "tri fan no stipple";
default: return "fail";
}
}
 
struct reloc {
struct kgem_bo *bo;
void *base;
};
 
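/* Resolve a pointer into the batch (or into already-mapped dynamic state)
 * to a CPU mapping of the relocation target; the low flag bits of the
 * delta are masked off before the pointer is returned. */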
static void *
get_reloc(struct kgem *kgem,
void *base, const uint32_t *reloc,
struct reloc *r)
{
uint32_t delta = *reloc;
 
memset(r, 0, sizeof(*r));
 
if (base == 0) {
uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
struct kgem_bo *bo = NULL;
int i;
 
for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == handle)
break;
assert(i < kgem->nreloc);
handle = kgem->reloc[i].target_handle;
delta = kgem->reloc[i].delta;
 
if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
r->bo = bo;
r->base = base;
}
}
 
return (char *)base + (delta & ~3);
}
 
static const char *
gen7_filter_to_string(uint32_t filter)
{
switch (filter) {
default:
case GEN7_MAPFILTER_NEAREST: return "nearest";
case GEN7_MAPFILTER_LINEAR: return "linear";
}
}
 
static const char *
gen7_repeat_to_string(uint32_t repeat)
{
switch (repeat) {
default:
case GEN7_TEXCOORDMODE_CLAMP_BORDER: return "border";
case GEN7_TEXCOORDMODE_WRAP: return "wrap";
case GEN7_TEXCOORDMODE_CLAMP: return "clamp";
case GEN7_TEXCOORDMODE_MIRROR: return "mirror";
}
}
 
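/* Locate the sampler state via the dynamic state mapping and print the
 * filter and wrap modes for the first two samplers. */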
static void
gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
{
const struct gen7_sampler_state *ss;
struct reloc r;
const char *min, *mag;
const char *s_wrap, *t_wrap, *r_wrap;
 
ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
 
min = gen7_filter_to_string(ss->ss0.min_filter);
mag = gen7_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode);
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode);
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode);
 
ErrorF(" Sampler 0:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
 
ss++;
min = gen7_filter_to_string(ss->ss0.min_filter);
mag = gen7_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode);
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode);
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode);
 
ErrorF(" Sampler 1:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
}
 
static const char *
gen7_blend_factor_to_string(uint32_t v)
{
switch (v) {
#define C(x) case GEN7_BLENDFACTOR_##x: return #x;
C(ONE);
C(SRC_COLOR);
C(SRC_ALPHA);
C(DST_ALPHA);
C(DST_COLOR);
C(SRC_ALPHA_SATURATE);
C(CONST_COLOR);
C(CONST_ALPHA);
C(SRC1_COLOR);
C(SRC1_ALPHA);
C(ZERO);
C(INV_SRC_COLOR);
C(INV_SRC_ALPHA);
C(INV_DST_ALPHA);
C(INV_DST_COLOR);
C(INV_CONST_COLOR);
C(INV_CONST_ALPHA);
C(INV_SRC1_COLOR);
C(INV_SRC1_ALPHA);
#undef C
default: return "???";
}
}
 
static const char *
gen7_blend_function_to_string(uint32_t v)
{
switch (v) {
#define C(x) case GEN7_BLENDFUNCTION_##x: return #x;
C(ADD);
C(SUBTRACT);
C(REVERSE_SUBTRACT);
C(MIN);
C(MAX);
#undef C
default: return "???";
}
}
 
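/* Locate the blend state via the dynamic state mapping and print whether
 * blending is enabled plus the blend function and factors. */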
static void
gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc)
{
const struct gen7_blend_state *blend;
struct reloc r;
const char *dst, *src;
const char *func;
 
blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
 
dst = gen7_blend_factor_to_string(blend->blend0.dest_blend_factor);
src = gen7_blend_factor_to_string(blend->blend0.source_blend_factor);
func = gen7_blend_function_to_string(blend->blend0.blend_func);
 
ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
blend->blend0.blend_enable ? "enabled" : "disabled",
func, src, dst);
}
 
int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
{
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2, "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *name;
 
len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6101:
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 10);
 
state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "dynamic");
state_base_out(data, offset, i++, "indirect");
state_base_out(data, offset, i++, "instruction");
 
state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "dynamic");
state_max_out(data, offset, i++, "indirect");
state_max_out(data, offset, i++, "instruction");
 
gen7_update_dynamic_buffer(kgem, offset + 3);
 
return len;
 
case 0x7808:
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
 
for (i = 1; i < len;) {
gen7_update_vertex_buffer(kgem, data + i);
 
kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 26,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;
 
case 0x7809:
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
 
for (i = 1; i < len;) {
gen7_update_vertex_elements(kgem, (i - 1)/2, data + i);
 
kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 26,
data[i] & (1 << 25) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
"dst offset 0x%02x bytes\n",
get_element_component(data[i], 0),
get_element_component(data[i], 1),
get_element_component(data[i], 2),
get_element_component(data[i], 3),
(data[i] & 0xff) * 4);
i++;
}
return len;
 
case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;
 
case 0x7b00:
assert(len == 7);
kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n");
kgem_debug_print(data, offset, 1, "type %s, %s\n",
get_prim_type(data[1]),
(data[1] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 2, "vertex count\n");
kgem_debug_print(data, offset, 3, "start vertex\n");
kgem_debug_print(data, offset, 4, "instance count\n");
kgem_debug_print(data, offset, 5, "start instance\n");
kgem_debug_print(data, offset, 6, "index bias\n");
primitive_out(kgem, data);
return len;
}
 
/* For the rest, just dump the bytes */
name = NULL;
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode) {
name = opcodes[i].name;
break;
}
 
len = (data[0] & 0xff) + 2;
if (name == NULL) {
kgem_debug_print(data, offset, 0, "unknown\n");
} else {
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
if (opcodes[i].max_len > 1) {
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}
}
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);
 
return len;
}
 
void kgem_gen7_finish_state(struct kgem *kgem)
{
finish_state(kgem);
}
/drivers/video/Intel-2D/sna.c
1,11 → 1,51
/**************************************************************************
 
Copyright 2001 VA Linux Systems Inc., Fremont, California.
Copyright © 2002 by David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors: Jeff Hartmann <jhartmann@valinux.com>
* Abraham van der Merwe <abraham@2d3d.co.za>
* David Dawes <dawes@xfree86.org>
* Alan Hourihane <alanh@tungstengraphics.com>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <memory.h>
#include <malloc.h>
#include <kos32sys.h>
#include <pixlib2.h>
#include "i915_pciids.h"
 
#include "compiler.h"
#include "sna.h"
#include "intel_driver.h"
 
#define to_surface(x) (surface_t*)((x)->handle)
 
46,6 → 86,8
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing,
unsigned flags, uint32_t width, uint32_t height,
uint32_t bpp, uint32_t tiling, uint32_t *pitch);
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
int pitch, int height);
 
void kgem_close_batches(struct kgem *kgem);
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
53,8 → 95,6
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci);
 
//struct kgem_bo *create_bo(bitmap_t *bitmap);
 
static bool sna_solid_cache_init(struct sna *sna);
 
struct sna *sna_device;
66,8 → 106,46
(void)sna;
}
 
void no_render_init(struct sna *sna)
static void no_render_flush(struct sna *sna)
{
(void)sna;
}
 
static void
no_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (!kgem->nbatch)
return;
 
if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
 
(void)new_mode;
}
 
static void
no_render_retire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_expire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_fini(struct sna *sna)
{
(void)sna;
}
 
const char *no_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
memset (render,0, sizeof (*render));
77,27 → 155,19
render->vertices = render->vertex_data;
render->vertex_size = ARRAY_SIZE(render->vertex_data);
 
// render->composite = no_render_composite;
 
// render->copy_boxes = no_render_copy_boxes;
// render->copy = no_render_copy;
 
// render->fill_boxes = no_render_fill_boxes;
// render->fill = no_render_fill;
// render->fill_one = no_render_fill_one;
// render->clear = no_render_clear;
 
render->reset = no_render_reset;
// render->flush = no_render_flush;
// render->fini = no_render_fini;
render->flush = no_render_flush;
render->fini = no_render_fini;
 
// sna->kgem.context_switch = no_render_context_switch;
// sna->kgem.retire = no_render_retire;
sna->kgem.context_switch = no_render_context_switch;
sna->kgem.retire = no_render_retire;
sna->kgem.expire = no_render_expire;
 
if (sna->kgem.gen >= 60)
sna->kgem.mode = KGEM_RENDER;
sna->kgem.ring = KGEM_RENDER;
 
sna_vertex_init(sna);
return "generic";
}
 
void sna_vertex_init(struct sna *sna)
111,47 → 181,27
{
const char *backend;
 
// list_init(&sna->deferred_free);
// list_init(&sna->dirty_pixmaps);
// list_init(&sna->active_pixmaps);
// list_init(&sna->inactive_clock[0]);
// list_init(&sna->inactive_clock[1]);
backend = no_render_init(sna);
if (sna->info->gen >= 0100)
(void)backend;
else if (sna->info->gen >= 070)
backend = gen7_render_init(sna, backend);
else if (sna->info->gen >= 060)
backend = gen6_render_init(sna, backend);
else if (sna->info->gen >= 050)
backend = gen5_render_init(sna, backend);
else if (sna->info->gen >= 040)
backend = gen4_render_init(sna, backend);
else if (sna->info->gen >= 030)
backend = gen3_render_init(sna, backend);
 
// sna_accel_install_timers(sna);
 
 
backend = "no";
no_render_init(sna);
 
if (sna->info->gen >= 0100) {
} else if (sna->info->gen >= 070) {
if (gen7_render_init(sna))
backend = "IvyBridge";
} else if (sna->info->gen >= 060) {
if (gen6_render_init(sna))
backend = "SandyBridge";
} else if (sna->info->gen >= 050) {
if (gen5_render_init(sna))
backend = "Ironlake";
} else if (sna->info->gen >= 040) {
if (gen4_render_init(sna))
backend = "Broadwater/Crestline";
} else if (sna->info->gen >= 030) {
if (gen3_render_init(sna))
backend = "gen3";
}
 
DBG(("%s(backend=%s, prefer_gpu=%x)\n",
__FUNCTION__, backend, sna->render.prefer_gpu));
 
kgem_reset(&sna->kgem);
 
// if (!sna_solid_cache_init(sna))
// return false;
 
sna_device = sna;
 
 
return kgem_init_fb(&sna->kgem, &sna_fb);
}
 
186,33 → 236,17
 
memset(sna, 0, sizeof(*sna));
sna->cpu_features = sna_cpu_detect();
 
sna->PciInfo = &device;
 
sna->info = intel_detect_chipset(sna->PciInfo);
sna->scrn = service;
 
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
/*
if (!xf86ReturnOptValBool(sna->Options,
OPTION_RELAXED_FENCING,
sna->kgem.has_relaxed_fencing)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED,
"Disabling use of relaxed fencing\n");
sna->kgem.has_relaxed_fencing = 0;
}
if (!xf86ReturnOptValBool(sna->Options,
OPTION_VMAP,
sna->kgem.has_vmap)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_vmap ? X_CONFIG : X_PROBED,
"Disabling use of vmap\n");
sna->kgem.has_vmap = 0;
}
*/
 
/* Disable tiling by default */
sna->tiling = SNA_TILING_DISABLE;
sna->tiling = 0;
 
/* Default fail-safe value of 75 Hz */
// sna->vblank_interval = 1000 * 1000 * 1000 / 75;
490,6 → 524,47
return -1;
};
 
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
{
surface_t *sf;
struct kgem_bo *bo;
 
sf = malloc(sizeof(*sf));
if(sf == NULL)
goto err_1;
 
__lock_acquire_recursive(__sna_lock);
 
bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height);
if (bo == NULL) /* bail out via err_2 if the bo lookup failed */
goto err_2;
 
__lock_release_recursive(__sna_lock);
 
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->data = NULL;
sf->pitch = bo->pitch;
sf->bo = bo;
sf->bo_size = PAGE_SIZE * bo->size.pages.count;
sf->flags = bitmap->flags;
 
bitmap->handle = (uint32_t)sf;
 
return 0;
 
err_2:
__lock_release_recursive(__sna_lock);
free(sf);
err_1:
return -1;
};
 
void sna_set_bo_handle(bitmap_t *bitmap, int handle)
{
surface_t *sf = to_surface(bitmap);
struct kgem_bo *bo = sf->bo;
bo->handle = handle;
}
 
int sna_destroy_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
686,7 → 761,7
VG_CLEAR(update);
update.handle = mask_bo->handle;
update.bo_map = (__u32)MAP(mask_bo->map);
update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
mask_bo->pitch = update.bo_pitch;
756,8 → 831,6
 
 
 
 
 
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};
807,100 → 880,37
 
static const struct pci_id_match intel_device_match[] = {
 
INTEL_I915G_IDS(&intel_i915_info),
INTEL_I915GM_IDS(&intel_i915_info),
INTEL_I945G_IDS(&intel_i945_info),
INTEL_I945GM_IDS(&intel_i945_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ),
INTEL_G33_IDS(&intel_g33_info),
INTEL_PINEVIEW_IDS(&intel_g33_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ),
/* Another marketing win: Q35 is another g33 device not a gen4 part
* like its G35 brethren.
*/
INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ),
INTEL_I965G_IDS(&intel_i965_info),
INTEL_I965GM_IDS(&intel_i965_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ),
INTEL_G45_IDS(&intel_g4x_info),
INTEL_GM45_IDS(&intel_g4x_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ),
INTEL_IRONLAKE_D_IDS(&intel_ironlake_info),
INTEL_IRONLAKE_M_IDS(&intel_ironlake_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ),
INTEL_SNB_D_IDS(&intel_sandybridge_info),
INTEL_SNB_M_IDS(&intel_sandybridge_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ),
INTEL_IVB_D_IDS(&intel_ivybridge_info),
INTEL_IVB_M_IDS(&intel_ivybridge_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
INTEL_HSW_D_IDS(&intel_haswell_info),
INTEL_HSW_M_IDS(&intel_haswell_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
INTEL_VLV_D_IDS(&intel_valleyview_info),
INTEL_VLV_M_IDS(&intel_valleyview_info),
 
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),
 
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
 
{ 0, 0, 0 },
};
 
948,7 → 958,21
}
 
int intel_get_device_id(int fd)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
 
return devid;
}
 
int drmIoctl(int fd, unsigned long request, void *arg)
{
ioctl_t io;
/drivers/video/Intel-2D/sna.h
37,26 → 37,83
#ifndef _SNA_H_
#define _SNA_H_
 
#include <stdint.h>
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <stdint.h>
#include "compiler.h"
 
 
#include <memory.h>
#include <malloc.h>
#include <errno.h>
#include <kos32sys.h>
 
#include "intel_driver.h"
#include "pciaccess.h"
 
#include "compiler.h"
#include <drm.h>
#include <i915_drm.h>
 
//#define DBG(x)
//#define DBG(x) ErrorF x
#ifdef HAVE_DRI2_H
#include <dri2.h>
#endif
 
#define assert(x)
#if HAVE_UDEV
#include <libudev.h>
#endif
 
#if 0
#include <xorg-server.h>
 
#include <xf86Crtc.h>
#if XF86_CRTC_VERSION >= 5
#define HAS_PIXMAP_SHARING 1
#endif
 
#include <xf86str.h>
#include <windowstr.h>
#include <glyphstr.h>
#include <picturestr.h>
#include <gcstruct.h>
#include <xvdix.h>
 
#include <pciaccess.h>
 
#include <xf86drmMode.h>
 
#include "../compat-api.h"
 
#endif
 
#include <assert.h>
 
#define ErrorF printf
 
#if HAS_DEBUG_FULL
#define DBG(x) ErrorF x
#else
#define DBG(x)
#endif
 
#define DEBUG_NO_BLT 0
 
#define DEBUG_FLUSH_BATCH 0
 
#define TEST_ALL 0
#define TEST_ACCEL (TEST_ALL || 0)
#define TEST_BATCH (TEST_ALL || 0)
#define TEST_BLT (TEST_ALL || 0)
#define TEST_COMPOSITE (TEST_ALL || 0)
#define TEST_DAMAGE (TEST_ALL || 0)
#define TEST_GRADIENT (TEST_ALL || 0)
#define TEST_GLYPHS (TEST_ALL || 0)
#define TEST_IO (TEST_ALL || 0)
#define TEST_KGEM (TEST_ALL || 0)
#define TEST_RENDER (TEST_ALL || 0)
 
int drmIoctl(int fd, unsigned long request, void *arg);
 
 
94,12 → 151,69
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0)
/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
 
/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
 
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
 
/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
 
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
 
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
 
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
 
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* YUV formats */
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
 
} pixman_format_code_t;
 
typedef enum _PictFormatShort {
 
PICT_a2r10g10b10 = PIXMAN_a2r10g10b10,
PICT_x2r10g10b10 = PIXMAN_x2r10g10b10,
PICT_a2b10g10r10 = PIXMAN_a2b10g10r10,
PICT_x2b10g10r10 = PIXMAN_x2b10g10r10,
 
PICT_a8r8g8b8 = PIXMAN_a8r8g8b8,
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8,
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8,
107,10 → 221,52
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8,
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8,
 
/* 24bpp formats */
PICT_r8g8b8 = PIXMAN_r8g8b8,
PICT_b8g8r8 = PIXMAN_b8g8r8,
 
/* 16bpp formats */
PICT_r5g6b5 = PIXMAN_r5g6b5,
PICT_b5g6r5 = PIXMAN_b5g6r5,
 
PICT_a1r5g5b5 = PIXMAN_a1r5g5b5,
PICT_x1r5g5b5 = PIXMAN_x1r5g5b5,
PICT_a1b5g5r5 = PIXMAN_a1b5g5r5,
PICT_x1b5g5r5 = PIXMAN_x1b5g5r5,
PICT_a4r4g4b4 = PIXMAN_a4r4g4b4,
PICT_x4r4g4b4 = PIXMAN_x4r4g4b4,
PICT_a4b4g4r4 = PIXMAN_a4b4g4r4,
PICT_x4b4g4r4 = PIXMAN_x4b4g4r4,
 
/* 8bpp formats */
PICT_a8 = PIXMAN_a8,
PICT_r3g3b2 = PIXMAN_r3g3b2,
PICT_b2g3r3 = PIXMAN_b2g3r3,
PICT_a2r2g2b2 = PIXMAN_a2r2g2b2,
PICT_a2b2g2r2 = PIXMAN_a2b2g2r2,
 
PICT_c8 = PIXMAN_c8,
PICT_g8 = PIXMAN_g8,
 
PICT_x4a4 = PIXMAN_x4a4,
 
PICT_x4c4 = PIXMAN_x4c4,
PICT_x4g4 = PIXMAN_x4g4,
 
/* 4bpp formats */
PICT_a4 = PIXMAN_a4,
PICT_r1g2b1 = PIXMAN_r1g2b1,
PICT_b1g2r1 = PIXMAN_b1g2r1,
PICT_a1r1g1b1 = PIXMAN_a1r1g1b1,
PICT_a1b1g1r1 = PIXMAN_a1b1g1r1,
 
PICT_c4 = PIXMAN_c4,
PICT_g4 = PIXMAN_g4,
 
/* 1bpp formats */
PICT_a1 = PIXMAN_a1,
 
PICT_g1 = PIXMAN_g1
} PictFormatShort;
 
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
266,8 → 422,22
#define PictOpMaximum 13
 
 
#define SNA_CURSOR_X 64
#define SNA_CURSOR_Y SNA_CURSOR_X
 
struct sna_client {
int is_compositor; /* only 4 bits used */
};
 
 
//#define assert(x)
 
 
struct sna {
struct kgem kgem;
 
unsigned scrn;
 
unsigned flags;
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
274,21 → 444,31
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
#define SNA_FLUSH_GTT 0x40
#define SNA_IS_HOSTED 0x80
#define SNA_PERFORMANCE 0x100
#define SNA_POWERSAVE 0x200
#define SNA_REPROBE 0x80000000
 
unsigned cpu_features;
#define MMX 0x1
#define SSE 0x2
#define SSE2 0x4
#define SSE3 0x8
#define SSSE3 0x10
#define SSE4_1 0x20
#define SSE4_2 0x40
#define AVX 0x80
#define AVX2 0x100
 
struct list flush_pixmaps;
struct list active_pixmaps;
 
 
 
// int vblank_interval;
 
// struct list deferred_free;
// struct list dirty_pixmaps;
// struct list active_pixmaps;
// struct list inactive_clock[2];
 
unsigned int tiling;
#define SNA_TILING_DISABLE 0x0
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
#define SNA_TILING_ALL (~0)
303,7 → 483,6
uint32_t fill_alu;
} blt_state;
union {
// struct gen2_render_state gen2;
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
318,12 → 497,11
/* Driver phase/state information */
// Bool suspended;
 
struct kgem kgem;
struct sna_render render;
 
#if DEBUG_MEMORY
struct {
int shadow_pixels_allocs;
int pixmap_allocs;
int cpu_bo_allocs;
size_t shadow_pixels_bytes;
size_t cpu_bo_bytes;
352,4 → 530,78
#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif
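/* Apply the 3x3 picture transform to (x, y); the result is homogeneous and
 * the call reports failure (returns false) when the w component is zero. */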
static inline bool
_sna_transform_point(const PictTransform *transform,
int64_t x, int64_t y, int64_t result[3])
{
int j;
 
for (j = 0; j < 3; j++)
result[j] = (transform->matrix[j][0] * x +
transform->matrix[j][1] * y +
transform->matrix[j][2]);
 
return result[2] != 0;
}
 
static inline void
_sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out)
{
 
int64_t result[3];
 
_sna_transform_point(transform, x, y, result);
*x_out = result[0] / (double)result[2];
*y_out = result[1] / (double)result[2];
}
 
static inline void
_sna_get_transformed_scaled(int x, int y,
const PictTransform *transform, const float *sf,
float *x_out, float *y_out)
{
*x_out = sf[0] * (transform->matrix[0][0] * x +
transform->matrix[0][1] * y +
transform->matrix[0][2]);
 
*y_out = sf[1] * (transform->matrix[1][0] * x +
transform->matrix[1][1] * y +
transform->matrix[1][2]);
}
 
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out);
 
void
sna_get_transformed_coordinates_3d(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out, float *z_out);
 
bool sna_transform_is_affine(const PictTransform *t);
bool sna_transform_is_integer_translation(const PictTransform *t,
int16_t *tx, int16_t *ty);
bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
static inline bool
sna_affine_transform_is_rotation(const PictTransform *t)
{
assert(sna_transform_is_affine(t));
return t->matrix[0][1] | t->matrix[1][0];
}
 
static inline bool
sna_transform_equal(const PictTransform *a, const PictTransform *b)
{
if (a == b)
return true;
 
if (a == NULL || b == NULL)
return false;
 
return memcmp(a, b, sizeof(*a)) == 0;
}
#endif /* _SNA_H */
/drivers/video/Intel-2D/sna_cpu.c
0,0 → 1,116
/*
* Copyright (c) 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_cpuid.h"
 
#define xgetbv(index,eax,edx) \
__asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
 
#define has_YMM 0x1
 
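/* Build the CPU feature mask (MMX/SSE*/AVX*) from CPUID leaves 1 and 7;
 * AVX and AVX2 are only reported when XGETBV confirms the OS saves YMM
 * state across context switches. */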
unsigned sna_cpu_detect(void)
{
unsigned max = __get_cpuid_max(BASIC_CPUID, NULL);
unsigned int eax, ebx, ecx, edx;
unsigned features = 0;
unsigned extra = 0;
 
if (max >= 1) {
__cpuid(1, eax, ebx, ecx, edx);
if (ecx & bit_SSE3)
features |= SSE3;
 
if (ecx & bit_SSSE3)
features |= SSSE3;
 
if (ecx & bit_SSE4_1)
features |= SSE4_1;
 
if (ecx & bit_SSE4_2)
features |= SSE4_2;
 
if (ecx & bit_OSXSAVE) {
unsigned int bv_eax, bv_ecx;
xgetbv(0, bv_eax, bv_ecx);
if ((bv_eax & 6) == 6)
extra |= has_YMM;
}
 
if ((extra & has_YMM) && (ecx & bit_AVX))
features |= AVX;
 
if (edx & bit_MMX)
features |= MMX;
 
if (edx & bit_SSE)
features |= SSE;
 
if (edx & bit_SSE2)
features |= SSE2;
}
 
if (max >= 7) {
__cpuid_count(7, 0, eax, ebx, ecx, edx);
if ((extra & has_YMM) && (ebx & bit_AVX2))
features |= AVX2;
}
 
return features;
}
 
char *sna_cpu_features_to_string(unsigned features, char *line)
{
char *ret = line;
 
#ifdef __x86_64__
line += sprintf (line, "x86-64");
#else
line += sprintf (line, "x86");
#endif
 
if (features & SSE2)
line += sprintf (line, ", sse2");
if (features & SSE3)
line += sprintf (line, ", sse3");
if (features & SSSE3)
line += sprintf (line, ", ssse3");
if (features & SSE4_1)
line += sprintf (line, ", sse4.1");
if (features & SSE4_2)
line += sprintf (line, ", sse4.2");
if (features & AVX)
line += sprintf (line, ", avx");
if (features & AVX2)
line += sprintf (line, ", avx2");
 
return ret;
}
/drivers/video/Intel-2D/sna_cpuid.h
0,0 → 1,86
/*
* Copyright (c) 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
/* Small wrapper around compiler specific implementation details of cpuid */
 
#ifndef SNA_CPUID_H
#define SNA_CPUID_H
 
#include "compiler.h"
 
#if HAS_GCC(4, 4) /* for __cpuid_count() */
#include <cpuid.h>
#else
#define __get_cpuid_max(x, y) 0
#define __cpuid(level, a, b, c, d)
#define __cpuid_count(level, count, a, b, c, d)
#endif
 
#define BASIC_CPUID 0x0
#define EXTENDED_CPUID 0x80000000
 
#ifndef bit_MMX
#define bit_MMX (1 << 23)
#endif
 
#ifndef bit_SSE
#define bit_SSE (1 << 25)
#endif
 
#ifndef bit_SSE2
#define bit_SSE2 (1 << 26)
#endif
 
#ifndef bit_SSE3
#define bit_SSE3 (1 << 0)
#endif
 
#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif
 
#ifndef bit_SSE4_1
#define bit_SSE4_1 (1 << 19)
#endif
 
#ifndef bit_SSE4_2
#define bit_SSE4_2 (1 << 20)
#endif
 
#ifndef bit_OSXSAVE
#define bit_OSXSAVE (1 << 27)
#endif
 
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif
 
#ifndef bit_AVX2
#define bit_AVX2 (1<<5)
#endif
 
#endif /* SNA_CPUID_H */
/drivers/video/Intel-2D/sna_render.h
30,11 → 30,7
struct sna_composite_op {
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
fastcall void (*box)(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
 
void (*done)(struct sna *sna, const struct sna_composite_op *op);
 
struct sna_damage **damage;
192,9 → 188,6
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap);
 
bool (*fill_boxes)(struct sna *sna,
358,10 → 351,10
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
uint16_t last_pipelined_pointers;
 
bool needs_invariant;
};
505,17 → 498,15
uint32_t format);
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
void no_render_init(struct sna *sna);
const char *no_render_init(struct sna *sna);
const char *gen2_render_init(struct sna *sna, const char *backend);
const char *gen3_render_init(struct sna *sna, const char *backend);
const char *gen4_render_init(struct sna *sna, const char *backend);
const char *gen5_render_init(struct sna *sna, const char *backend);
const char *gen6_render_init(struct sna *sna, const char *backend);
const char *gen7_render_init(struct sna *sna, const char *backend);
 
bool gen2_render_init(struct sna *sna);
bool gen3_render_init(struct sna *sna);
bool gen4_render_init(struct sna *sna);
bool gen5_render_init(struct sna *sna);
bool gen6_render_init(struct sna *sna);
bool gen7_render_init(struct sna *sna);
 
#if 0
 
bool sna_tiling_composite(uint32_t op,
PicturePtr src,
PicturePtr mask,
683,7 → 674,8
bool
sna_render_composite_redirect(struct sna *sna,
struct sna_composite_op *op,
int x, int y, int width, int height);
int x, int y, int width, int height,
bool partial);
 
void
sna_render_composite_redirect_done(struct sna *sna,
691,8 → 683,8
 
bool
sna_composite_mask_is_opaque(PicturePtr mask);
#endif
 
#endif
void sna_vertex_init(struct sna *sna);
 
static inline void sna_vertex_lock(struct sna_render *r)
726,4 → 718,69
return was_active;
}
 
#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
PICT_FORMAT_TYPE(format), \
0, \
PICT_FORMAT_R(format), \
PICT_FORMAT_G(format), \
PICT_FORMAT_B(format))
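/* Illustration (assumption: standard X Render PICT_* codes): alphaless()
 * zeroes only the A width of PICT_FORMAT(), preserving the channel layout,
 * so an ARGB format maps onto its XRGB twin, e.g.
 *	alphaless(PICT_a8r8g8b8) == PICT_x8r8g8b8
 *	alphaless(PICT_a1r5g5b5) == PICT_x1r5g5b5
 */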
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
#endif /* SNA_RENDER_H */
/drivers/video/Intel-2D/sna_render_inline.h
17,7 → 17,7
dst->drawable.height > sna->render.max_3d_size);
}
 
static inline float pack_2s(int16_t x, int16_t y)
static force_inline float pack_2s(int16_t x, int16_t y)
{
union {
struct sna_coordinate p;
28,21 → 28,21
return u.f;
}
 
static inline int vertex_space(struct sna *sna)
static force_inline int vertex_space(struct sna *sna)
{
return sna->render.vertex_size - sna->render.vertex_used;
}
static inline void vertex_emit(struct sna *sna, float v)
static force_inline void vertex_emit(struct sna *sna, float v)
{
assert(sna->render.vertex_used < sna->render.vertex_size);
sna->render.vertices[sna->render.vertex_used++] = v;
}
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
vertex_emit(sna, pack_2s(x, y));
}
 
static inline int batch_space(struct sna *sna)
static force_inline int batch_space(struct sna *sna)
{
assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);
49,7 → 49,7
return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED;
}
 
static inline void batch_emit(struct sna *sna, uint32_t dword)
static force_inline void batch_emit(struct sna *sna, uint32_t dword)
{
assert(sna->kgem.mode != KGEM_NONE);
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);
56,7 → 56,7
sna->kgem.batch[sna->kgem.nbatch++] = dword;
}
 
static inline void batch_emit_float(struct sna *sna, float f)
static force_inline void batch_emit_float(struct sna *sna, float f)
{
union {
uint32_t dw;
67,5 → 67,4
}
 
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/sna_transform.c
0,0 → 1,147
/*
* Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved.
* Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Jesse Barnes <jbarnes@virtuousgeek.org>
* Chris Wilson <chris@chris-wilson.co.uk>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
 
#define pixman_fixed_e ((pixman_fixed_t) 1)
#define pixman_fixed_1 (pixman_int_to_fixed(1))
#define pixman_fixed_1_minus_e (pixman_fixed_1 - pixman_fixed_e)
#define pixman_fixed_to_int(f) ((int) ((f) >> 16))
#define pixman_int_to_fixed(i) ((pixman_fixed_t) ((i) << 16))
#define pixman_fixed_fraction(f) ((f) & pixman_fixed_1_minus_e)
 
#define IntToxFixed(i) pixman_int_to_fixed(i)
 
/**
* Returns whether the provided transform is affine.
*
* transform may be null.
*/
bool sna_transform_is_affine(const PictTransform *t)
{
if (t == NULL)
return true;
 
return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
}
 
bool
sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx,
pixman_fixed_t *ty)
{
if (t == NULL) {
*tx = *ty = 0;
return true;
}
 
if (t->matrix[0][0] != IntToxFixed(1) ||
t->matrix[0][1] != 0 ||
t->matrix[1][0] != 0 ||
t->matrix[1][1] != IntToxFixed(1) ||
t->matrix[2][0] != 0 ||
t->matrix[2][1] != 0 ||
t->matrix[2][2] != IntToxFixed(1))
return false;
 
*tx = t->matrix[0][2];
*ty = t->matrix[1][2];
return true;
}
 
bool
sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty)
{
if (t == NULL) {
*tx = *ty = 0;
return true;
}
 
if (t->matrix[0][0] != IntToxFixed(1) ||
t->matrix[0][1] != 0 ||
t->matrix[1][0] != 0 ||
t->matrix[1][1] != IntToxFixed(1) ||
t->matrix[2][0] != 0 ||
t->matrix[2][1] != 0 ||
t->matrix[2][2] != IntToxFixed(1))
return false;
 
if (pixman_fixed_fraction(t->matrix[0][2]) ||
pixman_fixed_fraction(t->matrix[1][2]))
return false;
 
*tx = pixman_fixed_to_int(t->matrix[0][2]);
*ty = pixman_fixed_to_int(t->matrix[1][2]);
return true;
}
 
/**
* Returns the floating-point coordinates transformed by the given transform.
*/
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out)
{
if (transform == NULL) {
*x_out = x;
*y_out = y;
} else
_sna_get_transformed_coordinates(x, y, transform, x_out, y_out);
}
 
/**
* Returns the un-normalized floating-point coordinates transformed by the given transform.
*/
void
sna_get_transformed_coordinates_3d(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out, float *w_out)
{
if (transform == NULL) {
*x_out = x;
*y_out = y;
*w_out = 1;
} else {
int64_t result[3];
 
if (_sna_transform_point(transform, x, y, result)) {
*x_out = result[0] / 65536.;
*y_out = result[1] / 65536.;
*w_out = result[2] / 65536.;
} else {
*x_out = *y_out = 0;
*w_out = 1.;
}
}
}
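/* Hypothetical sketch (not part of the file): how a caller might combine the
 * helpers above.  When the picture transform is a whole-pixel translation the
 * source origin can simply be offset and sampled directly; otherwise each
 * vertex goes through sna_get_transformed_coordinates(). */
static bool
adjust_origin_for_transform(const PictTransform *t, int16_t *x, int16_t *y)
{
	int16_t tx, ty;

	if (!sna_transform_is_integer_translation(t, &tx, &ty))
		return false;	/* caller must transform coordinates instead */

	*x += tx;
	*y += ty;
	return true;
}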