Subversion Repositories Kolibri OS

Compare Revisions

Rev 4251 → Rev 4250

/drivers/video/Intel-2D/sna_transform.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen5.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen4.c
File deleted
/drivers/video/Intel-2D/sna_cpu.c
File deleted
/drivers/video/Intel-2D/kgem.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen3.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen7.c
File deleted
/drivers/video/Intel-2D/sna_cpuid.h
File deleted
/drivers/video/Intel-2D/i915_pciids.h
File deleted
/drivers/video/Intel-2D/gen3_render.c
107,15 → 107,7
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}
};
 
#define xFixedToDouble(f) pixman_fixed_to_double(f)
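/* [editor's note, hedged] xFixedToDouble() converts pixman's 16.16 fixed-point
 * values to double, i.e. a divide by 65536. A minimal sketch, assuming only the
 * standard pixman helpers:
 *
 *   pixman_fixed_t f = pixman_double_to_fixed(1.5);  // 0x00018000
 *   double d = xFixedToDouble(f);                    // 1.5
 *   // equivalent by hand: d = (double)f / 65536.0;
 */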
195,14 → 187,7
switch (format) {
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
case PICT_r5g6b5:
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a8:
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return false;
default:
return true;
222,257 → 207,15
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return BIAS | COLR_BUF_ARGB8888;
case PICT_r5g6b5:
case PICT_b5g6r5:
return BIAS | COLR_BUF_RGB565;
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a1b5g5r5:
case PICT_x1b5g5r5:
return BIAS | COLR_BUF_ARGB1555;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a2b10g10r10:
case PICT_x2b10g10r10:
return BIAS | COLR_BUF_ARGB2AAA;
case PICT_a8:
return BIAS | COLR_BUF_8BIT;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
case PICT_a4b4g4r4:
case PICT_x4b4g4r4:
return BIAS | COLR_BUF_ARGB4444;
}
#undef BIAS
}
 
 
#if 0
static bool gen3_check_repeat(PicturePtr p)
{
if (!p->repeat)
return true;
 
switch (p->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
return true;
default:
return false;
}
}
 
static uint32_t gen3_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
case PictFilterNearest:
return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
case PictFilterBilinear:
return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
}
}
 
static bool gen3_check_filter(PicturePtr p)
{
switch (p->filter) {
case PictFilterNearest:
case PictFilterBilinear:
return true;
default:
return false;
}
}
fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
int16_t dst_x, dst_y;
int16_t src_x, src_y;
 
dst_x = r->dst.x + op->dst.x;
dst_y = r->dst.y + op->dst.y;
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
 
gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
OUT_VERTEX(src_x + r->width);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y);
}
 
fastcall static void
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2;
v[1] = box->y2;
v[2] = box->x2 + op->src.offset[0];
v[3] = box->y2 + op->src.offset[1];
 
v[4] = box->x1;
v[5] = box->y2;
v[6] = box->x1 + op->src.offset[0];
v[7] = box->y2 + op->src.offset[1];
 
v[8] = box->x1;
v[9] = box->y1;
v[10] = box->x1 + op->src.offset[0];
v[11] = box->y1 + op->src.offset[1];
 
v += 12;
box++;
} while (--nbox);
}
fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
const PictTransform *transform = op->src.transform;
 
do {
v[0] = box->x2;
v[1] = box->y2;
_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[2], &v[3]);
 
v[4] = box->x1;
v[5] = box->y2;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[6], &v[7]);
 
v[8] = box->x1;
v[9] = box->y1;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y1 + op->src.offset[1],
transform, op->src.scale,
&v[10], &v[11]);
 
box++;
v += 12;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[2] = v[6] + w * op->src.scale[0];
 
v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
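/* [editor's note, hedged sketch] The 12 floats written above are the three
 * vertices of a gen3 RECTLIST rectangle, each (x, y, s, t), emitted as
 * bottom-right, bottom-left, top-left (the hardware derives the fourth corner).
 * Index map, where dst = r->dst + op->dst, ox/oy = op->src.offset and
 * sx/sy = op->src.scale:
 *
 *   v[0..3]  = (dst.x+w, dst.y+h, (r->src.x+ox+w)*sx, (r->src.y+oy+h)*sy)
 *   v[4..7]  = (dst.x,   dst.y+h, (r->src.x+ox)*sx,   (r->src.y+oy+h)*sy)
 *   v[8..11] = (dst.x,   dst.y,   (r->src.x+ox)*sx,   (r->src.y+oy)*sy)
 */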
 
fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2 + op->dst.x;
v[8] = v[4] = box->x1 + op->dst.x;
v[5] = v[1] = box->y2 + op->dst.y;
v[9] = box->y1 + op->dst.y;
 
v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 12;
box++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x;
v[9] = r->dst.y;
 
v[0] = v[4] + w;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = r->src.x * op->src.scale[0];
v[11] = r->src.y * op->src.scale[1];
 
v[2] = v[6] + w * op->src.scale[0];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
v[2] = v[6] + w * op->mask.scale[0];
 
v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif
 
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
1141,7 → 884,6
 
state->current_dst = bo->unique_id;
}
assert(bo->exec);
kgem_bo_mark_dirty(bo);
}
 
1196,7 → 938,6
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->src.card_format);
map[tex_count * 2 + 0] =
op->src.card_format |
gen3_ms_tiling(op->src.bo->tiling) |
1232,7 → 973,6
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->mask.card_format);
map[tex_count * 2 + 0] =
op->mask.card_format |
gen3_ms_tiling(op->mask.bo->tiling) |
1617,8 → 1357,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
1754,674 → 1494,88
return false;
}
 
#if 0
static int
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->card_format = 0;
 
if (picture->pDrawable == NULL) {
SourcePict *source = picture->pSourcePict;
int ret = -1;
 
switch (source->type) {
case SourcePictTypeSolidFill:
DBG(("%s: solid fill [%08x], format %08x\n",
__FUNCTION__,
(unsigned)source->solidFill.color,
(unsigned)picture->format));
ret = gen3_init_solid(channel, source->solidFill.color);
break;
 
case SourcePictTypeLinear:
ret = gen3_init_linear(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
 
case SourcePictTypeRadial:
ret = gen3_init_radial(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
}
 
if (ret == -1) {
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (sna_picture_is_solid(picture, &color)) {
DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color);
}
 
if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
}
 
if (!gen3_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
if (!gen3_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
channel->pict_format = picture->format;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else {
channel->transform = picture->transform;
channel->is_affine = sna_transform_is_affine(picture->transform);
}
 
if (!gen3_composite_channel_set_format(channel, picture->format) &&
!gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
assert(channel->card_format);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
__FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height,
x, y, w, h));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
/* If it is a solid, try to use the BLT paths */
if (!picture->pDrawable)
return picture->pSourcePict->type == SourcePictTypeSolidFill;
 
if (picture->pDrawable->width == 1 &&
picture->pDrawable->height == 1 &&
picture->repeat)
return true;
 
if (too_large(picture->pDrawable->width, picture->pDrawable->height))
return true;
 
return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst,
PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
DBG(("%s: target too large for 3D pipe (%d, %d)\n",
__FUNCTION__,
dst->pDrawable->width, dst->pDrawable->height));
return true;
}
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return source_use_blt(sna, src);
}
#endif
 
static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}
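/* [editor's note, hedged] The realignment above rounds vertex_used up to a
 * multiple of the new floats_per_vertex so that vertex_index * floats_per_vertex
 * again points at the start of a whole vertex. Worked example, assuming those
 * fields:
 *
 *   vertex_used = 10, new floats_per_vertex = 4
 *   vertex_index = (10 + 4 - 1) / 4 = 3
 *   vertex_used  = 3 * 4 = 12        // two floats of padding are skipped
 */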
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
#if 0
static bool
gen3_composite_fallback(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen3_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* readback in order to upload the source, do it all
* on the cpu.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
if (mask &&
mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
op != PictOpOver) {
DBG(("%s: component-alpha mask with op=%d, should fallback\n",
__FUNCTION__, op));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask && mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
__FUNCTION__, dst_use_cpu(dst_pixmap)));
return dst_use_cpu(dst_pixmap);
}
 
static bool
gen3_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s()\n", __FUNCTION__));
 
if (op >= ARRAY_SIZE(gen3_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
/* Try to use the BLT engine unless it implies a
* 3D -> 2D context switch.
*/
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen3_composite_fallback(sna, op, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
mask_x, mask_y,
dst_x, dst_y,
width, height,
tmp);
 
if (!gen3_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height)) {
DBG(("%s: unable to set render target\n",
__FUNCTION__));
return false;
}
 
tmp->op = op;
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
if (too_large(tmp->dst.width, tmp->dst.height) ||
!gen3_check_pitch_3d(tmp->dst.bo)) {
if (!sna_render_composite_redirect(sna, tmp,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
}
 
tmp->u.gen3.num_constants = 0;
tmp->src.u.gen3.type = SHADER_TEXTURE;
tmp->src.is_affine = true;
DBG(("%s: preparing source\n", __FUNCTION__));
switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
tmp->src.u.gen3.type = SHADER_ZERO;
break;
case 1:
if (mask == NULL && tmp->src.bo &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen3_composite_channel_convert(&tmp->src);
break;
}
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
 
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->mask.is_affine = true;
tmp->need_magic_ca_pass = false;
tmp->has_component_alpha = false;
if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, mask_x, mask_y)) {
tmp->mask.u.gen3.type = SHADER_TEXTURE;
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
mask_x, mask_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
tmp->mask.u.gen3.type = SHADER_ZERO;
break;
case 1:
gen3_composite_channel_convert(&tmp->mask);
break;
}
}
DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
if (tmp->mask.u.gen3.type == SHADER_ZERO) {
if (tmp->src.bo) {
kgem_bo_destroy(&sna->kgem,
tmp->src.bo);
tmp->src.bo = NULL;
}
tmp->src.u.gen3.type = SHADER_ZERO;
tmp->mask.u.gen3.type = SHADER_NONE;
}
 
if (tmp->mask.u.gen3.type != SHADER_NONE) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
/* Check if it's component alpha that relies on a source alpha
* and on the source value. We can only get one of those
* into the single source value that we get to blend with.
*/
DBG(("%s: component-alpha mask: %d\n",
__FUNCTION__, tmp->mask.u.gen3.type));
tmp->has_component_alpha = true;
if (tmp->mask.u.gen3.type == SHADER_WHITE) {
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = false;
} else if (gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
if (op != PictOpOver)
goto cleanup_mask;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
} else {
if (tmp->mask.is_opaque) {
tmp->mask.u.gen3.type = SHADER_NONE;
} else if (is_constant_ps(tmp->src.u.gen3.type) &&
is_constant_ps(tmp->mask.u.gen3.type)) {
uint32_t v;
 
v = multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
24);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
16);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
8);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
0);
 
DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
__FUNCTION__,
tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
v));
 
tmp->src.u.gen3.type = SHADER_CONSTANT;
tmp->src.u.gen3.mode = v;
tmp->src.is_opaque = false;
 
tmp->mask.u.gen3.type = SHADER_NONE;
}
}
}
}
DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
tmp->src.is_affine, tmp->mask.is_affine));
 
tmp->prim_emit = gen3_emit_composite_primitive;
if (is_constant_ps(tmp->mask.u.gen3.type)) {
switch (tmp->src.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant;
tmp->emit_boxes = gen3_emit_composite_boxes_constant;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
}
}
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
}
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
}
break;
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
if (tmp->mask.transform == NULL) {
if (is_constant_ps(tmp->src.u.gen3.type)) {
if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
}
}
} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
}
}
}
}
 
tmp->floats_per_vertex = 2;
if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex,
tmp->prim_emit != gen3_emit_composite_primitive));
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
tmp->boxes = gen3_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen3_render_composite_boxes;
tmp->thread_boxes = gen3_render_composite_boxes__thread;
}
tmp->done = gen3_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
#endif
 
 
 
2556,6 → 1710,19
 
 
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
 
 
2658,59 → 1825,13
 
 
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
const char *gen3_render_init(struct sna *sna, const char *backend)
{
struct sna_render *render = &sna->render;
 
#if 0
#if !NO_COMPOSITE
render->composite = gen3_render_composite;
render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
render->check_composite_spans = gen3_check_composite_spans;
render->composite_spans = gen3_render_composite_spans;
render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
render->video = gen3_render_video;
 
render->copy_boxes = gen3_render_copy_boxes;
render->copy = gen3_render_copy;
 
render->fill_boxes = gen3_render_fill_boxes;
render->fill = gen3_render_fill;
render->fill_one = gen3_render_fill_one;
#endif
 
render->blit_tex = gen3_blit_tex;
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return "Alviso (gen3)";
}
 
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
2812,3 → 1933,39
gen3_align_vertex(sna, tmp);
return true;
}
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
bool gen3_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
 
// render->video = gen3_render_video;
 
render->blit_tex = gen3_blit_tex;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return true;
}
/drivers/video/Intel-2D/kgem-sna.c
0,0 → 1,4303
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
static inline
int user_free(void *mem)
{
int val;
__asm__ __volatile__(
"int $0x40"
:"=a"(val)
:"a"(68),"b"(12),"c"(mem));
return val;
}
 
 
unsigned int cpu_cache_size();
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
#define DBG_NO_HW 0
#define DBG_NO_TILING 1
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 1
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
 
#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif
 
#define SHOW_BATCH 1
 
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif
 
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
* offsets be aligned to a cacheline (64 bytes).
*/
#define UPLOAD_ALIGNMENT 128
 
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
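/* [editor's note, hedged] These macros tag the kind of mapping in the two low
 * bits of an (at least 4-byte aligned) pointer: untagged = GTT map, bit 0 set =
 * CPU map, bits 0-1 set = user pointer; MAP() strips the tag again. A minimal
 * sketch of the tagging:
 *
 *   void *cpu = MAKE_CPU_MAP(ptr);   // (uintptr_t)ptr | 1
 *   void *usr = MAKE_USER_MAP(ptr);  // (uintptr_t)ptr | 3
 *   // IS_USER_MAP(cpu) == 0, IS_USER_MAP(usr) != 0
 *   // MAP(cpu) == ptr,       MAP(usr) == ptr
 */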
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
 
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
struct local_i915_gem_userptr {
uint64_t user_ptr;
uint32_t user_size;
uint32_t flags;
#define I915_USERPTR_READ_ONLY (1<<0)
#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
uint32_t handle;
};
 
#define UNCACHED 0
#define SNOOPED 1
 
struct local_i915_gem_cacheing {
uint32_t handle;
uint32_t cacheing;
};
 
#define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHING
 
struct local_fbinfo {
int width;
int height;
int pitch;
int tiling;
};
 
struct kgem_buffer {
struct kgem_bo base;
void *mem;
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
 
static inline int bytes(struct kgem_bo *bo)
{
return __kgem_bo_size(bo);
}
 
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
 
#ifdef DEBUG_MEMORY
static void debug_alloc(struct kgem *kgem, size_t size)
{
kgem->debug_memory.bo_allocs++;
kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
 
static void kgem_sna_reset(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.reset(sna);
sna->blt_state.fill_bo = 0;
}
 
static void kgem_sna_flush(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.flush(sna);
 
// if (sna->render.solid_cache.dirty)
// sna_render_flush_solid(sna);
}
 
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
struct drm_i915_gem_set_tiling set_tiling;
int ret;
 
if (DBG_NO_TILING)
return false;
/*
VG_CLEAR(set_tiling);
do {
set_tiling.handle = handle;
set_tiling.tiling_mode = tiling;
set_tiling.stride = stride;
 
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
*/
return false;//ret == 0;
}
 
static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing)
{
struct local_i915_gem_cacheing arg;
 
VG_CLEAR(arg);
arg.handle = handle;
arg.cacheing = cacheing;
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0;
}
 
 
 
 
 
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: not retiring per-request\n", __FUNCTION__));
return false;
}
 
if (!kgem->need_retire) {
DBG(("%s: nothing to retire\n", __FUNCTION__));
return false;
}
 
if (kgem_retire(kgem))
return true;
 
if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
DBG(("%s: not throttling\n", __FUNCTION__));
return false;
}
 
kgem_throttle(kgem);
return kgem_retire(kgem);
}
 
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
void *ptr;
 
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
 
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
printf("%s: failed to retrieve GTT offset for handle=%d: %d\n",
__FUNCTION__, bo->handle, 0);
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry_gtt;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry_gtt;
}
 
return NULL;
}
 
retry_mmap:
ptr = (void*)(int)mmap_arg.offset;
if (ptr == NULL) {
printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
 
}
 
return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
/* align the transfer to cachelines; fortuitously this is safe! */
if ((offset | length) & 63) {
pwrite.offset = offset & ~63;
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
} else {
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
}
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
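/* [editor's note, hedged] Worked example of the cacheline widening in
 * gem_write() above, for offset=100, length=10:
 *
 *   pwrite.offset   = 100 & ~63           = 64
 *   pwrite.size     = ALIGN(110, 64) - 64 = 64
 *   pwrite.data_ptr = src + 64 - 100      = src - 36
 *
 * i.e. the kernel copies a whole 64-byte window starting 36 bytes before src;
 * per the comment this is "fortuitously safe", presumably because the staging
 * buffers handed to gem_write() carry cacheline padding around the payload.
 */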
 
 
bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
 
VG_CLEAR(busy);
busy.handle = handle;
busy.busy = !kgem->wedged;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
__FUNCTION__, handle, busy.busy, kgem->wedged));
 
return busy.busy;
}
 
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
__FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
__kgem_busy(kgem, bo->handle)));
assert(bo->exec == NULL);
assert(list_is_empty(&bo->vma));
 
if (bo->rq) {
if (!__kgem_busy(kgem, bo->handle)) {
__kgem_bo_clear_busy(bo);
kgem_retire(kgem);
}
} else {
assert(!bo->needs_flush);
ASSERT_IDLE(kgem, bo->handle);
}
}
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
assert(bo->refcnt);
assert(!bo->purged);
assert(bo->proxy == NULL);
ASSERT_IDLE(kgem, bo->handle);
 
assert(length <= bytes(bo));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return false;
 
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
if (bo->exec == NULL) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_NONE;
}
return true;
}
 
static uint32_t gem_create(int fd, int num_pages)
{
struct drm_i915_gem_create create;
 
VG_CLEAR(create);
create.handle = 0;
create.size = PAGE_SIZE * num_pages;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 
return create.handle;
}
 
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->exec == NULL);
assert(!bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = 1;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return true;
#endif
}
 
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
if (!bo->purged)
return true;
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
return madv.retained;
 
return false;
#endif
}
 
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = !madv.retained;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return false;
#endif
}
 
static void gem_close(int fd, uint32_t handle)
{
struct drm_gem_close close;
 
VG_CLEAR(close);
close.handle = handle;
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
 
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
#else
unsigned int v = 0;
 
while (word >>= 1)
v++;
 
return v;
#endif
}
 
constant inline static int cache_bucket(int num_pages)
{
return __fls(num_pages);
}
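/* [editor's note, hedged] cache_bucket() groups buffers by the most significant
 * set bit of their page count, i.e. bucket N covers [2^N, 2^(N+1)) pages:
 *
 *   cache_bucket(1) == 0   // 1 page
 *   cache_bucket(2) == 1   // 2-3 pages
 *   cache_bucket(4) == 2   // 4-7 pages
 *   cache_bucket(8) == 3   // 8-15 pages
 */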
 
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
int handle, int num_pages)
{
assert(num_pages);
memset(bo, 0, sizeof(*bo));
 
bo->refcnt = 1;
bo->handle = handle;
bo->target_handle = -1;
num_pages(bo) = num_pages;
bucket(bo) = cache_bucket(num_pages);
bo->reusable = true;
bo->domain = DOMAIN_CPU;
list_init(&bo->request);
list_init(&bo->list);
list_init(&bo->vma);
 
return bo;
}
 
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
struct kgem_bo *bo;
 
if (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
} else {
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
}
 
return __kgem_bo_init(bo, handle, num_pages);
}
 
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
struct kgem_request *rq;
 
rq = __kgem_freed_request;
if (rq) {
__kgem_freed_request = *(struct kgem_request **)rq;
} else {
rq = malloc(sizeof(*rq));
if (rq == NULL)
rq = &kgem->static_request;
}
 
list_init(&rq->buffers);
rq->bo = NULL;
rq->ring = 0;
 
return rq;
}
 
static void __kgem_request_free(struct kgem_request *rq)
{
_list_del(&rq->list);
*(struct kgem_request **)rq = __kgem_freed_request;
__kgem_freed_request = rq;
}
 
static struct list *inactive(struct kgem *kgem, int num_pages)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->inactive[cache_bucket(num_pages)];
}
 
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->active[cache_bucket(num_pages)][tiling];
}
 
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
/* XXX assume that only future chipsets are unknown and follow
* the post gen2 PCI layout.
*/
// return dev->regions[gen < 030 ? 0 : 2].size;
 
return 0;
}
 
static size_t
total_ram_size(void)
{
uint32_t data[9];
size_t size = 0;
 
asm volatile("int $0x40"
: "=a" (size)
: "a" (18),"b"(20), "c" (data)
: "memory");
 
return size != -1 ? size : 0;
}
 
static int gem_param(struct kgem *kgem, int name)
{
drm_i915_getparam_t gp;
int v = -1; /* No param uses the sign bit, reserve it for errors */
 
VG_CLEAR(gp);
gp.param = name;
gp.value = &v;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
return -1;
 
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
return v;
}
 
static bool test_has_execbuffer2(struct kgem *kgem)
{
return 1;
}
 
static bool test_has_no_reloc(struct kgem *kgem)
{
if (DBG_NO_FAST_RELOC)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
 
static bool test_has_handle_lut(struct kgem *kgem)
{
if (DBG_NO_HANDLE_LUT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
 
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
bool detected = false;
int ret;
 
if (DBG_NO_SEMAPHORES)
return false;
 
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
if (ret != -1)
return ret > 0;
 
return detected;
}
 
static bool __kgem_throttle(struct kgem *kgem)
{
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
return false;
 
return errno == EIO;
}
 
static bool is_hw_supported(struct kgem *kgem,
struct pci_device *dev)
{
if (DBG_NO_HW)
return false;
 
if (!test_has_execbuffer2(kgem))
return false;
 
if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
return kgem->has_blt;
 
/* Although pre-855gm the GMCH is fubar, it works mostly. So
* let the user decide through "NoAccel" whether or not to risk
* hw acceleration.
*/
 
if (kgem->gen == 060 && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
 
if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
return kgem->has_blt;
 
return true;
}
 
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
if (kgem->gen < 040) {
if (DBG_NO_RELAXED_FENCING)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
} else
return true;
}
 
static bool test_has_llc(struct kgem *kgem)
{
int has_llc = -1;
 
if (DBG_NO_LLC)
return false;
 
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
}
 
return has_llc;
}
 
static bool test_has_cacheing(struct kgem *kgem)
{
uint32_t handle;
bool ret;
 
if (DBG_NO_CACHE_LEVEL)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
handle = gem_create(kgem->fd, 1);
if (handle == 0)
return false;
 
ret = gem_set_cacheing(kgem->fd, handle, UNCACHED);
gem_close(kgem->fd, handle);
return ret;
}
 
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
uint32_t handle;
void *ptr;
 
if (DBG_NO_USERPTR)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
ptr = malloc(PAGE_SIZE);
handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
gem_close(kgem->fd, handle);
free(ptr);
 
return handle != 0;
#else
return false;
#endif
}
 
static bool test_has_secure_batches(struct kgem *kgem)
{
if (DBG_NO_SECURE_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}
 
static bool test_has_pinned_batches(struct kgem *kgem)
{
if (DBG_NO_PINNED_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
}
 
 
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
int count[2] = { 2, 1 };
int size[2] = { 1, 2 };
int n, i;
 
if (kgem->wedged)
return true;
 
for (n = 0; n < ARRAY_SIZE(count); n++) {
for (i = 0; i < count[n]; i++) {
struct drm_i915_gem_pin pin;
struct kgem_bo *bo;
 
VG_CLEAR(pin);
 
pin.handle = gem_create(kgem->fd, size[n]);
if (pin.handle == 0)
goto err;
 
DBG(("%s: new handle=%d, num_pages=%d\n",
__FUNCTION__, pin.handle, size[n]));
 
bo = __kgem_bo_alloc(pin.handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, pin.handle);
goto err;
}
 
pin.alignment = 0;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
gem_close(kgem->fd, pin.handle);
goto err;
}
bo->presumed_offset = pin.offset;
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
}
 
return true;
 
err:
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
 
/* For simplicity populate the lists with a single unpinned bo */
for (n = 0; n < ARRAY_SIZE(count); n++) {
struct kgem_bo *bo;
uint32_t handle;
 
handle = gem_create(kgem->fd, size[n]);
if (handle == 0)
break;
 
bo = __kgem_bo_alloc(handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, handle);
break;
}
 
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
return false;
}
 
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
unsigned half_gpu_max;
unsigned int i, j;
 
DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
 
memset(kgem, 0, sizeof(*kgem));
 
kgem->fd = fd;
kgem->gen = gen;
 
list_init(&kgem->requests[0]);
list_init(&kgem->requests[1]);
list_init(&kgem->batch_buffers);
list_init(&kgem->active_buffers);
list_init(&kgem->flushing);
list_init(&kgem->large);
list_init(&kgem->large_inactive);
list_init(&kgem->snoop);
list_init(&kgem->scanout);
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
list_init(&kgem->pinned_batches[i]);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
list_init(&kgem->active[i][j]);
}
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
list_init(&kgem->vma[i].inactive[j]);
}
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
 
kgem->has_relaxed_delta =
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));
 
kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));
 
kgem->has_llc = test_has_llc(kgem);
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
kgem->has_llc));
 
kgem->has_cacheing = test_has_cacheing(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_cacheing));
 
kgem->has_userptr = test_has_userptr(kgem);
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
 
kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
 
kgem->has_handle_lut = test_has_handle_lut(kgem);
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
 
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));
 
kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
 
kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
 
kgem->has_pinned_batches = test_has_pinned_batches(kgem);
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
kgem->has_pinned_batches));
 
if (!is_hw_supported(kgem, dev)) {
printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
kgem->wedged = 1;
} else if (__kgem_throttle(kgem)) {
printf("Detected a hung GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;
 
if (!kgem_init_pinned_batches(kgem) && gen == 020) {
printf("Unable to reserve memory for GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
 
kgem->min_alignment = 16;
if (gen < 040)
kgem->min_alignment = 64;
 
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
kgem->half_cpu_cache_pages));
 
kgem->next_request = __kgem_request_alloc(kgem);
 
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
kgem->has_llc, kgem->has_cacheing, kgem->has_userptr));
 
VG_CLEAR(aperture);
aperture.aper_size = 0;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;
 
DBG(("%s: aperture size %lld, available now %lld\n",
__FUNCTION__,
(long long)aperture.aper_size,
(long long)aperture.aper_available_size));
 
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
}
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
 
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
kgem->aperture_mappable = aperture.aper_size;
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
 
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
 
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc)
kgem->max_gpu_size = MAX_CACHE_SIZE;
 
totalram = total_ram_size();
if (totalram == 0) {
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
__FUNCTION__));
totalram = kgem->aperture_total;
}
DBG(("%s: total ram=%u\n", __FUNCTION__, totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
 
kgem->max_cpu_size = kgem->max_object_size;
 
half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;
 
if (kgem->has_llc)
kgem->max_upload_tile_size = kgem->max_copy_tile_size;
else
kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
if (kgem->max_upload_tile_size > half_gpu_max)
kgem->max_upload_tile_size = half_gpu_max;
 
kgem->large_object_size = MAX_CACHE_SIZE;
if (kgem->large_object_size > kgem->max_gpu_size)
kgem->large_object_size = kgem->max_gpu_size;
 
if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) {
if (kgem->large_object_size > kgem->max_cpu_size)
kgem->large_object_size = kgem->max_cpu_size;
} else
kgem->max_cpu_size = 0;
if (DBG_NO_CPU)
kgem->max_cpu_size = 0;
 
DBG(("%s: maximum object size=%d\n",
__FUNCTION__, kgem->max_object_size));
DBG(("%s: large object thresold=%d\n",
__FUNCTION__, kgem->large_object_size));
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
__FUNCTION__,
kgem->max_gpu_size, kgem->max_cpu_size,
kgem->max_upload_tile_size, kgem->max_copy_tile_size));
 
/* Convert the aperture thresholds to pages */
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;
 
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
 
kgem->batch_flags_base = 0;
if (kgem->has_no_reloc)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
 
/* XXX hopefully a good approximation */
static uint32_t kgem_get_unique_id(struct kgem *kgem)
{
uint32_t id;
id = ++kgem->unique_id;
if (id == 0)
id = ++kgem->unique_id;
return id;
}
 
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_PRIME)
return 256;
if (flags & CREATE_SCANOUT)
return 64;
return kgem->min_alignment;
}
 
static uint32_t kgem_untiled_pitch(struct kgem *kgem,
uint32_t width, uint32_t bpp,
unsigned flags)
{
width = ALIGN(width, 2) * bpp >> 3;
return ALIGN(width, kgem_pitch_alignment(kgem, flags));
}
 
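/* Compute the byte size and pitch of the backing storage for a 2D
 * surface. The pitch is rounded up to the tile width and the height to
 * the tile height of the requested tiling mode; gen4+ then simply
 * page-aligns the result, while older chipsets additionally need a
 * power-of-two pitch for tiled surfaces (returning 0 if the pitch
 * exceeds the blitter limits) and, without relaxed fencing, a
 * power-of-two fence region. Worked example (illustrative values only):
 * 1024x768 at 32bpp with I915_TILING_X on gen4+ gives
 * pitch = ALIGN(1024*4, 512) = 4096 and
 * size = PAGE_ALIGN(4096 * ALIGN(768, 16)) = 3 MiB.
 */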
uint32_t kgem_surface_size(struct kgem *kgem,
bool relaxed_fencing,
unsigned flags,
uint32_t width,
uint32_t height,
uint32_t bpp,
uint32_t tiling,
uint32_t *pitch)
{
uint32_t tile_width, tile_height;
uint32_t size;
 
assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
 
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
tile_width = 128;
tile_height = 32;
} else {
tile_width = 512;
tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;
 
/* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
tile_height = 64;
break;
}
 
*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);
 
/* If it is too wide for the blitter, don't even bother. */
if (tiling != I915_TILING_NONE) {
if (*pitch > 8192)
return 0;
 
for (size = tile_width; size < *pitch; size <<= 1)
;
*pitch = size;
} else {
if (*pitch >= 32768)
return 0;
}
 
size = *pitch * height;
if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);
 
/* We need to allocate a pot fence region for a tiled buffer. */
if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
while (tile_width < size)
tile_width *= 2;
return tile_width;
}
 
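/* Round a height up to a whole number of tile rows: 1 row for linear
 * surfaces, 16 rows for X-tiling and 64 for Y-tiling on gen4+, while
 * gen2 uses 32 and gen3 uses 16 rows for any tiled surface.
 */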
static uint32_t kgem_aligned_height(struct kgem *kgem,
uint32_t height, uint32_t tiling)
{
uint32_t tile_height;
 
if (kgem->gen <= 030) {
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1;
} else switch (tiling) {
/* XXX align to an even tile row */
default:
case I915_TILING_NONE:
tile_height = 1;
break;
case I915_TILING_X:
tile_height = 16;
break;
case I915_TILING_Y:
tile_height = 64;
break;
}
 
return ALIGN(height, tile_height);
}
 
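/* Append a bo to the execbuffer object list of the batch under
 * construction and account its pages against the aperture estimate.
 * With the handle-LUT extension the exec-array index is used as the
 * relocation target instead of the raw GEM handle; kgem_add_bo() below
 * additionally ties the bo to the next request for later retirement.
 */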
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_exec_object2 *exec;
 
DBG(("%s: handle=%d, index=%d\n",
__FUNCTION__, bo->handle, kgem->nexec));
 
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
 
kgem->aperture += num_pages(bo);
 
return exec;
}
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
}
 
static uint32_t kgem_end_batch(struct kgem *kgem)
{
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
if (kgem->nbatch & 1)
kgem->batch[kgem->nbatch++] = MI_NOOP;
 
return kgem->nbatch;
}
 
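/* Relocations that point back into the batch buffer itself are recorded
 * with target_handle == ~0U while the batch is assembled; once the
 * batch bo has been allocated, patch them up with its handle and
 * presumed offset.
 */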
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;
 
if (kgem->nreloc__self == 0)
return;
 
for (n = 0; n < kgem->nreloc__self; n++) {
int i = kgem->reloc__self[n];
assert(kgem->reloc[i].target_handle == ~0U);
kgem->reloc[i].target_handle = bo->target_handle;
kgem->reloc[i].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
kgem->reloc[i].delta + bo->presumed_offset;
}
 
if (n == 256) {
for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
}
		}
	}
}
 
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_bo_binding *b;
 
b = bo->binding.next;
while (b) {
struct kgem_bo_binding *next = b->next;
free (b);
b = next;
}
}
 
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
int type = IS_CPU_MAP(bo->map);
 
assert(!IS_USER_MAP(bo->map));
 
DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
__FUNCTION__, type ? "CPU" : "GTT",
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
list_del(&bo->vma);
kgem->vma[type].count--;
}
}
 
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
assert(!bo->snoop || bo->rq == NULL);
 
#ifdef DEBUG_MEMORY
kgem->debug_memory.bo_allocs--;
kgem->debug_memory.bo_bytes -= bytes(bo);
#endif
 
kgem_bo_binding_free(kgem, bo);
 
if (IS_USER_MAP(bo->map)) {
assert(bo->rq == NULL);
assert(MAP(bo->map) != bo || bo->io);
if (bo != MAP(bo->map)) {
DBG(("%s: freeing snooped base\n", __FUNCTION__));
free(MAP(bo->map));
}
bo->map = NULL;
}
if (bo->map)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
 
_list_del(&bo->list);
_list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
 
if (!bo->io) {
*(struct kgem_bo **)bo = __kgem_freed_bo;
__kgem_freed_bo = bo;
} else
free(bo);
}
 
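/* Park an idle, unreferenced bo in the inactive cache for reuse.
 * Oversized objects go onto the separate large_inactive list; a bo that
 * is still mapped is also tracked on the matching vma inactive list,
 * except that a GTT mapping of a no-longer-mappable bo is discarded.
 */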
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(bo->domain != DOMAIN_GPU);
assert(!bo->proxy);
assert(!bo->io);
assert(!bo->scanout);
assert(!bo->needs_flush);
assert(list_is_empty(&bo->vma));
ASSERT_IDLE(kgem, bo->handle);
 
kgem->need_expire = true;
 
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
list_move(&bo->list, &kgem->large_inactive);
return;
}
 
assert(bo->flush == false);
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
(!type && !__kgem_bo_is_mappable(kgem, bo))) {
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
}
if (bo->map) {
list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
kgem->vma[type].count++;
}
}
}
 
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
struct kgem_bo *base;
 
if (!bo->io)
return bo;
 
assert(!bo->snoop);
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
__FUNCTION__, bo->handle));
/* transfer the handle to a minimum bo */
memcpy(base, bo, sizeof(*base));
base->io = false;
list_init(&base->list);
list_replace(&bo->request, &base->request);
list_replace(&bo->vma, &base->vma);
free(bo);
bo = base;
} else
bo->reusable = false;
 
return bo;
}
 
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (bo->map) {
assert(!list_is_empty(&bo->vma));
list_del(&bo->vma);
kgem->vma[IS_CPU_MAP(bo->map)].count--;
}
}
 
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq != NULL);
if (bo->rq == (void *)kgem)
list_del(&bo->request);
assert(list_is_empty(&bo->vma));
}
 
static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->scanout);
assert(!bo->refcnt);
assert(bo->exec == NULL);
assert(bo->proxy == NULL);
 
DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
__FUNCTION__, bo->handle, bo->delta, bo->reusable));
if (bo->delta) {
/* XXX will leak if we are not DRM_MASTER. *shrug* */
// drmModeRmFB(kgem->fd, bo->delta);
bo->delta = 0;
}
 
bo->scanout = false;
bo->flush = false;
bo->reusable = true;
 
if (kgem->has_llc &&
!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED))
bo->reusable = false;
}
 
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
 
DBG(("%s: size=%d, offset=%d, parent used=%d\n",
__FUNCTION__, bo->size.bytes, bo->delta, io->used));
 
if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
io->used = bo->delta;
}
 
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->scanout);
assert(bo->delta);
assert(!bo->snoop);
assert(!bo->io);
 
DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
__FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
if (bo->rq)
list_move_tail(&bo->list, &kgem->scanout);
else
list_move(&bo->list, &kgem->scanout);
}
 
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
 
if (num_pages(bo) > kgem->max_cpu_size >> 13) {
DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
__FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
kgem_bo_free(kgem, bo);
return;
}
 
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
 
DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
list_add(&bo->list, &kgem->snoop);
}
 
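/* First-fit search of the snoop (CPU-cached) bo cache: take the first
 * entry of at least num_pages that is not more than twice the request,
 * remembering the first oversized candidate as a fallback. Returns NULL
 * when snooping/userptr is unavailable or nothing suitable is cached.
 */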
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
 
DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
 
if ((kgem->has_cacheing | kgem->has_userptr) == 0)
return NULL;
 
if (list_is_empty(&kgem->snoop)) {
DBG(("%s: inactive and cache empty\n", __FUNCTION__));
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
}
 
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
assert(!bo->scanout);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
 
if (num_pages > num_pages(bo))
continue;
 
if (num_pages(bo) > 2*num_pages) {
if (first == NULL)
first = bo;
continue;
}
 
list_del(&bo->list);
bo->pitch = 0;
bo->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
return bo;
}
 
if (first) {
list_del(&first->list);
first->pitch = 0;
first->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, first->handle, num_pages(first)));
return first;
}
 
return NULL;
}
 
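/* Handle the final unreference of a bo: depending on its state it is
 * moved to the snoop, scanout, active or inactive cache for reuse, or
 * freed outright if it cannot be reused (or caching is disabled).
 */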
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged);
assert(bo->proxy == NULL);
 
bo->binding.offset = 0;
 
if (DBG_NO_CACHE)
goto destroy;
 
if (bo->snoop && !bo->flush) {
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(!bo->flush);
assert(list_is_empty(&bo->list));
if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
if (bo->rq == NULL) {
assert(!bo->needs_flush);
kgem_bo_move_to_snoop(kgem, bo);
}
return;
}
 
if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
return;
}
 
if (bo->io)
bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
goto destroy;
}
 
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
 
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->snoop == false);
assert(bo->io == false);
assert(bo->scanout == false);
 
if (bo->exec && kgem->nexec == 1) {
DBG(("%s: only handle in batch, discarding last operations\n",
__FUNCTION__));
assert(bo->exec == &kgem->exec[0]);
assert(kgem->exec[0].handle == bo->handle);
assert(RQ(bo->rq) == kgem->next_request);
bo->refcnt = 1;
kgem_reset(kgem);
bo->refcnt = 0;
}
 
if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
if (bo->rq) {
struct list *cache;
 
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
if (bucket(bo) < NUM_CACHE_BUCKETS)
cache = &kgem->active[bucket(bo)][bo->tiling];
else
cache = &kgem->large;
list_add(&bo->list, cache);
return;
}
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
 
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
 
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;
 
DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}
 
kgem_bo_move_to_inactive(kgem, bo);
return;
 
destroy:
if (!bo->exec)
kgem_bo_free(kgem, bo);
}
 
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
__kgem_bo_destroy(kgem, bo);
}
 
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
while (!list_is_empty(&bo->base.vma)) {
struct kgem_bo *cached;
 
cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
assert(cached->proxy == &bo->base);
list_del(&cached->vma);
 
assert(*(struct kgem_bo **)cached->map == cached);
*(struct kgem_bo **)cached->map = NULL;
cached->map = NULL;
 
kgem_bo_destroy(kgem, cached);
}
}
 
static bool kgem_retire__buffers(struct kgem *kgem)
{
bool retired = false;
 
while (!list_is_empty(&kgem->active_buffers)) {
struct kgem_buffer *bo =
list_last_entry(&kgem->active_buffers,
struct kgem_buffer,
base.list);
 
if (bo->base.rq)
break;
 
DBG(("%s: releasing upload cache for handle=%d? %d\n",
__FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
list_del(&bo->base.list);
kgem_buffer_release(kgem, bo);
kgem_bo_unref(kgem, &bo->base);
retired = true;
}
 
return retired;
}
 
static bool kgem_retire__flushing(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
bool retired = false;
 
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
assert(bo->rq == (void *)kgem);
assert(bo->exec == NULL);
 
if (__kgem_busy(kgem, bo->handle))
break;
 
__kgem_bo_clear_busy(bo);
 
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else
kgem_bo_free(kgem, bo);
}
#if HAS_DEBUG_FULL
{
int count = 0;
list_for_each_entry(bo, &kgem->flushing, request)
count++;
printf("%s: %d bo on flushing list\n", __FUNCTION__, count);
}
#endif
 
kgem->need_retire |= !list_is_empty(&kgem->flushing);
 
return retired;
}
 
 
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
bool retired = false;
 
DBG(("%s: request %d complete\n",
__FUNCTION__, rq->bo->handle));
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
assert(RQ(bo->rq) == rq);
assert(bo->exec == NULL);
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
 
list_del(&bo->request);
 
if (bo->needs_flush)
bo->needs_flush = __kgem_busy(kgem, bo->handle);
if (bo->needs_flush) {
DBG(("%s: moving %d to flushing\n",
__FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
continue;
}
 
bo->domain = DOMAIN_NONE;
bo->rq = NULL;
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
assert(rq->bo->rq == NULL);
assert(list_is_empty(&rq->bo->request));
 
if (--rq->bo->refcnt == 0) {
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, rq->bo->handle));
kgem_bo_free(kgem, rq->bo);
}
}
 
__kgem_request_free(rq);
return retired;
}
 
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
bool retired = false;
 
while (!list_is_empty(&kgem->requests[ring])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list);
if (__kgem_busy(kgem, rq->bo->handle))
break;
 
retired |= __kgem_retire_rq(kgem, rq);
}
 
#if HAS_DEBUG_FULL
{
struct kgem_bo *bo;
int count = 0;
 
list_for_each_entry(bo, &kgem->requests[ring], request)
count++;
 
bo = NULL;
if (!list_is_empty(&kgem->requests[ring]))
bo = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list)->bo;
 
printf("%s: ring=%d, %d outstanding requests, oldest=%d\n",
__FUNCTION__, ring, count, bo ? bo->handle : 0);
}
#endif
 
return retired;
}
 
static bool kgem_retire__requests(struct kgem *kgem)
{
bool retired = false;
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
retired |= kgem_retire__requests_ring(kgem, n);
kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
}
 
return retired;
}
 
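/* Reclaim whatever the GPU has finished with: walk the flushing list,
 * the per-ring request lists and the active upload buffers, returning
 * completed bos to the caches. Returns true if anything was retired.
 */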
bool kgem_retire(struct kgem *kgem)
{
bool retired = false;
 
DBG(("%s\n", __FUNCTION__));
 
kgem->need_retire = false;
 
retired |= kgem_retire__flushing(kgem);
retired |= kgem_retire__requests(kgem);
retired |= kgem_retire__buffers(kgem);
 
DBG(("%s -- retired=%d, need_retire=%d\n",
__FUNCTION__, retired, kgem->need_retire));
 
kgem->retire(kgem);
 
return retired;
}
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
struct kgem_request *rq;
 
assert(!list_is_empty(&kgem->requests[ring]));
 
rq = list_last_entry(&kgem->requests[ring],
struct kgem_request, list);
if (__kgem_busy(kgem, rq->bo->handle)) {
DBG(("%s: last requests handle=%d still busy\n",
__FUNCTION__, rq->bo->handle));
return false;
}
 
DBG(("%s: ring=%d idle (handle=%d)\n",
__FUNCTION__, ring, rq->bo->handle));
 
kgem_retire__requests_ring(kgem, ring);
assert(list_is_empty(&kgem->requests[ring]));
return true;
}
 
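/* After submitting the batch, record the offsets reported back by the
 * kernel, mark every bo of the request as busy on the GPU and queue the
 * request for retirement. The static_request fallback (used when
 * allocating a request failed) instead synchronises immediately via
 * SET_DOMAIN and tears the request down on the spot.
 */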
static void kgem_commit(struct kgem *kgem)
{
struct kgem_request *rq = kgem->next_request;
struct kgem_bo *bo, *next;
 
list_for_each_entry_safe(bo, next, &rq->buffers, request) {
assert(next->request.prev == &bo->request);
 
DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
__FUNCTION__, bo->handle, bo->proxy != NULL,
bo->dirty, bo->needs_flush, bo->snoop,
(unsigned)bo->exec->offset));
 
assert(!bo->purged);
assert(bo->exec);
assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
 
bo->presumed_offset = bo->exec->offset;
bo->exec = NULL;
bo->target_handle = -1;
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
kgem_bo_free(kgem, bo);
continue;
}
 
bo->binding.offset = 0;
bo->domain = DOMAIN_GPU;
bo->dirty = false;
 
if (bo->proxy) {
/* proxies are not used for domain tracking */
bo->exec = NULL;
__kgem_bo_clear_busy(bo);
}
 
kgem->scanout_busy |= bo->scanout;
}
 
if (rq == &kgem->static_request) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
}
 
kgem_retire(kgem);
assert(list_is_empty(&rq->buffers));
 
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
} else {
list_add_tail(&rq->list, &kgem->requests[rq->ring]);
kgem->need_throttle = kgem->need_retire = 1;
}
 
kgem->next_request = NULL;
}
 
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
while (!list_is_empty(head))
kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
}
 
static void kgem_close_inactive(struct kgem *kgem)
{
unsigned int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
kgem_close_list(kgem, &kgem->inactive[i]);
}
 
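/* Finish the partial upload buffers attached to this batch before it is
 * submitted: mmapped buffers with space left are kept on the active
 * list for further reuse, one-shot uploads are shrunk into a smaller bo
 * where possible (rewriting any relocations that referenced the
 * original), and whatever remains is written back with gem_write.
 */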
static void kgem_finish_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo, *next;
 
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%d\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped));
 
assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
continue;
}
 
if (!bo->write) {
assert(bo->base.exec || bo->base.refcnt > 1);
goto decouple;
}
 
if (bo->mmapped) {
int used;
 
assert(!bo->need_io);
 
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
goto decouple;
}
 
if (!bo->used) {
			/* Unless we replace the handle in the execbuffer,
			 * this bo will become active. So decouple it from
			 * the buffer list and track it in the normal
			 * manner.
			 */
goto decouple;
}
 
assert(bo->need_io);
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);
 
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
unsigned alloc = NUM_PAGES(bo->used);
 
shrink = search_snoop_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
void *map;
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
map = kgem_bo_map__cpu(kgem, shrink);
if (map) {
kgem_bo_sync__cpu(kgem, shrink);
memcpy(map, bo->mem, bo->used);
 
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
 
shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
}
 
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;
 
decouple:
DBG(("%s: releasing handle=%d\n",
__FUNCTION__, bo->base.handle));
list_del(&bo->base.list);
kgem_bo_unref(kgem, &bo->base);
}
}
 
static void kgem_cleanup(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
while (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
bo->exec = NULL;
bo->dirty = false;
__kgem_bo_clear_busy(bo);
if (bo->refcnt == 0)
kgem_bo_free(kgem, bo);
}
 
__kgem_request_free(rq);
}
}
 
kgem_close_inactive(kgem);
}
 
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
int ret;
 
ASSERT_IDLE(kgem, handle);
 
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
 
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
}
 
/* Disjoint surface/batch, upload separately */
ret = gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
if (ret)
return ret;
 
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
}
 
void kgem_reset(struct kgem *kgem)
{
if (kgem->next_request) {
struct kgem_request *rq = kgem->next_request;
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo =
list_first_entry(&rq->buffers,
struct kgem_bo,
request);
list_del(&bo->request);
 
assert(RQ(bo->rq) == rq);
 
bo->binding.offset = 0;
bo->exec = NULL;
bo->target_handle = -1;
bo->dirty = false;
 
if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
} else
__kgem_bo_clear_busy(bo);
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
DBG(("%s: discarding handle=%d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
if (rq != &kgem->static_request) {
list_init(&rq->list);
__kgem_request_free(rq);
}
}
 
kgem->nfence = 0;
kgem->nexec = 0;
kgem->nreloc = 0;
kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
kgem->batch_flags = kgem->batch_flags_base;
 
kgem->next_request = __kgem_request_alloc(kgem);
 
kgem_sna_reset(kgem);
}
 
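/* The surface state grows downwards from the end of the batch towards
 * the commands; when the kernel supports relaxed delta relocations the
 * two can be packed together. Compute the compacted upload size and
 * adjust the deltas/offsets of the relocations that referenced the old
 * surface location.
 */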
static int compact_batch_surface(struct kgem *kgem)
{
int size, shrink, n;
 
if (!kgem->has_relaxed_delta)
return kgem->batch_size;
 
/* See if we can pack the contents into one or two pages */
n = ALIGN(kgem->batch_size, 1024);
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
 
shrink = n - size;
if (shrink) {
DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
 
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
 
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
kgem->reloc[n].offset -= shrink;
}
}
 
return size * sizeof(uint32_t);
}
 
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;
 
if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
struct kgem_bo,
list);
if (!bo->rq) {
out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_4096;
}
}
 
if (size <= 16384) {
bo = list_first_entry(&kgem->pinned_batches[1],
struct kgem_bo,
list);
if (!bo->rq) {
out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_16384;
}
}
 
if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);
 
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
struct kgem_bo,
list);
list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
 
DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
return NULL;
}
 
kgem_retire(kgem);
assert(bo->rq == NULL);
return kgem_bo_reference(bo);
}
 
return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
 
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
int size;
 
assert(!DBG_NO_HW);
assert(!kgem->wedged);
 
assert(kgem->nbatch);
assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
assert(kgem->nbatch <= kgem->surface);
 
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
 
DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
 
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
assert(kgem->nfence <= kgem->fence_max);
 
kgem_finish_buffers(kgem);
 
#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif
 
rq = kgem->next_request;
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;
 
assert(!rq->bo->needs_flush);
 
i = kgem->nexec++;
kgem->exec[i].handle = handle;
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
 
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
 
kgem_fixup_self_relocs(kgem, rq->bo);
 
if (kgem_batch_write(kgem, handle, size) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret, retry = 3;
 
VG_CLEAR(execbuf);
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_start_offset = 0;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
execbuf.DR4 = 0;
execbuf.flags = kgem->ring | kgem->batch_flags;
execbuf.rsvd1 = 0;
execbuf.rsvd2 = 0;
 
 
 
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
while (ret == -1 && errno == EBUSY && retry--) {
__kgem_throttle(kgem);
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
if (DEBUG_SYNC && ret == 0) {
struct drm_i915_gem_set_domain set_domain;
 
VG_CLEAR(set_domain);
set_domain.handle = handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
if (ret == -1) {
// DBG(("%s: GPU hang detected [%d]\n",
// __FUNCTION__, errno));
kgem_throttle(kgem);
kgem->wedged = true;
 
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
 
for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request) {
if (bo->handle == kgem->exec[i].handle) {
found = bo;
break;
}
}
ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
i,
kgem->exec[i].handle,
(int)kgem->exec[i].offset,
found ? kgem_bo_size(found) : -1,
found ? found->tiling : -1,
(int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
found ? found->snoop : -1,
found ? found->purged : -1);
}
for (i = 0; i < kgem->nreloc; i++) {
ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
i,
(int)kgem->reloc[i].offset,
kgem->reloc[i].target_handle,
kgem->reloc[i].delta,
kgem->reloc[i].read_domains,
kgem->reloc[i].write_domain,
(int)kgem->reloc[i].presumed_offset);
}
 
if (DEBUG_SYNC) {
int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd != -1) {
write(fd, kgem->batch, batch_end*sizeof(uint32_t));
close(fd);
}
 
FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
#endif
}
}
 
kgem_commit(kgem);
}
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem_reset(kgem);
 
assert(kgem->next_request != NULL);
}
 
void kgem_throttle(struct kgem *kgem)
{
kgem->need_throttle = 0;
if (kgem->wedged)
return;
 
kgem->wedged = __kgem_throttle(kgem);
if (kgem->wedged) {
printf("Detected a hung GPU, disabling acceleration.\n");
printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
}
}
 
void kgem_purge_cache(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
if (!kgem_bo_is_retained(kgem, bo)) {
DBG(("%s: purging %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
}
 
kgem->need_purge = false;
}
 
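/* Periodic cache maintenance: release deferred bo/request frees, drop
 * idle scanouts and large inactive buffers, and age the snoop and
 * inactive caches. Entries idle for longer than MAX_INACTIVE_TIME
 * (half of that for the snoop cache) are freed, while recently mapped
 * buffers are preserved a little longer. Returns true if the caches
 * still hold objects for a later pass to expire.
 */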
bool kgem_expire_cache(struct kgem *kgem)
{
time_t now, expire;
struct kgem_bo *bo;
unsigned int size = 0, count = 0;
bool idle;
unsigned int i;
 
time(&now);
 
while (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
while (__kgem_freed_request) {
struct kgem_request *rq = __kgem_freed_request;
__kgem_freed_request = *(struct kgem_request **)rq;
free(rq);
}
 
while (!list_is_empty(&kgem->large_inactive)) {
kgem_bo_free(kgem,
list_first_entry(&kgem->large_inactive,
struct kgem_bo, list));
 
}
 
while (!list_is_empty(&kgem->scanout)) {
bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
if (__kgem_busy(kgem, bo->handle))
break;
 
list_del(&bo->list);
kgem_bo_clear_scanout(kgem, bo);
__kgem_bo_destroy(kgem, bo);
}
 
expire = 0;
list_for_each_entry(bo, &kgem->snoop, list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME/2;
break;
}
 
bo->delta = now;
}
if (expire) {
while (!list_is_empty(&kgem->snoop)) {
bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
 
if (bo->delta > expire)
break;
 
kgem_bo_free(kgem, bo);
}
}
#ifdef DEBUG_MEMORY
{
long snoop_size = 0;
int snoop_count = 0;
list_for_each_entry(bo, &kgem->snoop, list)
snoop_count++, snoop_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
__FUNCTION__, snoop_count, snoop_size);
}
#endif
 
kgem_retire(kgem);
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem->expire(kgem);
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
expire = 0;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
idle &= list_is_empty(&kgem->inactive[i]);
list_for_each_entry(bo, &kgem->inactive[i], list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME;
break;
}
 
bo->delta = now;
}
}
if (idle) {
DBG(("%s: idle\n", __FUNCTION__));
kgem->need_expire = false;
return false;
}
if (expire == 0)
return true;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
struct list preserve;
 
list_init(&preserve);
while (!list_is_empty(&kgem->inactive[i])) {
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
 
if (bo->delta > expire) {
idle = false;
break;
}
 
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
idle = false;
list_move_tail(&bo->list, &preserve);
} else {
count++;
size += bytes(bo);
kgem_bo_free(kgem, bo);
DBG(("%s: expiring %d\n",
__FUNCTION__, bo->handle));
}
}
if (!list_is_empty(&preserve)) {
preserve.prev->next = kgem->inactive[i].next;
kgem->inactive[i].next->prev = preserve.prev;
kgem->inactive[i].next = preserve.next;
preserve.next->prev = &kgem->inactive[i];
}
}
 
#ifdef DEBUG_MEMORY
{
long inactive_size = 0;
int inactive_count = 0;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_for_each_entry(bo, &kgem->inactive[i], list)
inactive_count++, inactive_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
__FUNCTION__, inactive_count, inactive_size);
}
#endif
 
DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
__FUNCTION__, count, size, idle));
 
kgem->need_expire = !idle;
return !idle;
(void)count;
(void)size;
}
 
void kgem_cleanup_cache(struct kgem *kgem)
{
unsigned int i;
int n;
 
/* sync to the most recent request */
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
if (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
struct drm_i915_gem_set_domain set_domain;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
 
DBG(("%s: sync on cleanup\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
(void)drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN,
&set_domain);
}
}
 
kgem_retire(kgem);
kgem_cleanup(kgem);
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
while (!list_is_empty(&kgem->inactive[i]))
kgem_bo_free(kgem,
list_last_entry(&kgem->inactive[i],
struct kgem_bo, list));
}
 
while (!list_is_empty(&kgem->snoop))
kgem_bo_free(kgem,
list_last_entry(&kgem->snoop,
struct kgem_bo, list));
 
while (__kgem_freed_bo) {
struct kgem_bo *bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
kgem->need_purge = false;
kgem->need_expire = false;
}
 
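/* Look for a reusable untiled bo of at least num_pages pages in the
 * active or inactive cache (chosen by CREATE_INACTIVE). When a CPU or
 * GTT mapping is requested the per-vma inactive lists are tried first;
 * a candidate with the wrong tiling is switched to linear via
 * set_tiling, and a near-miss (e.g. wrong map type) is remembered as a
 * fallback.
 */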
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;
 
DBG(("%s: num_pages=%d, flags=%x, use_active? %d\n",
__FUNCTION__, num_pages, flags, use_active));
 
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE)
return NULL;
 
if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: inactive and cache bucket empty\n",
__FUNCTION__));
 
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: can not retire\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
DBG(("%s: active cache bucket empty\n", __FUNCTION__));
return NULL;
}
 
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: active cache bucket still empty after retire\n",
__FUNCTION__));
return NULL;
}
}
 
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
DBG(("%s: searching for inactive %s map\n",
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), num_pages));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling &&
!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (flags & CREATE_EXACT)
return NULL;
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
return NULL;
}
 
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
list_for_each_entry(bo, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo))
continue;
 
if (use_active &&
kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
continue;
 
if (first)
continue;
 
if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}
 
if (bo->map) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;
 
first = bo;
continue;
}
}
 
if (use_active)
kgem_bo_remove_from_active(kgem, bo);
else
kgem_bo_remove_from_inactive(kgem, bo);
 
assert(bo->tiling == I915_TILING_NONE);
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (first) {
assert(first->tiling == I915_TILING_NONE);
 
if (use_active)
kgem_bo_remove_from_active(kgem, first);
else
kgem_bo_remove_from_inactive(kgem, first);
 
first->pitch = 0;
first->delta = 0;
DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}
 
return NULL;
}
 
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
struct kgem_bo *bo;
uint32_t handle;
 
DBG(("%s(%d)\n", __FUNCTION__, size));
 
if (flags & CREATE_GTT_MAP && kgem->has_llc) {
flags &= ~CREATE_GTT_MAP;
flags |= CREATE_CPU_MAP;
}
 
size = (size + PAGE_SIZE - 1) / PAGE_SIZE;
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
if (bo) {
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_CACHED)
return NULL;
 
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
bo = __kgem_bo_alloc(handle, size);
if (bo == NULL) {
gem_close(kgem->fd, handle);
return NULL;
}
 
debug_alloc__bo(kgem, bo);
return bo;
}
 
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
unsigned int size;
 
assert(bo->tiling);
assert(kgem->gen < 040);
 
if (kgem->gen < 030)
size = 512 * 1024;
else
size = 1024 * 1024;
while (size < bytes(bo))
size *= 2;
 
return size;
}
 
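/* Allocate, or preferably reuse, a 2D buffer of the requested geometry.
 * The search order is: the scanout cache (for CREATE_SCANOUT), the
 * dedicated lists for very large objects, the inactive vma cache when a
 * mapping is wanted, the active caches (exact tiling first, then
 * convertible and near-miss matches), the inactive cache, and finally a
 * fresh GEM allocation. A negative tiling value requests exactly that
 * tiling (CREATE_EXACT).
 */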
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags)
{
struct list *cache;
struct kgem_bo *bo;
uint32_t pitch, untiled_pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
 
if (tiling < 0)
tiling = -tiling, flags |= CREATE_EXACT;
 
DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling,
!!(flags & CREATE_EXACT),
!!(flags & CREATE_INACTIVE),
!!(flags & CREATE_CPU_MAP),
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT),
!!(flags & CREATE_PRIME),
!!(flags & CREATE_TEMPORARY)));
 
size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
size /= PAGE_SIZE;
bucket = cache_bucket(size);
 
if (flags & CREATE_SCANOUT) {
assert((flags & CREATE_INACTIVE) == 0);
list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->purged);
 
if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
 
if (flags & CREATE_INACTIVE)
goto large_inactive;
 
tiled_height = kgem_aligned_height(kgem, height, tiling);
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
 
list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->flush == true);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch || bo->tiling != tiling) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
bo->tiling = tiling;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
large_inactive:
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
 
if (size > num_pages(bo))
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->delta = 0;
DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
goto create;
}
 
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (kgem->has_llc && tiling == I915_TILING_NONE)
for_cpu = 1;
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
cache = &kgem->vma[for_cpu].inactive[bucket];
do {
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
DBG(("inactive vma with wrong tiling: %d < %d\n",
bo->tiling, tiling));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
 
kgem_bo_remove_from_inactive(kgem, bo);
 
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
goto create;
}
 
if (flags & CREATE_INACTIVE)
goto skip_active_search;
 
/* Best active match */
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
retry = 3;
search_again:
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->active[bucket][tiling];
if (tiling) {
tiled_height = kgem_aligned_height(kgem, height, tiling);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert(!bo->scanout);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch) {
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} else {
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->tiling == tiling);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->pitch = pitch;
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (--retry && flags & CREATE_EXACT) {
if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;
 
cache = &kgem->active[bucket][i];
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = tiling;
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
bucket++;
goto search_again;
}
 
if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
i = tiling;
while (--i >= 0) {
tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
cache = active(kgem, tiled_height / PAGE_SIZE, i);
tiled_height = kgem_aligned_height(kgem, height, i);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
} else
bo->pitch = untiled_pitch;
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
skip_active_search:
bucket = cache_bucket(size);
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3)
retry = 3;
search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->pitch = pitch;
bo->tiling = tiling;
 
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
assert(bo->pitch);
DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_INACTIVE &&
!list_is_empty(&kgem->active[bucket][tiling]) &&
__kgem_throttle_retire(kgem, flags)) {
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
if (--retry) {
bucket++;
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
create:
if (bucket >= NUM_CACHE_BUCKETS)
size = ALIGN(size, 1024);
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
bo = __kgem_bo_alloc(handle, size);
if (!bo) {
gem_close(kgem->fd, handle);
return NULL;
}
 
bo->domain = DOMAIN_CPU;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
if (tiling != I915_TILING_NONE &&
gem_set_tiling(kgem->fd, handle, tiling, pitch))
bo->tiling = tiling;
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}
 
assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
 
debug_alloc__bo(kgem, bo);
 
DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id,
size, num_pages(bo), bucket(bo)));
return bo;
}
 
#if 0
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags)
{
struct kgem_bo *bo;
int stride, size;
 
if (DBG_NO_CPU)
return NULL;
 
DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
 
if (kgem->has_llc) {
bo = kgem_create_2d(kgem, width, height, bpp,
I915_TILING_NONE, flags);
if (bo == NULL)
return bo;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
return bo;
}
 
assert(width > 0 && height > 0);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
size = stride * ALIGN(height, 2);
assert(size >= PAGE_SIZE);
 
DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));
 
bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
if (bo) {
assert(bo->tiling == I915_TILING_NONE);
assert(bo->snoop);
bo->refcnt = 1;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_cacheing) {
bo = kgem_create_linear(kgem, size, flags);
if (bo == NULL)
return NULL;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
bo->snoop = true;
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_userptr) {
void *ptr;
 
/* XXX */
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
return NULL;
 
bo = kgem_create_map(kgem, ptr, size, false);
if (bo == NULL) {
free(ptr);
return NULL;
}
 
bo->map = MAKE_USER_MAP(ptr);
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
return NULL;
}
 
 
#endif
 
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, proxy? %d\n",
__FUNCTION__, bo->handle, bo->proxy != NULL));
 
if (bo->proxy) {
_list_del(&bo->vma);
_list_del(&bo->request);
if (bo->io && bo->exec == NULL)
_kgem_bo_delete_buffer(kgem, bo);
kgem_bo_unref(kgem, bo->proxy);
kgem_bo_binding_free(kgem, bo);
free(bo);
return;
}
 
__kgem_bo_destroy(kgem, bo);
}
 
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->rq);
assert(bo->exec == NULL);
assert(bo->needs_flush);
 
/* The kernel will emit a flush *and* update its own flushing lists. */
if (!__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
DBG(("%s: handle=%d, busy?=%d\n",
__FUNCTION__, bo->handle, bo->rq != NULL));
}
 
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
 
bool kgem_check_bo(struct kgem *kgem, ...)
{
va_list ap;
struct kgem_bo *bo;
int num_exec = 0;
int num_pages = 0;
bool flush = false;
 
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
while (bo->proxy)
bo = bo->proxy;
if (bo->exec)
continue;
 
if (needs_semaphore(kgem, bo))
return false;
 
num_pages += num_pages(bo);
num_exec++;
 
flush |= bo->flush;
}
va_end(ap);
 
DBG(("%s: num_pages=+%d, num_exec=+%d\n",
__FUNCTION__, num_pages, num_exec));
 
if (!num_pages)
return true;
 
if (kgem_flush(kgem, flush))
return false;
 
if (kgem->aperture > kgem->aperture_low &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
}
 
if (num_pages + kgem->aperture > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
return false;
}
 
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
return false;
}
 
return true;
}
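
A minimal sketch of the intended calling pattern for kgem_check_bo(), assuming the kgem API declared in kgem.h; the gen4 code further down follows the same check/submit/recheck sequence before binding surfaces. The helper name and its dst/src/mask parameters are illustrative only.

static void example_reserve_render_bos(struct sna *sna,
                                       struct kgem_bo *dst,
                                       struct kgem_bo *src,
                                       struct kgem_bo *mask)
{
	/* kgem_check_bo() is variadic and NULL-terminated; it fails when the
	 * aperture watermarks or the exec-slot limit would be exceeded, or
	 * when a cross-ring semaphore would be needed.
	 */
	if (!kgem_check_bo(&sna->kgem, dst, src, mask, NULL)) {
		kgem_submit(&sna->kgem);
		/* an empty batch is expected to have room for three bos */
		assert(kgem_check_bo(&sna->kgem, dst, src, mask, NULL));
	}
}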
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint32_t delta)
{
int index;
 
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
 
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
if( bo != NULL && bo->handle == -2)
{
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
return 0;
};
 
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(bo->refcnt);
assert(!bo->purged);
 
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
__FUNCTION__, bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
}
 
if (read_write_domain & 0x7fff && !bo->dirty)
__kgem_bo_mark_dirty(bo);
 
bo = bo->proxy;
assert(bo->refcnt);
assert(!bo->purged);
}
 
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);
 
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
kgem->nfence++;
}
bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
}
 
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
 
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
 
return delta;
}
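
For orientation, a sketch of how an emitter consumes kgem_add_reloc(); this is illustrative only, the real emitters below go through OUT_BATCH-style helpers and surface-state offsets such as ss[1]. The dword index being written is passed as pos so the kernel can patch it, and the returned presumed offset plus delta is what gets written into the batch now.

static void example_emit_bo_address(struct sna *sna, struct kgem_bo *bo,
                                    uint32_t read_write_domain,
                                    uint32_t delta)
{
	struct kgem *kgem = &sna->kgem;

	/* Record a relocation against the dword we are about to write;
	 * kgem_add_reloc() returns bo->presumed_offset + delta, the
	 * best-guess address to place in the batch.
	 */
	kgem->batch[kgem->nbatch] =
		kgem_add_reloc(kgem, kgem->nbatch, bo, read_write_domain, delta);
	kgem->nbatch++;
}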
 
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
 
DBG(("%s: type=%d, count=%d (bucket: %d)\n",
__FUNCTION__, type, kgem->vma[type].count, bucket));
if (kgem->vma[type].count <= 0)
return;
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
/* VMAs are limited on a per-process basis to around 64k.
* This includes all malloc arenas as well as other file
* mappings. In order to be fair and not hog the cache,
* and more importantly not to exhaust that limit and
* start failing mappings, we keep our own number of open
* VMAs within a conservative value.
*/
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
 
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
j++) {
struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
if (!list_is_empty(head))
bo = list_last_entry(head, struct kgem_bo, vma);
}
if (bo == NULL)
break;
 
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
assert(IS_CPU_MAP(bo->map) == type);
assert(bo->map);
assert(bo->rq == NULL);
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
 
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: freeing unpurgeable old mapping\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
}
}
}
 
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
return kgem_bo_map__cpu(kgem, bo);
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(bo->exec == NULL);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
(kgem->has_llc || bo->domain == DOMAIN_CPU)) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
ptr = kgem_bo_map__cpu(kgem, bo);
kgem_bo_sync__cpu(kgem, bo);
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
if (bo->domain != DOMAIN_GTT) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
/* XXX use PROT_READ to avoid the write flush? */
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
}
}
 
return ptr;
}
 
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(bytes(bo) <= kgem->aperture_mappable / 4);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem_bo_map__async(kgem, bo);
}
 
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(!bo->scanout);
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
 
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
printf("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
}
 
return NULL;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
 
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
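
The pointer stored in bo->map above is tagged rather than raw: the low bit marks a CPU mmap as opposed to a GTT mapping, which is what the IS_CPU_MAP()/MAP() macros in kgem.h decode. A small illustration of the scheme; the EXAMPLE_ names are stand-ins, and MAKE_CPU_MAP() is assumed to set the low bit in the same way.

/* Page-aligned mappings leave the low pointer bits free, so a single
 * void *map field can carry both the address and the kind of mapping.
 * uintptr_t comes from <stdint.h>, which kgem.h already includes.
 */
#define EXAMPLE_MAKE_CPU_MAP(ptr)  ((void *)((uintptr_t)(ptr) | 1))
#define EXAMPLE_IS_CPU_MAP(ptr)    ((uintptr_t)(ptr) & 1)
#define EXAMPLE_MAP(ptr)           ((void *)((uintptr_t)(ptr) & ~3))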
 
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
kgem_bo_submit(kgem, bo);
 
if (bo->domain != DOMAIN_CPU) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_CPU;
set_domain.write_domain = I915_GEM_DOMAIN_CPU;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_CPU;
}
}
}
 
void kgem_clear_dirty(struct kgem *kgem)
{
struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
 
list_for_each_entry(bo, buffers, request) {
if (!bo->dirty)
break;
 
bo->dirty = false;
}
}
 
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length)
{
struct kgem_bo *bo;
 
DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
__FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
offset, length, target->io));
 
bo = __kgem_bo_alloc(target->handle, length);
if (bo == NULL)
return NULL;
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->reusable = false;
bo->size.bytes = length;
 
bo->io = target->io && target->proxy == NULL;
bo->dirty = target->dirty;
bo->tiling = target->tiling;
bo->pitch = target->pitch;
 
assert(!bo->scanout);
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
 
if (target->exec) {
list_move_tail(&bo->request, &kgem->next_request->buffers);
bo->exec = &_kgem_dummy_exec;
}
bo->rq = target->rq;
 
return bo;
}
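
A hedged usage sketch for kgem_create_proxy(): a proxy is a window into its target, sharing the target's handle but carrying its own delta, which kgem_add_reloc() above folds into every relocation made through it; destroying the proxy only drops the reference on the target (see _kgem_bo_destroy()). The helper below is illustrative.

static struct kgem_bo *example_proxy_row(struct kgem *kgem,
                                         struct kgem_bo *parent,
                                         int row)
{
	/* Expose one row of the parent as a standalone bo; offset and length
	 * are in bytes, and the proxy inherits the parent's tiling and pitch.
	 */
	return kgem_create_proxy(kgem, parent,
				 row * parent->pitch, parent->pitch);
}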
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b && b->offset; b = b->next)
if (format == b->format)
return b->offset;
 
return 0;
}
 
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b; b = b->next) {
if (b->offset)
continue;
 
b->offset = offset;
b->format = format;
 
if (b->next)
b->next->offset = 0;
 
return;
}
 
b = malloc(sizeof(*b));
if (b) {
b->next = bo->binding.next;
b->format = format;
b->offset = offset;
bo->binding.next = b;
}
}
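
These two helpers cache the surface-state offset already emitted for a (bo, format) pair; the gen4_bind_bo()/gen5_bind_bo() paths below use them in a lookup-then-populate pattern, sketched here with emit_surface_state() as a hypothetical stand-in for the real emitter.

static uint32_t emit_surface_state(struct sna *sna, struct kgem_bo *bo,
                                   uint32_t format); /* hypothetical emitter */

static uint32_t example_bind(struct sna *sna, struct kgem_bo *bo,
                             uint32_t format)
{
	uint32_t offset = kgem_bo_get_binding(bo, format);
	if (offset)
		return offset;  /* surface state already emitted for this format */

	offset = emit_surface_state(sna, bo, format);
	kgem_bo_set_binding(bo, format, offset);
	return offset;
}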
 
 
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
bo = __kgem_bo_alloc(-2, size);
if (!bo) {
return 0;
}
 
bo->domain = DOMAIN_GTT;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = fb->pitch;
bo->tiling = I915_TILING_NONE;
bo->scanout = 1;
fb->fb_bo = bo;
 
// printf("fb width %d height %d pitch %d bo %p\n",
// fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
};
 
 
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
bo = fb->fb_bo;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
fb->fb_bo = bo;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
if((size != bo->size.pages.count) ||
(fb->pitch != bo->pitch))
{
bo->size.pages.count = size;
bo->pitch = fb->pitch;
 
printf("fb width %d height %d pitch %d bo %p\n",
fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
}
 
return 0;
};
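
A short sketch of how kgem_init_fb()/kgem_update_fb() are meant to be driven (illustrative; the field names follow struct sna_fb as used above): initialise once, then re-query after a mode switch and rebuild anything derived from fb->fb_bo when the pitch or size changed.

static void example_handle_mode_change(struct kgem *kgem, struct sna_fb *fb)
{
	/* kgem_update_fb() refreshes the framebuffer info via SRV_FBINFO and
	 * returns nonzero when the pitch or the size in pages changed, i.e.
	 * when cached state referencing fb->fb_bo must be rebuilt.
	 */
	if (kgem_update_fb(kgem, fb)) {
		/* re-create render targets bound to fb->fb_bo here */
	}
}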
 
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_destroy(kgem, bo);
kgem_bo_free(kgem, bo);
}
 
 
void kgem_close_batches(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
};
 
 
 
/drivers/video/Intel-2D/kgem.h
28,10 → 28,12
#ifndef KGEM_H
#define KGEM_H
 
#define HAS_DEBUG_FULL 1
 
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdio.h>
 
#include <i915_drm.h>
 
38,24 → 40,12
#include "compiler.h"
#include "intel_list.h"
 
#include <limits.h>
#if !defined(MAXSHORT) || !defined(MINSHORT) || \
!defined(MAXINT) || !defined(MININT)
/*
* Some implementations #define these through <math.h>, so preclude
* #include'ing it later.
*/
#undef DBG
 
#include <math.h>
#undef MAXSHORT
#define MAXSHORT SHRT_MAX
#undef MINSHORT
#define MINSHORT SHRT_MIN
#undef MAXINT
#define MAXINT INT_MAX
#undef MININT
#define MININT INT_MIN
 
#if HAS_DEBUG_FULL
#define DBG(x) printf x
#else
#define DBG(x)
#endif
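
The DBG() definition relies on the double-parenthesis idiom, so the whole printf-style argument list is a single macro argument and vanishes in non-debug builds; an illustrative expansion:

/*
 *	DBG(("bind handle=%d at %x\n", bo->handle, offset));
 *
 * expands, with HAS_DEBUG_FULL set, to
 *
 *	printf ("bind handle=%d at %x\n", bo->handle, offset);
 *
 * and to nothing otherwise.
 */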
 
struct kgem_bo {
74,7 → 64,6
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
struct kgem_bo_binding {
struct kgem_bo_binding *next;
101,8 → 90,7
uint32_t pitch : 18; /* max 128k */
uint32_t tiling : 2;
uint32_t reusable : 1;
uint32_t gpu_dirty : 1;
uint32_t gtt_dirty : 1;
uint32_t dirty : 1;
uint32_t domain : 2;
uint32_t needs_flush : 1;
uint32_t snoop : 1;
185,7 → 173,6
uint32_t scanout_busy:1;
uint32_t busy:1;
 
uint32_t has_create2 :1;
uint32_t has_userptr :1;
uint32_t has_blt :1;
uint32_t has_relaxed_fencing :1;
193,9 → 180,8
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
uint32_t has_pinned_batches :1;
uint32_t has_caching :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
 
214,23 → 200,10
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
 
#if 0
void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
#endif
 
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[4096];
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[384] page_aligned;
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;
 
#ifdef DEBUG_MEMORY
struct {
240,11 → 213,9
#endif
};
 
#define KGEM_MAX_DEFERRED_VBO 16
 
#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO)
#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO)
#define KGEM_RELOC_RESERVED 4
#define KGEM_EXEC_RESERVED 1
 
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
262,8 → 233,6
bool read_only);
 
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size);
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo);
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags);
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
270,7 → 239,6
struct kgem_bo *target,
int offset, int length);
 
void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr);
 
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
280,8 → 248,6
#define KGEM_CAN_CREATE_LARGE 0x4
#define KGEM_CAN_CREATE_GTT 0x8
 
uint32_t kgem_get_unique_id(struct kgem *kgem);
 
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
315,6 → 281,7
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_retire(struct kgem *kgem);
 
337,11 → 304,6
return kgem_ring_is_idle(kgem, kgem->ring);
}
 
static inline bool __kgem_ring_empty(struct kgem *kgem)
{
return list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]);
}
 
void _kgem_submit(struct kgem *kgem);
static inline void kgem_submit(struct kgem *kgem)
{
363,8 → 325,21
_kgem_submit(kgem);
}
 
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
 
if (!bo->needs_flush)
return;
 
/* If the kernel fails to emit the flush, then it will be forced when
* we assume direct access. And as the usual failure is EIO, we do
* not actually care.
*/
__kgem_flush(kgem, bo);
}
 
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
{
assert(bo->refcnt);
392,9 → 367,6
kgem_submit(kgem);
#endif
 
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
_kgem_submit(kgem);
 
if (kgem->mode == mode)
return;
 
447,11 → 419,6
 
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
if (kgem->nreloc) {
unsigned mode = kgem->mode;
_kgem_submit(kgem);
_kgem_set_mode(kgem, mode);
}
 
return kgem->batch + kgem->nbatch;
}
545,12 → 512,6
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
if (kgem->has_llc && bo->tiling == I915_TILING_NONE)
return true;
 
if (!bo->presumed_offset)
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 
583,7 → 544,7
if (kgem_bo_mapped(kgem, bo))
return true;
 
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
if (!bo->tiling && kgem->has_llc)
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
592,22 → 553,6
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
struct kgem_bo *bo,
bool write)
{
if (bo->purged || (bo->scanout && write))
return false;
 
if (kgem->has_llc)
return true;
 
if (bo->domain != DOMAIN_CPU)
return false;
 
return !write || bo->exec == NULL;
}
 
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
assert(bo->refcnt);
616,8 → 561,6
return bo->snoop;
}
 
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo);
 
bool __kgem_busy(struct kgem *kgem, int handle);
 
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
627,12 → 570,10
 
inline static void __kgem_bo_clear_busy(struct kgem_bo *bo)
{
bo->needs_flush = false;
list_del(&bo->request);
bo->rq = NULL;
list_del(&bo->request);
 
bo->domain = DOMAIN_NONE;
bo->needs_flush = false;
bo->gtt_dirty = false;
}
 
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
643,6 → 584,8
return bo->rq;
}
 
/*
 
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
661,24 → 604,8
return kgem_bo_is_busy(bo);
}
 
static inline bool kgem_bo_is_render(struct kgem_bo *bo)
{
DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__,
bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq)));
assert(bo->refcnt);
return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER;
}
*/
 
static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo)
{
while (bo->proxy) {
bo->flush = true;
bo = bo->proxy;
}
bo->flush = true;
bo->reusable = false;
}
 
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
{
if (bo == NULL)
685,7 → 612,7
return false;
 
assert(bo->refcnt);
return bo->gpu_dirty;
return bo->dirty;
}
 
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
705,7 → 632,7
bo->handle, bo->proxy != NULL));
 
bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
bo->needs_flush = bo->gpu_dirty = true;
bo->needs_flush = bo->dirty = true;
list_move(&bo->request, &RQ(bo->rq)->buffers);
}
 
716,7 → 643,7
assert(bo->exec);
assert(bo->rq);
 
if (bo->gpu_dirty)
if (bo->dirty)
return;
 
__kgem_bo_mark_dirty(bo);
745,9 → 672,6
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
 
void kgem_clean_scanout_cache(struct kgem *kgem);
void kgem_clean_large_cache(struct kgem *kgem);
 
#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
/drivers/video/Intel-2D/kgem_debug.c
37,6 → 37,22
 
#include "kgem_debug.h"
 
#include <kos32sys.h>
 
/*
void
ErrorF(const char *f, ...)
{
va_list args;
 
va_start(args, f);
VErrorF(f, args);
va_end(args);
}
*/
 
#define ErrorF printf
 
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset)
{
372,6 → 388,8
 
static int (*decode_3d(int gen))(struct kgem*, uint32_t)
{
return kgem_gen6_decode_3d;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_decode_3d;
383,12 → 401,18
return kgem_gen4_decode_3d;
} else if (gen >= 030) {
return kgem_gen3_decode_3d;
} else if (gen >= 020) {
return kgem_gen2_decode_3d;
}
assert(0);
*/
}
 
static void (*finish_state(int gen))(struct kgem*)
{
 
return kgem_gen6_finish_state;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_finish_state;
400,8 → 424,11
return kgem_gen4_finish_state;
} else if (gen >= 030) {
return kgem_gen3_finish_state;
} else if (gen >= 020) {
return kgem_gen2_finish_state;
}
assert(0);
*/
}
 
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
/drivers/video/Intel-2D/kgem_debug_gen6.c
39,6 → 39,8
 
#include "kgem_debug.h"
 
#define ErrorF printf
 
static struct state {
struct vertex_buffer {
int handle;
/drivers/video/Intel-2D/gen4_render.c
51,7 → 51,6
*/
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
62,8 → 61,6
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
 
#define MAX_FLUSH_VERTICES 6
 
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
289,24 → 286,8
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
318,22 → 299,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
426,7 → 393,7
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
441,10 → 408,9
GEN4_SURFACE_BLEND_ENABLED |
format << GEN4_SURFACE_FORMAT_SHIFT);
 
if (is_dst) {
ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
455,7 → 421,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
510,7 → 476,7
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
unsigned int id = 1 << op->u.gen4.ve_id;
int id = op->u.gen4.ve_id;
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
518,14 → 484,13
 
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
if ((sna->render.vb_id & id) == 0)
if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
ndwords += 2*FORCE_FLUSH;
 
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
if ((sna->render.vb_id & (1 << id)) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);
543,8 → 508,7
return rem;
}
 
if (!kgem_check_batch(&sna->kgem,
2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
570,27 → 534,6
int rem;
 
assert(want);
#if FORCE_FLUSH
rem = sna->render.vertex_offset;
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
rem = sna->kgem.nbatch - 5;
if (rem) {
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
if (rem <= 0) {
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
rem = MAX_FLUSH_VERTICES;
}
} else
rem = MAX_FLUSH_VERTICES;
if (want > rem)
want = rem;
#endif
 
start:
rem = vertex_space(sna);
609,8 → 552,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
730,7 → 673,7
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
961,8 → 904,6
{
bool flush;
 
assert(op->dst.bo->exec);
 
flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
1043,450 → 984,23
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen4_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen4_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen4_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen4_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN4_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
gen4_get_batch(sna, op);
 
binding_table = gen4_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen4_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen4_emit_state(sna, op, offset | dirty);
}
 
static bool
gen4_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen4.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen4.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen4_video_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
do {
int n;
 
n = gen4_get_rectangles(sna, &tmp, nbox,
gen4_video_bind_surfaces);
assert(n);
nbox -= n;
 
do {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
} while (--n);
} while (nbox);
gen4_vertex_flush(sna);
 
return true;
}
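
A worked example of the scale/offset arithmetic above, with illustrative numbers rather than values from this revision:

/* Scaling a 320x240 source (frame->src starting at 0,0; frame->width = 320)
 * onto a 640-wide destination region whose extents begin at x1 = 100:
 *
 *	src_scale_x  = 320 / 640 / 320        = 1/640
 *	src_offset_x = 0/320 - 100 * (1/640)  = -0.15625
 *
 * so destination x = 100 maps to texture u = 100/640 - 0.15625 = 0.0 and
 * x = 740 maps to u = 740/640 - 0.15625 = 1.0, i.e. the 640 destination
 * pixels sweep the whole source normalized to [0, 1].
 */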
 
static int
gen4_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->is_solid = false;
channel->card_format = -1;
 
if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);
 
if (picture->pDrawable == NULL) {
int ret;
 
if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);
 
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_repeat(picture)) {
DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_filter(picture)) {
DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;
 
channel->pict_format = picture->format;
channel->card_format = gen4_get_card_format(picture->format);
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
__FUNCTION__,
channel->repeat, gen4_repeat(channel->repeat),
channel->filter, gen4_repeat(channel->filter)));
channel->repeat = gen4_repeat(channel->repeat);
channel->filter = gen4_filter(channel->filter);
if (channel->card_format == (unsigned)-1)
channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif
 
static void
gen4_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
1500,409 → 1014,113
 
}
 
#if 0
static bool
gen4_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height))
return true;
 
/* The blitter is much faster for solids */
if (sna_picture_is_solid(src, NULL))
return true;
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return picture_is_cpu(sna, src);
}
 
static bool
check_gradient(PicturePtr picture, bool precise)
{
switch (picture->pSourcePict->type) {
case SourcePictTypeSolidFill:
case SourcePictTypeLinear:
return false;
default:
return precise;
}
}
 
static bool
has_alphamap(PicturePtr p)
{
return p->alphaMap != NULL;
}
 
static bool
need_upload(struct sna *sna, PicturePtr p)
{
return p->pDrawable && untransformed(p) &&
!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
source_is_busy(PixmapPtr pixmap)
{
struct sna_pixmap *priv = sna_pixmap(pixmap);
if (priv == NULL)
return false;
 
if (priv->clear)
return false;
 
if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
return true;
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
return priv->gpu_damage && !priv->cpu_damage;
}
 
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
if (sna_picture_is_solid(p, NULL))
return false;
 
if (p->pSourcePict)
return check_gradient(p, precise);
 
if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
return true;
 
/* soft errors: prefer to upload/compute rather than read back */
if (pixmap && source_is_busy(pixmap))
return false;
 
return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
 
static bool
gen4_composite_fallback(struct sna *sna,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen4_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(sna, src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(sna, mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* read it back in order to upload the source, do it all
* on the CPU.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However, if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
return dst_use_cpu(dst_pixmap);
}
 
static int
reuse_source(struct sna *sna,
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
uint32_t color;
 
if (src_x != msk_x || src_y != msk_y)
return false;
 
if (src == mask) {
DBG(("%s: mask is source\n", __FUNCTION__));
*mc = *sc;
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
if (sna_picture_is_solid(mask, &color))
return gen4_channel_init_solid(sna, mc, color);
 
if (sc->is_solid)
return false;
 
if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
return false;
 
DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
 
if (!sna_transform_equal(src->transform, mask->transform))
return false;
 
if (!sna_picture_alphamap_equal(src, mask))
return false;
 
if (!gen4_check_repeat(mask))
return false;
 
if (!gen4_check_filter(mask))
return false;
 
if (!gen4_check_format(mask->format))
return false;
 
DBG(("%s: reusing source channel for mask with a twist\n",
__FUNCTION__));
 
*mc = *sc;
mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
mc->filter = gen4_filter(mask->filter);
mc->pict_format = mask->format;
mc->card_format = gen4_get_card_format(mask->format);
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
static bool
gen4_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
width, height, sna->kgem.ring));
 
if (op >= ARRAY_SIZE(gen4_blend_op))
return false;
tmp->op = PictOpSrc;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
if (gen4_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
if (!gen4_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->op = op;
switch (gen4_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen4_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen4_blend_op[op].src_alpha &&
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s -- fallback: unhandled component alpha blend\n",
__FUNCTION__));
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
goto cleanup_src;
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen4_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen4_composite_channel_convert(&tmp->mask);
break;
}
}
 
tmp->is_affine &= tmp->mask.is_affine;
}
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
1909,17 → 1127,9
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
tmp->boxes = gen4_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
}
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
1926,70 → 1136,13
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void
gen4_render_flush(struct sna *sna)
{
2077,7 → 1230,7
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
uint32_t kernel)
int gen, uint32_t kernel)
{
struct gen4_sf_unit_state *sf;
 
2231,7 → 1384,7
}
 
state->vs = gen4_create_vs_unit_state(&general);
state->sf = gen4_create_sf_state(&general, sf);
state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);
 
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
2268,52 → 1421,20
return state->general_bo != NULL;
}
 
const char *gen4_render_init(struct sna *sna, const char *backend)
 
bool gen4_render_init(struct sna *sna)
{
if (!gen4_render_setup(sna))
return backend;
return false;
 
sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen4_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen4_check_composite_spans;
sna->render.composite_spans = gen4_render_composite_spans;
if (0)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
#if !NO_VIDEO
sna->render.video = gen4_render_video;
#endif
sna->render.blit_tex = gen4_blit_tex;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen4_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen4_render_fill_one;
#endif
 
#endif
 
sna->render.blit_tex = gen4_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
sna->render.fini = gen4_render_fini;
2320,95 → 1441,8
 
sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
/drivers/video/Intel-2D/gen5_render.c
279,24 → 279,8
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
308,22 → 292,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
typedef struct gen5_surface_state_padded {
414,7 → 384,7
 
/* After the first bind, we manage the cache domains within the batch */
if (!DBG_NO_SURFACE_CACHE) {
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
430,10 → 400,9
GEN5_SURFACE_BLEND_ENABLED |
format << GEN5_SURFACE_FORMAT_SHIFT);
 
if (is_dst) {
ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
444,7 → 413,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
572,8 → 541,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
680,13 → 649,8
*
* However, the kernel flushes the pipeline between batches,
* so we should be safe....
*
* On the other hand, after using BLT we must use a non-pipelined
* operation...
* OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
*/
if (sna->kgem.nreloc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
 
OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
gen5_emit_state_base_address(sna);
768,15 → 732,12
kernel);
bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
 
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
__FUNCTION__, sp, bp, key,
sna->render_state.gen5.last_pipelined_pointers & 0xffff,
(sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
sna->render_state.gen5.last_pipelined_pointers));
if (key == sna->render_state.gen5.last_pipelined_pointers)
return false;
 
 
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen5.vs);
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
785,15 → 746,11
OUT_BATCH(sna->render_state.gen5.wm + sp);
OUT_BATCH(sna->render_state.gen5.cc + bp);
 
bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
sna->render_state.gen5.last_pipelined_pointers = key;
 
gen5_emit_urb(sna);
 
return bp;
return true;
}
 
static bool
static void
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
805,7 → 762,7
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.drawrect_limit == limit &&
sna->render_state.gen5.drawrect_offset == offset)
return false;
return;
 
sna->render_state.gen5.drawrect_offset = offset;
sna->render_state.gen5.drawrect_limit = limit;
814,7 → 771,6
OUT_BATCH(0x00000000);
OUT_BATCH(limit);
OUT_BATCH(offset);
return true;
}
 
static void
936,35 → 892,11
}
}
 
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
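/* Minimal 4-dword PIPE_CONTROL with only the write-cache flush bit set */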
OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
bool flush = false;
 
assert(op->dst.bo->exec);
 
/* drawrect must be first for Ironlake BLT workaround */
if (gen5_emit_drawing_rectangle(sna, op))
offset &= ~1;
gen5_emit_binding_table(sna, offset & ~1);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
DBG(("%s: changed blend state, flush required? %d\n",
__FUNCTION__, (offset & 1) && op->op > PictOpSrc));
flush = (offset & 1) && op->op > PictOpSrc;
}
gen5_emit_vertex_elements(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
972,18 → 904,19
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
if (flush) {
DBG(("%s: forcing flush\n", __FUNCTION__));
gen5_emit_pipe_flush(sna);
 
/* drawrect must be first for Ironlake BLT workaround */
gen5_emit_drawing_rectangle(sna, op);
gen5_emit_binding_table(sna, offset);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
}
}
 
static void gen5_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
 
1020,7 → 953,7
offset = sna->render_state.gen5.surface_table;
}
 
gen5_emit_state(sna, op, offset | dirty);
gen5_emit_state(sna, op, offset);
}
 
fastcall static void
1039,335 → 972,8
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen5_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen5_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen5_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen5_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen5_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen5_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN5_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen5_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
gen5_get_batch(sna, op);
 
binding_table = gen5_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen5_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen5_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen5_emit_state(sna, op, offset | dirty);
}
 
static bool
gen5_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen5.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen5_video_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
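/* Scale factors convert destination pixels into normalised source texture
 * coordinates; the offsets anchor the region extents at frame->src.x1/y1. */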
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
 
gen4_vertex_flush(sna);
return true;
}
#endif
 
static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
1380,175 → 986,69
 
}
 
#if 0
static bool
gen5_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
gen5_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
if (op >= ARRAY_SIZE(gen5_blend_op)) {
DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
return false;
}
tmp->op = PictOpSrc;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
if (gen5_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
if (!gen5_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
DBG(("%s: preparing source\n", __FUNCTION__));
tmp->op = op;
switch (gen5_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen5_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen5_blend_op[op].src_alpha &&
(gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
goto cleanup_src;
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
}
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen5_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->mask);
break;
}
}
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
tmp->is_affine &= tmp->mask.is_affine;
}
 
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
1555,23 → 1055,15
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
tmp->boxes = gen5_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen5_render_composite_boxes;
tmp->thread_boxes = gen5_render_composite_boxes__thread;
}
// tmp->box = gen5_render_composite_box;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
goto cleanup_mask;
}
 
gen5_bind_surfaces(sna, tmp);
1578,448 → 1070,11
gen5_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen5_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
 
DBG(("%s()\n", __FUNCTION__));
 
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen5_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height,
unsigned flags)
{
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
__FUNCTION__, op, width, height, flags));
 
if (op >= ARRAY_SIZE(gen5_blend_op))
return false;
 
if (gen5_composite_fallback(sna, src, NULL, dst)) {
DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
}
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
assert(priv);
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
 
if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
dst->format == PICT_a8)
return false;
 
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
return true;
}
 
static bool
gen5_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));
 
assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
tmp->base.op = op;
if (!gen5_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height,
true))
return false;
 
switch (gen5_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->base.src);
break;
}
 
tmp->base.mask.bo = NULL;
 
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = false;
tmp->base.need_magic_ca_pass = false;
 
tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
}
 
gen5_bind_surfaces(sna, &tmp->base);
gen5_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
 
 
static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
 
DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
__FUNCTION__, alu,
src->drawable.serialNumber, src_bo->handle,
dst->drawable.serialNumber, dst_bo->handle,
n, box->x1, box->y1, box->x2, box->y2,
flags));
 
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
memset(&tmp, 0, sizeof(tmp));
 
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen5_check_format(tmp.src.pict_format)) {
DBG(("%s: unsupported source format, %x, use BLT\n",
__FUNCTION__, tmp.src.pict_format));
goto fallback_blt;
}
 
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
 
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.x = tmp.dst.y = 0;
tmp.dst.bo = dst_bo;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;
}
 
tmp.src.filter = SAMPLER_FILTER_NEAREST;
tmp.src.repeat = SAMPLER_EXTEND_NONE;
tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled_dst;
} else {
tmp.src.bo = kgem_bo_reference(src_bo);
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
tmp.src.offset[0] = tmp.src.offset[1] = 0;
tmp.src.scale[0] = 1.f/src->drawable.width;
tmp.src.scale[1] = 1.f/src->drawable.height;
}
 
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen5.wm_kernel = WM_KERNEL;
tmp.u.gen5.ve_id = 2;
 
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
DBG(("%s: aperture check failed\n", __FUNCTION__));
goto fallback_tiled_src;
}
}
 
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
tmp.dst.x = tmp.dst.y = 0;
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
 
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
do {
int n_this_time;
 
n_this_time = gen5_get_rectangles(sna, &tmp, n,
gen5_copy_bind_surfaces);
n -= n_this_time;
 
do {
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
DBG(("%s: tiled fallback\n", __FUNCTION__));
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
#endif
 
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
2342,38 → 1397,16
return state->general_bo != NULL;
}
 
const char *gen5_render_init(struct sna *sna, const char *backend)
bool gen5_render_init(struct sna *sna)
{
if (!gen5_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen5_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen5_check_composite_spans;
sna->render.composite_spans = gen5_render_composite_spans;
if (sna->PciInfo->device_id == 0x0044)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen5_render_video;
 
sna->render.copy_boxes = gen5_render_copy_boxes;
sna->render.copy = gen5_render_copy;
 
sna->render.fill_boxes = gen5_render_fill_boxes;
sna->render.fill = gen5_render_fill;
sna->render.fill_one = gen5_render_fill_one;
#endif
 
sna->render.blit_tex = gen5_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen5_render_flush;
sna->render.reset = gen5_render_reset;
2381,89 → 1414,7
 
sna->render.max_3d_size = MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return "Ironlake (gen5)";
};
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen5_render_composite_blt;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
}
 
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen6_render.c
30,6 → 30,9
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
51,7 → 54,7
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 0
#define NO_RING_SWITCH 1
#define PREFER_RENDER 0
 
#define USE_8_PIXEL_DISPATCH 1
65,7 → 68,6
#define GEN6_MAX_SIZE 8192
 
struct gt_info {
const char *name;
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
77,7 → 79,6
};
 
static const struct gt_info gt1_info = {
.name = "Sandybridge (gen6, gt1)",
.max_vs_threads = 24,
.max_gs_threads = 21,
.max_wm_threads = 40,
85,7 → 86,6
};
 
static const struct gt_info gt2_info = {
.name = "Sandybridge (gen6, gt2)",
.max_vs_threads = 60,
.max_gs_threads = 60,
.max_wm_threads = 80,
254,6 → 254,18
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
};
 
/*
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
273,6 → 285,7
case PICT_a4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
*/
}
 
static uint32_t gen6_get_dest_format(PictFormat format)
390,6 → 403,7
int base;
 
if (has_mask) {
 
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
base = GEN6_WM_KERNEL_MASKSA;
397,6 → 411,7
base = GEN6_WM_KERNEL_MASKCA;
} else
base = GEN6_WM_KERNEL_MASK;
 
} else
base = GEN6_WM_KERNEL_NOMASK;
 
874,8 → 889,6
{
bool need_stall = wm_binding_table & 1;
 
assert(op->dst.bo->exec);
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
889,7 → 902,7
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
1026,6 → 1039,8
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
return 0;
/*
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
1032,6 → 1047,7
case I915_TILING_X: return GEN6_SURFACE_TILED;
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
}
*/
}
 
/**
1052,7 → 1068,7
uint32_t is_scanout = is_dst && bo->scanout;
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
1068,10 → 1084,9
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst) {
ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
1080,9 → 1095,9
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;
ss[5] = is_scanout ? 0 : 3 << 16;
 
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1195,8 → 1210,6
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
1207,15 → 1220,10
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen6_rectangle_begin(sna, op))
if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
goto flush;
else
goto start;
}
 
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1332,6 → 1340,7
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
 
fastcall static void
gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
1342,6 → 1351,7
}
 
#if 0
 
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
1452,6 → 1462,7
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#endif
 
#ifndef MAX
1491,6 → 1502,7
}
 
#if 0
 
static uint32_t gen6_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
1594,24 → 1606,21
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__, src_w, src_h, drw_w, drw_h,
REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
1635,7 → 1644,7
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_width == dst_width && src_height == dst_height)
if (src_w == drw_w && src_h == drw_h)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
1671,12 → 1680,10
pix_yoff = 0;
#endif
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
1690,16 → 1697,16
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
1707,6 → 1714,7
}
box++;
}
priv->clear = false;
 
gen4_vertex_flush(sna);
return true;
1814,6 → 1822,7
channel->card_format = gen6_get_card_format(channel->pict_format);
assert(channel->card_format != (unsigned)-1);
}
 
#endif
 
static void gen6_render_composite_done(struct sna *sna,
1828,15 → 1837,16
}
 
 
// sna_render_composite_redirect_done(sna, op);
}
 
#if 0
 
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
int x, int y, int w, int h)
{
BoxRec box;
 
1879,23 → 1889,7
return true;
}
 
static bool
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
{
if (untiled_tlb_miss(tmp->dst.bo) ||
untiled_tlb_miss(tmp->src.bo))
return true;
 
if (kgem_bo_is_render(tmp->dst.bo) ||
kgem_bo_is_render(tmp->src.bo))
return false;
 
if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
 
return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo);
}
 
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
1914,33 → 1908,11
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen6_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->src,
2028,15 → 2000,15
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(sna, tmp));
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
tmp->boxes = gen6_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen6_render_composite_boxes;
tmp->thread_boxes = gen6_render_composite_boxes__thread;
}
// tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
// tmp->boxes = gen6_render_composite_boxes__blt;
// if (tmp->emit_boxes) {
// tmp->boxes = gen6_render_composite_boxes;
// tmp->thread_boxes = gen6_render_composite_boxes__thread;
// }
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2067,6 → 2039,7
return false;
}
 
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
2234,7 → 2207,7
 
tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height, true))
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->base.src,
2264,7 → 2237,7
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(sna, tmp));
gen4_choose_spans_emitter(tmp));
 
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
2298,6 → 2271,7
}
#endif
 
 
static void
gen6_emit_copy_state(struct sna *sna,
const struct sna_composite_op *op)
2330,6 → 2304,7
gen6_emit_state(sna, op, offset | dirty);
}
 
 
static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *src_bo,
struct kgem_bo *dst_bo,
2351,14 → 2326,11
untiled_tlb_miss(dst_bo))
return true;
 
if (kgem_bo_is_render(dst_bo) ||
kgem_bo_is_render(src_bo))
return false;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
return (prefer_blt_bo(sna, src_bo) >= 0 &&
prefer_blt_bo(sna, dst_bo) > 0);
}
 
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
2492,8 → 2464,7
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
extents.y2 - extents.y1))
goto fallback_tiled;
 
dst_dx += tmp.dst.x;
2508,14 → 2479,14
 
extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
if (extents.x1 < box[i].x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
if (extents.y1 < box[i].y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
if (extents.x2 > box[i].x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
if (extents.y2 > box[i].y2)
extents.y2 = box[i].y2;
}
 
2661,35 → 2632,10
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
 
if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
too_large(src->drawable.width, src->drawable.height) ||
too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy(sna, alu, src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op);
}
 
if (dst->drawable.depth == src->drawable.depth) {
op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
op->base.dst.format = PIXMAN_a8r8g8b8;
op->base.src.pict_format = op->base.dst.format;
} else {
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(op->base.src.pict_format))
goto fallback;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
2729,7 → 2675,119
}
#endif
 
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
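/* Use nearest filtering for a 1:1 blit, bilinear when the source is scaled */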
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
 
 
 
#if 0
 
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
2768,16 → 2826,10
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (kgem_bo_is_render(bo))
return false;
 
if (untiled_tlb_miss(bo))
return true;
 
if (!prefer_blt_ring(sna, bo, 0))
return false;
 
return prefer_blt_bo(sna, bo);
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
 
static bool
2858,8 → 2910,7
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
extents.y2 - extents.y1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
3334,12 → 3385,12
 
static bool is_gt2(struct sna *sna)
{
return sna->PciInfo->device_id & 0x30;
return DEVICE_ID(sna->PciInfo) & 0x30;
}
 
static bool is_mobile(struct sna *sna)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
3420,52 → 3471,28
return state->general_bo != NULL;
}
 
const char *gen6_render_init(struct sna *sna, const char *backend)
bool gen6_render_init(struct sna *sna)
{
if (!gen6_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
// sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
 
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
// sna->render.copy_boxes = gen6_render_copy_boxes;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen6_render_copy;
#endif
sna->render.blit_tex = gen6_blit_tex;
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen6_render_clear;
#endif
#endif
// sna->render.copy = gen6_render_copy;
 
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;
// sna->render.fill_boxes = gen6_render_fill_boxes;
// sna->render.fill = gen6_render_fill;
// sna->render.fill_one = gen6_render_fill_one;
// sna->render.clear = gen6_render_clear;
 
sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
3473,113 → 3500,9
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen6.info->name;
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
return true;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen7_render.c
45,8 → 45,6
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define ALWAYS_FLUSH 0
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
77,7 → 75,6
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
struct gt_info {
const char *name;
uint32_t max_vs_threads;
uint32_t max_gs_threads;
uint32_t max_wm_threads;
85,107 → 82,57
int size;
int max_vs_entries;
int max_gs_entries;
int push_ps_size; /* in 1KBs */
} urb;
int gt;
};
 
static const struct gt_info ivb_gt_info = {
.name = "Ivybridge (gen7)",
.max_vs_threads = 16,
.max_gs_threads = 16,
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 64, 64, 8 },
.gt = 0,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info ivb_gt1_info = {
.name = "Ivybridge (gen7, gt1)",
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
.gt = 1,
.urb = { 128, 512, 192 },
};
 
static const struct gt_info ivb_gt2_info = {
.name = "Ivybridge (gen7, gt2)",
.max_vs_threads = 128,
.max_gs_threads = 128,
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 256, 704, 320, 8 },
.gt = 2,
.urb = { 256, 704, 320 },
};
 
static const struct gt_info byt_gt_info = {
.name = "Baytrail (gen7)",
.urb = { 128, 64, 64 },
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
.gt = 1,
};
 
static const struct gt_info hsw_gt_info = {
.name = "Haswell (gen7.5)",
.max_vs_threads = 8,
.max_gs_threads = 8,
.max_wm_threads =
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 64, 64, 8 },
.gt = 0,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info hsw_gt1_info = {
.name = "Haswell (gen7.5, gt1)",
.max_vs_threads = 70,
.max_gs_threads = 70,
.max_wm_threads =
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 640, 256, 8 },
.gt = 1,
.urb = { 128, 640, 256 },
};
 
static const struct gt_info hsw_gt2_info = {
.name = "Haswell (gen7.5, gt2)",
.max_vs_threads = 140,
.max_gs_threads = 140,
.max_wm_threads =
(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640, 8 },
.gt = 2,
};
 
static const struct gt_info hsw_gt3_info = {
.name = "Haswell (gen7.5, gt3)",
.max_vs_threads = 280,
.max_gs_threads = 280,
.max_wm_threads =
(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 512, 3328, 1280, 16 },
.gt = 3,
.urb = { 256, 1664, 640 },
};
 
inline static bool is_ivb(struct sna *sna)
{
return sna->kgem.gen == 070;
}
 
inline static bool is_byt(struct sna *sna)
{
return sna->kgem.gen == 071;
}
 
inline static bool is_hsw(struct sna *sna)
{
return sna->kgem.gen == 075;
}
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
347,24 → 294,8
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
376,22 → 307,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
418,7 → 335,7
gen7_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
OUT_BATCH(8); /* in 1KBs */
 
/* num of VS entries must be divisible by 8 if size < 9 */
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
442,10 → 359,6
static void
gen7_emit_state_base_address(struct sna *sna)
{
uint32_t mocs;
 
mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
 
OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
453,17 → 366,17
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
mocs | BASE_ADDRESS_MODIFY));
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
mocs | BASE_ADDRESS_MODIFY));
BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
992,17 → 905,10
}
 
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
gen7_emit_pipe_flush(struct sna *sna)
{
unsigned stall;
 
stall = 0;
if (need_stall)
stall = (GEN7_PIPE_CONTROL_CS_STALL |
GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
 
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
1024,7 → 930,8
{
bool need_stall;
 
assert(op->dst.bo->exec);
if (sna->render_state.gen7.emit_flush)
gen7_emit_pipe_flush(sna);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
1035,18 → 942,13
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_stall &= gen7_emit_drawing_rectangle(sna, op);
 
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
sna->render_state.gen7.emit_flush = false;
need_stall = false;
}
if (sna->render_state.gen7.emit_flush) {
gen7_emit_pipe_flush(sna, need_stall);
need_stall = false;
}
if (need_stall)
gen7_emit_pipe_stall(sna);
 
1189,7 → 1091,7
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
1202,12 → 1104,9
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
gen7_tiling_bits(bo->tiling) |
format << GEN7_SURFACE_FORMAT_SHIFT);
if (bo->tiling == I915_TILING_Y)
ss[0] |= GEN7_SURFACE_VALIGN_4;
if (is_dst) {
ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
1214,13 → 1113,13
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
ss[5] = is_scanout ? 0 : 3 << 16;
ss[6] = 0;
ss[7] = 0;
if (is_hsw(sna))
if (sna->kgem.gen == 075)
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1343,8 → 1242,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1499,266 → 1398,114
return sna_static_stream_offsetof(stream, base);
}
 
#if 0
static uint32_t gen7_bind_video_source(struct sna *sna,
struct kgem_bo *bo,
uint32_t offset,
int width,
int height,
int pitch,
uint32_t format)
{
uint32_t *ss, bind;
 
bind = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
 
assert(bo->tiling == I915_TILING_NONE);
 
ss = sna->kgem.batch + bind;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
format << GEN7_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
I915_GEM_DOMAIN_SAMPLER << 16,
offset);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = 0;
ss[6] = 0;
ss[7] = 0;
if (is_hsw(sna))
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
bind, bo->handle, ss[1],
format, width, height, pitch, offset));
 
return bind * sizeof(uint32_t);
}
 
static void gen7_emit_video_state(struct sna *sna,
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
gen7_get_batch(sna, op);
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen7_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen7_emit_state(sna, op, offset);
}
 
static bool
gen7_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
tmp->op = PictOpSrc;
 
memset(&tmp, 0, sizeof(tmp));
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
 
tmp.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
__FUNCTION__,
frame->src.x1, frame->src.y1,
src_width, src_height,
dst_width, dst_height,
frame->width, frame->height));
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
__FUNCTION__,
src_scale_x, src_scale_y,
src_offset_x, src_offset_y));
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
__FUNCTION__,
box->x1, box->y1,
box->x2, box->y2,
pix_xoff, pix_yoff,
box->x1 * src_scale_x + src_offset_x,
box->y1 * src_scale_y + src_offset_y,
box->x2 * src_scale_x + src_offset_x,
box->y2 * src_scale_y + src_offset_y));
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
box++;
}
 
gen4_vertex_flush(sna);
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
#endif
 
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
}
 
 
 
1850,143 → 1597,6
 
 
 
 
 
#if 0
static bool
gen7_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;
 
DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));
 
if (op >= ARRAY_SIZE(gen7_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}
 
if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;
 
if (!gen7_check_dst_format(format))
return false;
}
 
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;
 
DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;
 
boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
 
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen7.flags = FILL_FLAGS(op, format);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen7_emit_fill_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
do {
int n_this_time;
int16_t *v;
 
n_this_time = gen7_get_rectangles(sna, &tmp, n,
gen7_emit_fill_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
 
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
#endif
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
2066,20 → 1676,14
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
static bool is_gt3(struct sna *sna)
{
assert(sna->kgem.gen == 075);
return sna->PciInfo->device_id & 0x20;
}
 
static bool is_gt2(struct sna *sna)
{
return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20);
return DEVICE_ID(sna->PciInfo) & 0x20;
}
 
static bool is_mobile(struct sna *sna)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
2089,24 → 1693,19
struct gen7_sampler_state *ss;
int i, j, k, l, m;
 
if (is_ivb(sna)) {
if (sna->kgem.gen == 070) {
state->info = &ivb_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (is_byt(sna)) {
state->info = &byt_gt_info;
} else if (is_hsw(sna)) {
} else if (sna->kgem.gen == 075) {
state->info = &hsw_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (is_gt3(sna))
state->info = &hsw_gt3_info;
else if (is_gt2(sna))
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &hsw_gt1_info;
if (is_gt2(sna))
state->info = &hsw_gt2_info;
else
state->info = &hsw_gt1_info;
}
} else
return false;
2173,51 → 1772,16
return state->general_bo != NULL;
}
 
const char *gen7_render_init(struct sna *sna, const char *backend)
bool gen7_render_init(struct sna *sna)
{
if (!gen7_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen7_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen7_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen7_render_clear;
#endif
#endif
 
sna->render.blit_tex = gen7_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen7_render_flush;
sna->render.reset = gen7_render_reset;
2225,102 → 1789,9
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen7.info->name;
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
return true;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
 
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
/drivers/video/Intel-2D/sna.c
1,51 → 1,11
/**************************************************************************
 
Copyright 2001 VA Linux Systems Inc., Fremont, California.
Copyright © 2002 by David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors: Jeff Hartmann <jhartmann@valinux.com>
* Abraham van der Merwe <abraham@2d3d.co.za>
* David Dawes <dawes@xfree86.org>
* Alan Hourihane <alanh@tungstengraphics.com>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <memory.h>
#include <malloc.h>
#include <kos32sys.h>
#include <pixlib2.h>
#include "i915_pciids.h"
 
#include "compiler.h"
#include "sna.h"
#include "intel_driver.h"
 
#define to_surface(x) (surface_t*)((x)->handle)
 
86,8 → 46,6
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing,
unsigned flags, uint32_t width, uint32_t height,
uint32_t bpp, uint32_t tiling, uint32_t *pitch);
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
int pitch, int height);
 
void kgem_close_batches(struct kgem *kgem);
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
95,6 → 53,8
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci);
 
//struct kgem_bo *create_bo(bitmap_t *bitmap);
 
static bool sna_solid_cache_init(struct sna *sna);
 
struct sna *sna_device;
106,46 → 66,8
(void)sna;
}
 
static void no_render_flush(struct sna *sna)
void no_render_init(struct sna *sna)
{
(void)sna;
}
 
static void
no_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (!kgem->nbatch)
return;
 
if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
 
(void)new_mode;
}
 
static void
no_render_retire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_expire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_fini(struct sna *sna)
{
(void)sna;
}
 
const char *no_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
memset (render,0, sizeof (*render));
155,19 → 77,27
render->vertices = render->vertex_data;
render->vertex_size = ARRAY_SIZE(render->vertex_data);
 
// render->composite = no_render_composite;
 
// render->copy_boxes = no_render_copy_boxes;
// render->copy = no_render_copy;
 
// render->fill_boxes = no_render_fill_boxes;
// render->fill = no_render_fill;
// render->fill_one = no_render_fill_one;
// render->clear = no_render_clear;
 
render->reset = no_render_reset;
render->flush = no_render_flush;
render->fini = no_render_fini;
// render->flush = no_render_flush;
// render->fini = no_render_fini;
 
sna->kgem.context_switch = no_render_context_switch;
sna->kgem.retire = no_render_retire;
sna->kgem.expire = no_render_expire;
// sna->kgem.context_switch = no_render_context_switch;
// sna->kgem.retire = no_render_retire;
 
sna->kgem.mode = KGEM_RENDER;
if (sna->kgem.gen >= 60)
sna->kgem.ring = KGEM_RENDER;
 
sna_vertex_init(sna);
return "generic";
}
 
void sna_vertex_init(struct sna *sna)
181,27 → 111,47
{
const char *backend;
 
backend = no_render_init(sna);
if (sna->info->gen >= 0100)
(void)backend;
else if (sna->info->gen >= 070)
backend = gen7_render_init(sna, backend);
else if (sna->info->gen >= 060)
backend = gen6_render_init(sna, backend);
else if (sna->info->gen >= 050)
backend = gen5_render_init(sna, backend);
else if (sna->info->gen >= 040)
backend = gen4_render_init(sna, backend);
else if (sna->info->gen >= 030)
backend = gen3_render_init(sna, backend);
// list_init(&sna->deferred_free);
// list_init(&sna->dirty_pixmaps);
// list_init(&sna->active_pixmaps);
// list_init(&sna->inactive_clock[0]);
// list_init(&sna->inactive_clock[1]);
 
// sna_accel_install_timers(sna);
 
 
backend = "no";
no_render_init(sna);
 
if (sna->info->gen >= 0100) {
} else if (sna->info->gen >= 070) {
if (gen7_render_init(sna))
backend = "IvyBridge";
} else if (sna->info->gen >= 060) {
if (gen6_render_init(sna))
backend = "SandyBridge";
} else if (sna->info->gen >= 050) {
if (gen5_render_init(sna))
backend = "Ironlake";
} else if (sna->info->gen >= 040) {
if (gen4_render_init(sna))
backend = "Broadwater/Crestline";
} else if (sna->info->gen >= 030) {
if (gen3_render_init(sna))
backend = "gen3";
}
 
DBG(("%s(backend=%s, prefer_gpu=%x)\n",
__FUNCTION__, backend, sna->render.prefer_gpu));
 
kgem_reset(&sna->kgem);
 
// if (!sna_solid_cache_init(sna))
// return false;
 
sna_device = sna;
 
 
return kgem_init_fb(&sna->kgem, &sna_fb);
}
 
236,17 → 186,33
 
memset(sna, 0, sizeof(*sna));
 
sna->cpu_features = sna_cpu_detect();
sna->PciInfo = &device;
 
sna->PciInfo = &device;
sna->info = intel_detect_chipset(sna->PciInfo);
sna->scrn = service;
 
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
 
/*
if (!xf86ReturnOptValBool(sna->Options,
OPTION_RELAXED_FENCING,
sna->kgem.has_relaxed_fencing)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED,
"Disabling use of relaxed fencing\n");
sna->kgem.has_relaxed_fencing = 0;
}
if (!xf86ReturnOptValBool(sna->Options,
OPTION_VMAP,
sna->kgem.has_vmap)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_vmap ? X_CONFIG : X_PROBED,
"Disabling use of vmap\n");
sna->kgem.has_vmap = 0;
}
*/
 
/* Disable tiling by default */
sna->tiling = 0;
sna->tiling = SNA_TILING_DISABLE;
 
/* Default fail-safe value of 75 Hz */
// sna->vblank_interval = 1000 * 1000 * 1000 / 75;
524,47 → 490,6
return -1;
};
 
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
{
surface_t *sf;
struct kgem_bo *bo;
 
sf = malloc(sizeof(*sf));
if(sf == NULL)
goto err_1;
 
__lock_acquire_recursive(__sna_lock);
 
bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height);
 
__lock_release_recursive(__sna_lock);
 
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->data = NULL;
sf->pitch = bo->pitch;
sf->bo = bo;
sf->bo_size = PAGE_SIZE * bo->size.pages.count;
sf->flags = bitmap->flags;
 
bitmap->handle = (uint32_t)sf;
 
return 0;
 
err_2:
__lock_release_recursive(__sna_lock);
free(sf);
err_1:
return -1;
};
 
void sna_set_bo_handle(bitmap_t *bitmap, int handle)
{
surface_t *sf = to_surface(bitmap);
struct kgem_bo *bo = sf->bo;
bo->handle = handle;
}
 
int sna_destroy_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
761,7 → 686,7
 
VG_CLEAR(update);
update.handle = mask_bo->handle;
update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo);
update.bo_map = (__u32)MAP(mask_bo->map);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
mask_bo->pitch = update.bo_pitch;
 
831,6 → 756,8
 
 
 
 
 
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};
880,37 → 807,100
 
static const struct pci_id_match intel_device_match[] = {
 
INTEL_I915G_IDS(&intel_i915_info),
INTEL_I915GM_IDS(&intel_i915_info),
INTEL_I945G_IDS(&intel_i945_info),
INTEL_I945GM_IDS(&intel_i945_info),
 
INTEL_G33_IDS(&intel_g33_info),
INTEL_PINEVIEW_IDS(&intel_g33_info),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ),
 
INTEL_I965G_IDS(&intel_i965_info),
INTEL_I965GM_IDS(&intel_i965_info),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ),
/* Another marketing win: Q35 is another g33 device not a gen4 part
* like its G35 brethren.
*/
INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ),
 
INTEL_G45_IDS(&intel_g4x_info),
INTEL_GM45_IDS(&intel_g4x_info),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ),
 
INTEL_IRONLAKE_D_IDS(&intel_ironlake_info),
INTEL_IRONLAKE_M_IDS(&intel_ironlake_info),
INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ),
 
INTEL_SNB_D_IDS(&intel_sandybridge_info),
INTEL_SNB_M_IDS(&intel_sandybridge_info),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ),
 
INTEL_IVB_D_IDS(&intel_ivybridge_info),
INTEL_IVB_M_IDS(&intel_ivybridge_info),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ),
 
INTEL_HSW_D_IDS(&intel_haswell_info),
INTEL_HSW_M_IDS(&intel_haswell_info),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
 
INTEL_VLV_D_IDS(&intel_valleyview_info),
INTEL_VLV_M_IDS(&intel_valleyview_info),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
 
INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
 
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
 
{ 0, 0, 0 },
};
 
958,21 → 948,7
 
}
 
int intel_get_device_id(int fd)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
 
return devid;
}
 
int drmIoctl(int fd, unsigned long request, void *arg)
{
ioctl_t io;
/drivers/video/Intel-2D/sna.h
37,83 → 37,26
#ifndef _SNA_H_
#define _SNA_H_
 
#include <stdint.h>
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "compiler.h"
 
 
#include <stdint.h>
#include <memory.h>
#include <malloc.h>
#include <errno.h>
#include <kos32sys.h>
 
#include "intel_driver.h"
#include "pciaccess.h"
 
#include <drm.h>
#include <i915_drm.h>
#include "compiler.h"
 
#ifdef HAVE_DRI2_H
#include <dri2.h>
#endif
//#define DBG(x)
//#define DBG(x) ErrorF x
 
#if HAVE_UDEV
#include <libudev.h>
#endif
#define assert(x)
 
#if 0
#include <xorg-server.h>
 
#include <xf86Crtc.h>
#if XF86_CRTC_VERSION >= 5
#define HAS_PIXMAP_SHARING 1
#endif
 
#include <xf86str.h>
#include <windowstr.h>
#include <glyphstr.h>
#include <picturestr.h>
#include <gcstruct.h>
#include <xvdix.h>
 
#include <pciaccess.h>
 
#include <xf86drmMode.h>
 
#include "../compat-api.h"
 
#endif
 
#include <assert.h>
 
#define ErrorF printf
 
#if HAS_DEBUG_FULL
#define DBG(x) ErrorF x
#else
#define DBG(x)
#endif
 
#define DEBUG_NO_BLT 0
 
#define DEBUG_FLUSH_BATCH 0
 
#define TEST_ALL 0
#define TEST_ACCEL (TEST_ALL || 0)
#define TEST_BATCH (TEST_ALL || 0)
#define TEST_BLT (TEST_ALL || 0)
#define TEST_COMPOSITE (TEST_ALL || 0)
#define TEST_DAMAGE (TEST_ALL || 0)
#define TEST_GRADIENT (TEST_ALL || 0)
#define TEST_GLYPHS (TEST_ALL || 0)
#define TEST_IO (TEST_ALL || 0)
#define TEST_KGEM (TEST_ALL || 0)
#define TEST_RENDER (TEST_ALL || 0)
 
int drmIoctl(int fd, unsigned long request, void *arg);
 
 
151,69 → 94,12
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0)
 
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
 
/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
 
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
 
/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
 
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
 
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
 
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
 
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* YUV formats */
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
 
} pixman_format_code_t;
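The format codes above are plain bit-packed integers. A worked example of the encoding, assuming the standard pixman layout (bpp << 24 | type << 16 | a << 12 | r << 8 | g << 4 | b, with PIXMAN_TYPE_ARGB == 2), which is the layout the PIXMAN_FORMAT_* accessor macros below decode:

/* PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32, PIXMAN_TYPE_ARGB, 8, 8, 8, 8)
 *                 = (32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8
 *                 = 0x20028888
 * so, for instance, PIXMAN_FORMAT_A(PIXMAN_a8r8g8b8) recovers the
 * 8-bit alpha width. */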
 
typedef enum _PictFormatShort {
 
PICT_a2r10g10b10 = PIXMAN_a2r10g10b10,
PICT_x2r10g10b10 = PIXMAN_x2r10g10b10,
PICT_a2b10g10r10 = PIXMAN_a2b10g10r10,
PICT_x2b10g10r10 = PIXMAN_x2b10g10r10,
 
PICT_a8r8g8b8 = PIXMAN_a8r8g8b8,
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8,
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8,
221,52 → 107,10
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8,
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8,
 
/* 24bpp formats */
PICT_r8g8b8 = PIXMAN_r8g8b8,
PICT_b8g8r8 = PIXMAN_b8g8r8,
 
/* 16bpp formats */
PICT_r5g6b5 = PIXMAN_r5g6b5,
PICT_b5g6r5 = PIXMAN_b5g6r5,
 
PICT_a1r5g5b5 = PIXMAN_a1r5g5b5,
PICT_x1r5g5b5 = PIXMAN_x1r5g5b5,
PICT_a1b5g5r5 = PIXMAN_a1b5g5r5,
PICT_x1b5g5r5 = PIXMAN_x1b5g5r5,
PICT_a4r4g4b4 = PIXMAN_a4r4g4b4,
PICT_x4r4g4b4 = PIXMAN_x4r4g4b4,
PICT_a4b4g4r4 = PIXMAN_a4b4g4r4,
PICT_x4b4g4r4 = PIXMAN_x4b4g4r4,
 
/* 8bpp formats */
PICT_a8 = PIXMAN_a8,
PICT_r3g3b2 = PIXMAN_r3g3b2,
PICT_b2g3r3 = PIXMAN_b2g3r3,
PICT_a2r2g2b2 = PIXMAN_a2r2g2b2,
PICT_a2b2g2r2 = PIXMAN_a2b2g2r2,
 
PICT_c8 = PIXMAN_c8,
PICT_g8 = PIXMAN_g8,
 
PICT_x4a4 = PIXMAN_x4a4,
 
PICT_x4c4 = PIXMAN_x4c4,
PICT_x4g4 = PIXMAN_x4g4,
 
/* 4bpp formats */
PICT_a4 = PIXMAN_a4,
PICT_r1g2b1 = PIXMAN_r1g2b1,
PICT_b1g2r1 = PIXMAN_b1g2r1,
PICT_a1r1g1b1 = PIXMAN_a1r1g1b1,
PICT_a1b1g1r1 = PIXMAN_a1b1g1r1,
 
PICT_c4 = PIXMAN_c4,
PICT_g4 = PIXMAN_g4,
 
/* 1bpp formats */
PICT_a1 = PIXMAN_a1,
 
PICT_g1 = PIXMAN_g1
} PictFormatShort;
 
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
422,22 → 266,8
#define PictOpMaximum 13
 
 
#define SNA_CURSOR_X 64
#define SNA_CURSOR_Y SNA_CURSOR_X
 
struct sna_client {
int is_compositor; /* only 4 bits used */
};
 
 
//#define assert(x)
 
 
struct sna {
struct kgem kgem;
 
unsigned scrn;
 
unsigned flags;
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
444,31 → 274,21
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
#define SNA_FLUSH_GTT 0x40
#define SNA_IS_HOSTED 0x80
#define SNA_PERFORMANCE 0x100
#define SNA_POWERSAVE 0x200
#define SNA_REPROBE 0x80000000
 
unsigned cpu_features;
#define MMX 0x1
#define SSE 0x2
#define SSE2 0x4
#define SSE3 0x8
#define SSSE3 0x10
#define SSE4_1 0x20
#define SSE4_2 0x40
#define AVX 0x80
#define AVX2 0x100
 
struct list flush_pixmaps;
struct list active_pixmaps;
 
 
 
// int vblank_interval;
 
// struct list deferred_free;
// struct list dirty_pixmaps;
// struct list active_pixmaps;
// struct list inactive_clock[2];
 
unsigned int tiling;
#define SNA_TILING_DISABLE 0x0
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
#define SNA_TILING_ALL (~0)
483,6 → 303,7
uint32_t fill_alu;
} blt_state;
union {
// struct gen2_render_state gen2;
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
497,11 → 318,12
/* Driver phase/state information */
// Bool suspended;
 
struct kgem kgem;
struct sna_render render;
 
#if DEBUG_MEMORY
struct {
int pixmap_allocs;
int shadow_pixels_allocs;
int cpu_bo_allocs;
size_t shadow_pixels_bytes;
size_t cpu_bo_bytes;
530,78 → 352,4
#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif
static inline bool
_sna_transform_point(const PictTransform *transform,
int64_t x, int64_t y, int64_t result[3])
{
int j;
 
for (j = 0; j < 3; j++)
result[j] = (transform->matrix[j][0] * x +
transform->matrix[j][1] * y +
transform->matrix[j][2]);
 
return result[2] != 0;
}
 
static inline void
_sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out)
{
 
int64_t result[3];
 
_sna_transform_point(transform, x, y, result);
*x_out = result[0] / (double)result[2];
*y_out = result[1] / (double)result[2];
}
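The helpers above work in homogeneous coordinates with 16.16 fixed-point PictTransform entries; for an affine transform result[2] is pixman_fixed_1, so the division cancels the fixed-point scale. A standalone sketch replaying the same arithmetic for a pure translation (values chosen for illustration only, not taken from the driver):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const int64_t one = 1 << 16;            /* pixman_fixed_1 */
        const int64_t m[3][3] = {               /* translate by (+5, -3) */
                { one,   0,  5 * one },
                {   0, one, -3 * one },
                {   0,   0,      one }
        };
        int64_t x = 10, y = 20, r[3];
        int j;

        /* same per-row sum as _sna_transform_point() */
        for (j = 0; j < 3; j++)
                r[j] = m[j][0] * x + m[j][1] * y + m[j][2];

        /* r[2] == pixman_fixed_1, so dividing restores pixel units:
         * prints 15.000000 17.000000 */
        printf("%f %f\n", r[0] / (double)r[2], r[1] / (double)r[2]);
        return 0;
}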
 
static inline void
_sna_get_transformed_scaled(int x, int y,
const PictTransform *transform, const float *sf,
float *x_out, float *y_out)
{
*x_out = sf[0] * (transform->matrix[0][0] * x +
transform->matrix[0][1] * y +
transform->matrix[0][2]);
 
*y_out = sf[1] * (transform->matrix[1][0] * x +
transform->matrix[1][1] * y +
transform->matrix[1][2]);
}
 
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out);
 
void
sna_get_transformed_coordinates_3d(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out, float *z_out);
 
bool sna_transform_is_affine(const PictTransform *t);
bool sna_transform_is_integer_translation(const PictTransform *t,
int16_t *tx, int16_t *ty);
bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
static inline bool
sna_affine_transform_is_rotation(const PictTransform *t)
{
assert(sna_transform_is_affine(t));
return t->matrix[0][1] | t->matrix[1][0];
}
 
static inline bool
sna_transform_equal(const PictTransform *a, const PictTransform *b)
{
if (a == b)
return true;
 
if (a == NULL || b == NULL)
return false;
 
return memcmp(a, b, sizeof(*a)) == 0;
}
#endif /* _SNA_H */
/drivers/video/Intel-2D/sna_render.h
30,7 → 30,11
struct sna_composite_op {
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
 
fastcall void (*box)(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
 
struct sna_damage **damage;
188,6 → 192,9
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap);
 
bool (*fill_boxes)(struct sna *sna,
351,10 → 358,10
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
uint16_t last_pipelined_pointers;
 
bool needs_invariant;
};
498,15 → 505,17
uint32_t format);
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
const char *no_render_init(struct sna *sna);
const char *gen2_render_init(struct sna *sna, const char *backend);
const char *gen3_render_init(struct sna *sna, const char *backend);
const char *gen4_render_init(struct sna *sna, const char *backend);
const char *gen5_render_init(struct sna *sna, const char *backend);
const char *gen6_render_init(struct sna *sna, const char *backend);
const char *gen7_render_init(struct sna *sna, const char *backend);
void no_render_init(struct sna *sna);
 
bool gen2_render_init(struct sna *sna);
bool gen3_render_init(struct sna *sna);
bool gen4_render_init(struct sna *sna);
bool gen5_render_init(struct sna *sna);
bool gen6_render_init(struct sna *sna);
bool gen7_render_init(struct sna *sna);
 
#if 0
 
bool sna_tiling_composite(uint32_t op,
PicturePtr src,
PicturePtr mask,
674,8 → 683,7
bool
sna_render_composite_redirect(struct sna *sna,
struct sna_composite_op *op,
int x, int y, int width, int height,
bool partial);
int x, int y, int width, int height);
 
void
sna_render_composite_redirect_done(struct sna *sna,
683,8 → 691,8
 
bool
sna_composite_mask_is_opaque(PicturePtr mask);
 
#endif
 
void sna_vertex_init(struct sna *sna);
 
static inline void sna_vertex_lock(struct sna_render *r)
718,69 → 726,4
return was_active;
}
 
#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
PICT_FORMAT_TYPE(format), \
0, \
PICT_FORMAT_R(format), \
PICT_FORMAT_G(format), \
PICT_FORMAT_B(format))
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
#endif /* SNA_RENDER_H */
/drivers/video/Intel-2D/brw/brw_eu.h
2239,6 → 2239,8
struct brw_reg src0,
struct brw_reg src1);
 
void brw_print_reg(struct brw_reg reg);
 
static inline void brw_math_invert(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
/drivers/video/Intel-2D/compiler.h
37,48 → 37,21
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define tightly_packed __attribute__((__packed__))
#define __packed__ __attribute__((__packed__))
#define flatten __attribute__((flatten))
#define page_aligned __attribute__((aligned(4096)))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline inline
#define force_inline
#define fastcall
#define must_check
#define constant
#define pure
#define tightly_packed
#define __packed__
#define flatten
#define page_aligned
#endif
 
#define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor))
 
#if HAS_GCC(4, 5)
#define sse2 __attribute__((target("sse2,fpmath=sse")))
#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 7)
#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast __attribute__((optimize("Ofast")))
#else
#define fast
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))
#else
#define fast_memcpy
#endif
 
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
/drivers/video/Intel-2D/sna_render_inline.h
17,7 → 17,7
dst->drawable.height > sna->render.max_3d_size);
}
 
static force_inline float pack_2s(int16_t x, int16_t y)
static inline float pack_2s(int16_t x, int16_t y)
{
union {
struct sna_coordinate p;
28,21 → 28,21
return u.f;
}
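pack_2s reinterprets two signed 16-bit coordinates as the 32 bits of one float, so a vertex position costs a single dword; the float value itself is never used as a number. A sketch of an equivalent explicit packing, assuming struct sna_coordinate lays out x in the low half-word and y in the high half-word on a little-endian target (an assumption, not taken from the header):

#include <stdint.h>
#include <string.h>

static float pack_2s_bits(int16_t x, int16_t y)
{
        uint32_t bits = (uint16_t)x | ((uint32_t)(uint16_t)y << 16);
        float f;

        memcpy(&f, &bits, sizeof(f));   /* reinterpret the bits, no conversion */
        return f;
}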
 
static force_inline int vertex_space(struct sna *sna)
static inline int vertex_space(struct sna *sna)
{
return sna->render.vertex_size - sna->render.vertex_used;
}
static force_inline void vertex_emit(struct sna *sna, float v)
static inline void vertex_emit(struct sna *sna, float v)
{
assert(sna->render.vertex_used < sna->render.vertex_size);
sna->render.vertices[sna->render.vertex_used++] = v;
}
static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
vertex_emit(sna, pack_2s(x, y));
}
 
static force_inline int batch_space(struct sna *sna)
static inline int batch_space(struct sna *sna)
{
assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);
49,7 → 49,7
return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED;
}
 
static force_inline void batch_emit(struct sna *sna, uint32_t dword)
static inline void batch_emit(struct sna *sna, uint32_t dword)
{
assert(sna->kgem.mode != KGEM_NONE);
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);
56,7 → 56,7
sna->kgem.batch[sna->kgem.nbatch++] = dword;
}
 
static force_inline void batch_emit_float(struct sna *sna, float f)
static inline void batch_emit_float(struct sna *sna, float f)
{
union {
uint32_t dw;
67,4 → 67,5
}
 
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/gen4_vertex.c
34,10 → 34,6
#include "sna_render_inline.h"
#include "gen4_vertex.h"
 
#ifndef sse2
#define sse2
#endif
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
227,601 → 223,7
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
/* specialised vertex emission routines */
 
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
force_inline static float
compute_linear(const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
return ((x+channel->offset[0]) * channel->u.linear.dx +
(y+channel->offset[1]) * channel->u.linear.dy +
channel->u.linear.offset);
}
 
sse2 inline static void
emit_texcoord(struct sna *sna,
const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
if (channel->is_solid) {
OUT_VERTEX_F(x);
return;
}
 
x += channel->offset[0];
y += channel->offset[1];
 
if (channel->is_affine) {
float s, t;
 
sna_get_transformed_coordinates(x, y,
channel->transform,
&s, &t);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
} else {
float s, t, w;
 
sna_get_transformed_coordinates_3d(x, y,
channel->transform,
&s, &t, &w);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
OUT_VERTEX_F(w);
}
}
 
sse2 force_inline static void
emit_vertex(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
}
 
sse2 fastcall static void
emit_primitive(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
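/*
 * Note: emit_primitive() writes three vertices per composite rectangle,
 * (x2,y2), (x1,y2) and (x1,y1); the hardware RECTLIST primitive derives
 * the fourth corner itself.  Hence op->floats_per_rect is always
 * 3 * op->floats_per_vertex, as set at the end of
 * gen4_choose_composite_emitter() below.
 */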
 
sse2 force_inline static void
emit_vertex_mask(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
emit_texcoord(sna, &op->mask, mskX, mskY);
}
 
sse2 fastcall static void
emit_primitive_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex_mask(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
 
sse2 fastcall static void
emit_primitive_solid(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_solid(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
box++;
v += 6;
} while (--nbox);
}
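/*
 * Note: for a solid source only the packed destination coordinate varies,
 * so the emitters above write 2 floats per vertex (6 per rect, matching
 * the asserts).  The constant .5 texcoord presumably samples the centre
 * of the 1x1 solid-colour surface bound for the channel.
 */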
 
sse2 fastcall static void
emit_primitive_linear(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse2 fastcall static void
emit_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse2 fastcall static void
emit_boxes_simple_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
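/*
 * Note: the "simple source" emitters handle affine transforms without
 * rotation or shear, so each texture coordinate reduces to a per-axis
 * scale plus offset (the xx/x0 and yy/y0 terms read straight from the
 * transform matrix above), normalised by op->src.scale[].
 */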
 
sse2 fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = r->dst.x;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = r->dst.y;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[7], &v[8]);
}
 
sse2 fastcall static void
emit_boxes_affine_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = box->x1;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = box->y1;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y1,
op->src.transform, op->src.scale,
&v[7], &v[8]);
box++;
v += 9;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
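/*
 * Note: the *_identity_mask emitters write 4 floats per vertex (12 per
 * rect): the packed destination coordinate, one source value (the
 * constant .5 for solid, or the computed gradient value for linear) and
 * two mask texcoords.  This matches floats_per_vertex = 4 and
 * vb = 1 | 2 << 2 chosen in gen4_choose_composite_emitter().
 */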
 
sse2 fastcall static void
fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
870,831 → 272,8
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_simple_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = ((r->src.x + tx) * xx + x0) * sx;
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = ((r->src.y + ty) * yy + y0) * sy;
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_affine_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[6], &v[7]);
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[11], &v[12]);
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
/* SSE4_2 */
#if defined(sse4_2)
 
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
 
/* AVX2 */
#if defined(avx2)
 
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
avx2 fastcall static void
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
{
unsigned vb;
 
if (tmp->mask.bo) {
1701,38 → 280,8
if (tmp->mask.transform == NULL) {
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source_mask;
1739,130 → 288,17
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_simple_source_identity;
} else {
DBG(("%s: affine src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source_identity;
}
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else {
DBG(("%s: projective source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 6;
vb = 2 << 2 | 3;
}
} else {
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 1;
vb = 0;
if (tmp->mask.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1 << 2;
} else if (tmp->mask.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2 << 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3 << 2;
}
if (tmp->src.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1;
} else if (tmp->src.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3;
}
DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
__FUNCTION__, tmp->floats_per_vertex, vb));
}
} else {
#if 0
if (tmp->src.is_solid) {
DBG(("%s: solid, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_solid;
if (tmp->src.is_opaque && tmp->op == PictOpOver)
tmp->op = PictOpSrc;
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear;
}
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_simple_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_simple_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_simple_source;
}
} else {
DBG(("%s: affine src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else {
DBG(("%s: projective src, no mask\n", __FUNCTION__));
assert(!tmp->src.is_solid);
tmp->prim_emit = emit_primitive;
tmp->floats_per_vertex = 4;
vb = 3;
}
#endif
}
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
return vb;
}
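/*
 * The returned "vb" value packs the per-channel vertex-element widths
 * chosen above: bits [1:0] give the number of floats emitted for the
 * source texcoord (1 = solid/linear, 2 = affine s/t, 3 = projective
 * s/t/w) and bits [3:2] the same for the mask.  Together with the packed
 * destination dword this accounts for floats_per_vertex; a small sketch
 * (not part of the driver):
 */
static int floats_per_vertex_from_vb(unsigned vb)
{
	unsigned src_comps  = vb & 3;		/* 0 when no source texcoord is emitted */
	unsigned mask_comps = (vb >> 2) & 3;	/* 0 when there is no mask channel */

	return 1 + src_comps + mask_comps;	/* +1 for the packed (x,y) dst dword */
}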
 
 
 
/drivers/video/Intel-2D/gen7_render.h
1224,8 → 1224,6
 
/* Surface state DW0 */
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILED (1 << 14)
#define GEN7_SURFACE_TILED_Y (1 << 13)
#define GEN7_SURFACE_FORMAT_SHIFT 18
/drivers/video/Intel-2D/gen4_vertex.h
10,6 → 10,7
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp);
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
 
#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/intel_driver.h
9,52 → 9,180
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL
 
#ifndef PCI_CHIP_I810
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I810_BRIDGE 0x7120
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
#define PCI_CHIP_I810_E_BRIDGE 0x7124
#define PCI_CHIP_I815_BRIDGE 0x1130
#endif
 
#ifndef PCI_CHIP_I830_M
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_I830_M_BRIDGE 0x3575
#endif
 
#ifndef PCI_CHIP_845_G
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_845_G_BRIDGE 0x2560
#endif
 
#ifndef PCI_CHIP_I854
#define PCI_CHIP_I854 0x358E
#define PCI_CHIP_I854_BRIDGE 0x358C
#endif
 
#ifndef PCI_CHIP_I855_GM
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I855_GM_BRIDGE 0x3580
#endif
 
#ifndef PCI_CHIP_I865_G
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I865_G_BRIDGE 0x2570
#endif
 
#ifndef PCI_CHIP_I915_G
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_I915_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I915_GM
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I915_GM_BRIDGE 0x2590
#endif
 
#ifndef PCI_CHIP_E7221_G
#define PCI_CHIP_E7221_G 0x258A
/* Same as I915_G_BRIDGE */
#define PCI_CHIP_E7221_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I945_G
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_G_BRIDGE 0x2770
#endif
 
#ifndef PCI_CHIP_I945_GM
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
#endif
 
#ifndef PCI_CHIP_I945_GME
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC
#endif
 
#ifndef PCI_CHIP_PINEVIEW_M
#define PCI_CHIP_PINEVIEW_M 0xA011
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010
#define PCI_CHIP_PINEVIEW_G 0xA001
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000
#endif
 
#ifndef PCI_CHIP_G35_G
#define PCI_CHIP_G35_G 0x2982
#define PCI_CHIP_G35_G_BRIDGE 0x2980
#endif
 
#ifndef PCI_CHIP_I965_Q
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_Q_BRIDGE 0x2990
#endif
 
#ifndef PCI_CHIP_I965_G
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_G_BRIDGE 0x29A0
#endif
 
#ifndef PCI_CHIP_I946_GZ
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970
#endif
 
#ifndef PCI_CHIP_I965_GM
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00
#endif
 
#ifndef PCI_CHIP_I965_GME
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10
#endif
 
#ifndef PCI_CHIP_G33_G
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_G33_G_BRIDGE 0x29C0
#endif
 
#ifndef PCI_CHIP_Q35_G
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0
#endif
 
#ifndef PCI_CHIP_Q33_G
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0
#endif
 
#ifndef PCI_CHIP_GM45_GM
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_GM45_BRIDGE 0x2A40
#endif
 
#ifndef PCI_CHIP_G45_E_G
#define PCI_CHIP_G45_E_G 0x2E02
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00
#endif
 
#ifndef PCI_CHIP_G45_G
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G45_G_BRIDGE 0x2E20
#endif
 
#ifndef PCI_CHIP_Q45_G
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10
#endif
 
#ifndef PCI_CHIP_G41_G
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_G41_G_BRIDGE 0x2E30
#endif
 
#ifndef PCI_CHIP_B43_G
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G_BRIDGE 0x2E40
#endif
 
#ifndef PCI_CHIP_B43_G1
#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90
#endif
 
#ifndef PCI_CHIP_IRONLAKE_D_G
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040
#endif
 
#ifndef PCI_CHIP_IRONLAKE_M_G
#define PCI_CHIP_IRONLAKE_M_G 0x0046
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044
#endif
 
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
 
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156
66,52 → 194,88
 
#define PCI_CHIP_HASWELL_D_GT1 0x0402
#define PCI_CHIP_HASWELL_D_GT2 0x0412
#define PCI_CHIP_HASWELL_D_GT3 0x0422
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT3 0x0426
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT3 0x042A
#define PCI_CHIP_HASWELL_B_GT1 0x040B
#define PCI_CHIP_HASWELL_B_GT2 0x041B
#define PCI_CHIP_HASWELL_B_GT3 0x042B
#define PCI_CHIP_HASWELL_E_GT1 0x040E
#define PCI_CHIP_HASWELL_E_GT2 0x041E
#define PCI_CHIP_HASWELL_E_GT3 0x042E
 
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_D_GT3 0x0A22
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B
#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B
#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B
#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E
#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E
#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
 
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D02
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT3 0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B
#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B
#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B
#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E
#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E
#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
#define PCI_CHIP_VALLEYVIEW_1 0x0f31
#define PCI_CHIP_VALLEYVIEW_2 0x0f32
#define PCI_CHIP_VALLEYVIEW_3 0x0f33
 
#endif
 
#define I85X_CAPID 0x44
#define I85X_VARIANT_MASK 0x7
#define I85X_VARIANT_SHIFT 5
#define I855_GME 0x0
#define I855_GM 0x4
#define I852_GME 0x2
#define I852_GM 0x5
 
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr
#define VENDOR_ID(p) (p)->vendor_id
#define DEVICE_ID(p) (p)->device_id
#define SUBVENDOR_ID(p) (p)->subvendor_id
#define SUBSYS_ID(p) (p)->subdevice_id
#define CHIP_REVISION(p) (p)->revision
 
#define INTEL_INFO(intel) ((intel)->info)
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
#define IS_GEN4(intel) IS_GENx(intel, 4)
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
/* Some chips have specific errata (or limits) that we need to work around. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
 
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
 
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
 
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
 
struct intel_device_info {
int gen;
};
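/*
 * intel_device_info.gen encodes the generation as 8*major + minor,
 * conventionally written in octal so the digits read as "major.minor":
 * 040 is gen4.0, 060 is gen6.0 and 075 is Haswell (gen7.5).  That is why
 * IS_GENx() tests the range [8*X, 8*(X+1)) and IS_HSW() compares against
 * 075.  A small sketch (not part of the driver) of how the encoding
 * decomposes:
 */
static int gen_major(const struct intel_device_info *info)
{
	return info->gen >> 3;		/* 075 >> 3 == 7 */
}

static int gen_minor(const struct intel_device_info *info)
{
	return info->gen & 7;		/* 075 & 7 == 5 */
}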
/drivers/video/Intel-2D/intel_list.h
107,7 → 107,6
* There are no requirements for a list head; any struct list can be a list
* head.
*/
 
struct list {
struct list *next, *prev;
};