Subversion Repositories: KolibriOS

Compare Revisions: Rev 4500 → Rev 4501

/contrib/sdk/sources/Intel-2D/Makefile
13,6 → 13,7
LDFLAGS:= -shared -s -nostdlib -T ../newlib/dll.lds --entry _DllStartup --image-base=0
PXFLAGS:= --version-script pixlib.ver --output-def $(LIBRARY).orig.def --out-implib $(LIBRARY).dll.a
SNAFLAGS:= --version-script sna.ver --output-def sna.def
UXAFLAGS:= --version-script uxa.ver --output-def uxa.def
INCLUDES= -I. -I../libdrm/intel -I../libdrm/include/drm -I./render_program -I../pixman -I../newlib/include
 
25,7 → 26,10
 
SRC_PIXLIB = pixlib.c
 
SRC_SNA = sna/gen3_render.c \
SRC_SNA = \
sna/gen4_common.c \
sna/gen6_common.c \
sna/gen3_render.c \
sna/gen4_render.c \
sna/gen4_vertex.c \
sna/gen5_render.c \
70,7 → 74,7
# targets
 
all:$(LIBRARY).dll intel-sna.drv
uxa:$(LIBRARY).dll
uxa:$(LIBRARY).dll intel-uxa.drv
ebox:$(LIBRARY).dll
 
 
86,7 → 90,7
mv -f $@ ../../bin
 
intel-uxa.drv: $(OBJ_UXA) Makefile
$(LD) $(LDFLAGS) $(LIBPATH) -o $@ $(OBJ_UXA) $(LIBS)
$(LD) $(LDFLAGS) $(UXAFLAGS) $(LIBPATH) -o $@ $(OBJ_UXA) $(LIBS)
$(STRIP) $@
mv -f $@ ../../bin
/contrib/sdk/sources/Intel-2D/intel_driver.h
118,5 → 118,6
 
const struct intel_device_info *intel_detect_chipset(struct pci_device *pci);
 
#define hosted() (0)
 
#endif /* INTEL_DRIVER_H */
/contrib/sdk/sources/Intel-2D/intel_list.h
261,7 → 261,7
* @return True if the list is empty, or False otherwise.
*/
static inline bool
list_is_empty(struct list *head)
list_is_empty(const struct list *head)
{
return head->next == head;
}
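
The only change here is const-qualifying the head pointer, so read-only callers can test a list without casting away const. A minimal standalone sketch of the effect (stand-in types for illustration, not the driver's header):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the intel_list.h types, local to this example. */
struct list {
    struct list *next, *prev;
};

static inline void list_init(struct list *head)
{
    head->next = head->prev = head;
}

static inline bool list_is_empty(const struct list *head)
{
    return head->next == head;
}

/* With the const-qualified parameter, a read-only consumer compiles cleanly. */
static void report(const struct list *head)
{
    printf("list is %s\n", list_is_empty(head) ? "empty" : "non-empty");
}

int main(void)
{
    struct list head;

    list_init(&head);
    report(&head); /* prints: list is empty */
    return 0;
}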
/contrib/sdk/sources/Intel-2D/sna/brw/brw_wm.c
521,7 → 521,7
if (p->gen >= 060) {
/* First compute 1/z */
brw_PLN(p,
brw_message_reg(msg),
brw_vec8_grf(30, 0),
brw_vec1_grf(uv+1, 0),
brw_vec8_grf(2, 0));
 
532,22 → 532,22
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
 
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
msg += dw/8;
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(4, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));

brw_PLN(p,
brw_vec8_grf(26, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(4, 0));

brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(26, 0),
brw_vec8_grf(30, 0));
brw_MUL(p,
brw_message_reg(msg + dw/8),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
} else {
/contrib/sdk/sources/Intel-2D/sna/gen3_render.c
1459,7 → 1459,7
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map)) {
} else if (sna->render.vertices == MAP(bo->map__cpu)) {
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
if (sna->render.vertices == NULL) {
1657,7 → 1657,123
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen3_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__,
op->src.offset[0], op->src.offset[1],
op->mask.offset[0], op->mask.offset[1],
op->dst.x, op->dst.y));
 
gen3_get_rectangles(sna, op, 1);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen3_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->src.offset[0], op->src.offset[1],
op->mask.offset[0], op->mask.offset[1],
op->dst.x, op->dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen3_get_rectangles(sna, op, nbox);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
r.dst.x = box->x1; r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen3_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen3_get_rectangles(sna, op, nbox);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen3_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen3_get_rectangles(sna, op, nbox);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
#endif
 
static void
gen3_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
1702,8 → 1818,7
state->last_floats_per_vertex = 0;
state->last_vertex_offset = 0;
 
if (sna->render.vbo != NULL &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding vbo as next access will stall: %d\n",
__FUNCTION__, sna->render.vbo->presumed_offset));
discard_vbo(sna);
1755,7 → 1870,286
}
 
#if 0
static bool source_is_covered(PicturePtr picture,
int x, int y,
int width, int height)
{
int x1, y1, x2, y2;
 
if (picture->repeat && picture->repeatType != RepeatNone)
return true;
 
if (picture->pDrawable == NULL)
return false;
 
if (picture->transform) {
pixman_box16_t sample;
 
sample.x1 = x;
sample.y1 = y;
sample.x2 = x + width;
sample.y2 = y + height;
 
pixman_transform_bounds(picture->transform, &sample);
 
x1 = sample.x1;
x2 = sample.x2;
y1 = sample.y1;
y2 = sample.y2;
} else {
x1 = x;
y1 = y;
x2 = x + width;
y2 = y + height;
}
 
return
x1 >= 0 && y1 >= 0 &&
x2 <= picture->pDrawable->width &&
y2 <= picture->pDrawable->height;
}
 
static bool gen3_composite_channel_set_xformat(PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int width, int height)
{
unsigned int i;
 
if (PICT_FORMAT_A(picture->format) != 0)
return false;
 
if (width == 0 || height == 0)
return false;
 
if (!source_is_covered(picture, x, y, width, height))
return false;
 
for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
if (gen3_tex_formats[i].xfmt == picture->format) {
channel->card_format = gen3_tex_formats[i].card_fmt;
channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
channel->alpha_fixup = true;
return true;
}
}
 
return false;
}
 
static int
gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
{
channel->u.gen3.mode = color;
channel->u.gen3.type = SHADER_CONSTANT;
if (color == 0)
channel->u.gen3.type = SHADER_ZERO;
else if (color == 0xff000000)
channel->u.gen3.type = SHADER_BLACK;
else if (color == 0xffffffff)
channel->u.gen3.type = SHADER_WHITE;
 
channel->bo = NULL;
channel->is_opaque = (color >> 24) == 0xff;
channel->is_affine = 1;
channel->alpha_fixup = 0;
channel->rb_reversed = 0;
 
DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
__FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
 
/* for consistency */
channel->repeat = RepeatNormal;
channel->filter = PictFilterNearest;
channel->pict_format = PICT_a8r8g8b8;
channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
 
return 1;
}
 
static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
{
if (channel->u.gen3.type == SHADER_TEXTURE)
channel->repeat = gen3_texture_repeat(channel->repeat);
else
channel->repeat = gen3_gradient_repeat(channel->repeat);
 
channel->filter = gen3_filter(channel->filter);
if (channel->card_format == 0)
gen3_composite_channel_set_format(channel, channel->pict_format);
assert(channel->card_format);
}
 
static bool gen3_gradient_setup(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t ox, int16_t oy)
{
int16_t dx, dy;
 
if (picture->repeat == 0) {
channel->repeat = RepeatNone;
} else switch (picture->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
channel->repeat = picture->repeatType;
break;
default:
return false;
}
 
channel->bo =
sna_render_get_gradient(sna,
(PictGradient *)picture->pSourcePict);
if (channel->bo == NULL)
return false;
 
channel->pict_format = PICT_a8r8g8b8;
channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
channel->filter = PictFilterNearest;
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
ox += dx;
oy += dy;
channel->transform = NULL;
} else
channel->transform = picture->transform;
channel->width = channel->bo->pitch / 4;
channel->height = 1;
channel->offset[0] = ox;
channel->offset[1] = oy;
channel->scale[0] = channel->scale[1] = 1;
return true;
}
 
static int
gen3_init_linear(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int ox, int oy)
{
PictLinearGradient *linear =
(PictLinearGradient *)picture->pSourcePict;
float x0, y0, sf;
float dx, dy, offset;
int n;
 
DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
__FUNCTION__,
xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
 
if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
return 0;
 
dx = xFixedToDouble(linear->p2.x - linear->p1.x);
dy = xFixedToDouble(linear->p2.y - linear->p1.y);
sf = dx*dx + dy*dy;
dx /= sf;
dy /= sf;
 
x0 = xFixedToDouble(linear->p1.x);
y0 = xFixedToDouble(linear->p1.y);
offset = dx*x0 + dy*y0;
 
n = op->u.gen3.num_constants;
channel->u.gen3.constants = FS_C0 + n / 4;
op->u.gen3.constants[n++] = dx;
op->u.gen3.constants[n++] = dy;
op->u.gen3.constants[n++] = -offset;
op->u.gen3.constants[n++] = 0;
 
if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
return -1;
 
channel->u.gen3.type = SHADER_LINEAR;
op->u.gen3.num_constants = n;
 
DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
__FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
return 1;
}
 
static int
gen3_init_radial(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int ox, int oy)
{
PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
double dx, dy, dr, r1;
int n;
 
dx = xFixedToDouble(radial->c2.x - radial->c1.x);
dy = xFixedToDouble(radial->c2.y - radial->c1.y);
dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
 
r1 = xFixedToDouble(radial->c1.radius);
 
n = op->u.gen3.num_constants;
channel->u.gen3.constants = FS_C0 + n / 4;
if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
if (radial->c2.radius == radial->c1.radius) {
channel->u.gen3.type = SHADER_ZERO;
return 1;
}
 
op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
op->u.gen3.constants[n++] = 1. / dr;
op->u.gen3.constants[n++] = -r1 / dr;
 
channel->u.gen3.mode = RADIAL_ONE;
} else {
op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
op->u.gen3.constants[n++] = r1;
op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
 
op->u.gen3.constants[n++] = -2 * dx;
op->u.gen3.constants[n++] = -2 * dy;
op->u.gen3.constants[n++] = -2 * r1 * dr;
op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
 
channel->u.gen3.mode = RADIAL_TWO;
}
 
if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
return -1;
 
channel->u.gen3.type = SHADER_RADIAL;
op->u.gen3.num_constants = n;
return 1;
}
 
static bool
sna_picture_is_clear(PicturePtr picture,
int x, int y, int w, int h,
uint32_t *color)
{
struct sna_pixmap *priv;
 
if (!picture->pDrawable)
return false;
 
priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
if (priv == NULL || !priv->clear)
return false;
 
if (!source_is_covered(picture, x, y, w, h))
return false;
 
*color = priv->clear_color;
return true;
}
 
static int
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
1874,74 → 2268,37
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
#endif
 
static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
/* If it is a solid, try to use the BLT paths */
if (!picture->pDrawable)
return picture->pSourcePict->type == SourcePictTypeSolidFill;

if (picture->pDrawable->width == 1 &&
picture->pDrawable->height == 1 &&
picture->repeat)
return true;

if (too_large(picture->pDrawable->width, picture->pDrawable->height))
return true;

return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}

static bool
try_blt(struct sna *sna,
PicturePtr dst,
PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}

if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}

if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
DBG(("%s: target too large for 3D pipe (%d, %d)\n",
__FUNCTION__,
dst->pDrawable->width, dst->pDrawable->height));
return true;
}

/* is the source picture only in cpu memory e.g. a shm pixmap? */
return source_use_blt(sna, src);
}
#endif

static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);

DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}

static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
int vertex_index;

if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
return;

DBG(("aligning vertex: was %d, now %d floats per vertex\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex));

assert(op->floats_per_rect == 3*op->floats_per_vertex);

vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
__FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
if (gen3_vertex_finish(sna) < op->floats_per_vertex)
kgem_submit(&sna->kgem);

vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
}

sna->render.vertex_index = vertex_index;
sna->render.vertex_used = vertex_index * op->floats_per_vertex;

sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
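
Both the old and the new gen3_align_vertex round vertex_used up to the next whole vertex of the new floats_per_vertex before more rectangles are emitted. The rounding in isolation (plain arithmetic with made-up numbers, no driver state involved):

#include <assert.h>
#include <stdio.h>

/* Round an element count up to the next whole vertex, as gen3_align_vertex
   does for sna->render.vertex_used. */
static int align_vertex_used(int vertex_used, int floats_per_vertex)
{
    int vertex_index = (vertex_used + floats_per_vertex - 1) / floats_per_vertex;
    return vertex_index * floats_per_vertex;
}

int main(void)
{
    /* 10 floats already used, switching to 4 floats per vertex:
       the next vertex must start at offset 12. */
    assert(align_vertex_used(10, 4) == 12);
    /* An already-aligned offset is unchanged. */
    assert(align_vertex_used(12, 4) == 12);
    printf("ok\n");
    return 0;
}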
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
2059,6 → 2416,58
return dst_use_cpu(dst_pixmap);
}
 
static int
reuse_source(struct sna *sna,
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
if (src_x != msk_x || src_y != msk_y)
return false;
 
if (mask == src) {
*mc = *sc;
if (mc->bo)
kgem_bo_reference(mc->bo);
return true;
}
 
if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
return false;
 
if (sc->is_solid)
return false;
 
DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
 
if (!sna_transform_equal(src->transform, mask->transform))
return false;
 
if (!sna_picture_alphamap_equal(src, mask))
return false;
 
if (!gen3_check_repeat(mask))
return false;
 
if (!gen3_check_filter(mask))
return false;
 
if (!gen3_check_format(mask))
return false;
 
DBG(("%s: reusing source channel for mask with a twist\n",
__FUNCTION__));
 
*mc = *sc;
mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
mc->filter = gen3_filter(mask->filter);
mc->pict_format = mask->format;
gen3_composite_channel_set_format(mc, mask->format);
assert(mc->card_format);
if (mc->bo)
kgem_bo_reference(mc->bo);
return true;
}
 
static bool
gen3_render_composite(struct sna *sna,
uint8_t op,
2083,7 → 2492,6
* 3D -> 2D context switch.
*/
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
2093,7 → 2501,7
return true;
 
if (gen3_composite_fallback(sna, op, src, mask, dst))
return false;
goto fallback;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
2117,7 → 2525,7
if (!sna_render_composite_redirect(sna, tmp,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
goto fallback;
}
 
tmp->u.gen3.num_constants = 0;
2406,8 → 2814,8
goto cleanup_mask;
}
 
gen3_align_vertex(sna, tmp);
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
2419,221 → 2827,1720
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
fallback:
return (mask == NULL &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, true));
}
#endif
 
static void
gen3_emit_composite_spans_vertex(struct sna *sna,
const struct sna_composite_spans_op *op,
int16_t x, int16_t y,
float opacity)
{
gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
OUT_VERTEX(opacity);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_zero(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
 
v[2] = op->base.dst.x + box->x1;
v[3] = v[1];
 
v[4] = v[2];
v[5] = op->base.dst.y + box->y1;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox, float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
 
v[2] = op->base.dst.x + b->box.x1;
v[3] = v[1];
 
v[4] = v[2];
v[5] = op->base.dst.y + b->box.y1;
 
v += 6;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
 
v[0] = box->x2;
v[3] = v[1] = box->y2;
v[4] = v[2] = box->x1;
v[5] = box->y1;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox, float *v)
{
do {
v[0] = b->box.x2;
v[3] = v[1] = b->box.y2;
v[4] = v[2] = b->box.x1;
v[5] = b->box.y1;
 
b++;
v += 6;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_constant(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = op->base.dst.x + box->x2;
v[6] = v[3] = op->base.dst.x + box->x1;
v[4] = v[1] = op->base.dst.y + box->y2;
v[7] = op->base.dst.y + box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[6] = v[3] = op->base.dst.x + b->box.x1;
v[4] = v[1] = op->base.dst.y + b->box.y2;
v[7] = op->base.dst.y + b->box.y1;
v[8] = v[5] = v[2] = b->alpha;
 
v += 9;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = box->x2;
v[6] = v[3] = box->x1;
v[4] = v[1] = box->y2;
v[7] = box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox, float *v)
{
do {
v[0] = b->box.x2;
v[6] = v[3] = b->box.x1;
v[4] = v[1] = b->box.y2;
v[7] = b->box.y1;
v[8] = v[5] = v[2] = b->alpha;
 
v += 9;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
v[8] = v[3];
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
v[12] = v[7];
v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
v[14] = opacity;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
v[8] = v[3];
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
v[12] = v[7];
v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
v[14] = b->alpha;
 
v += 15;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
PictTransform *transform = op->base.src.transform;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[6] = v[1] = op->base.dst.y + box->y2;
v[10] = v[5] = op->base.dst.x + box->x1;
v[11] = op->base.dst.y + box->y1;
v[14] = v[9] = v[4] = opacity;
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
(int)op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[2], &v[3]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
(int)op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[7], &v[8]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
(int)op->base.src.offset[1] + box->y1,
transform, op->base.src.scale,
&v[12], &v[13]);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
PictTransform *transform = op->base.src.transform;
 
do {
v[0] = op->base.dst.x + b->box.x2;
v[6] = v[1] = op->base.dst.y + b->box.y2;
v[10] = v[5] = op->base.dst.x + b->box.x1;
v[11] = op->base.dst.y + b->box.y1;
v[14] = v[9] = v[4] = b->alpha;
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
(int)op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[2], &v[3]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
(int)op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[7], &v[8]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
(int)op->base.src.offset[1] + b->box.y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v += 15;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
v[2] = op->base.src.offset[0] + box->x2;
v[3] = op->base.src.offset[1] + box->y2;
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
v[7] = op->base.src.offset[0] + box->x1;
v[8] = v[3];
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
v[12] = v[7];
v[13] = op->base.src.offset[1] + box->y1;
v[14] = opacity;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
v[2] = op->base.src.offset[0] + b->box.x2;
v[3] = op->base.src.offset[1] + b->box.y2;
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
v[7] = op->base.src.offset[0] + b->box.x1;
v[8] = v[3];
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
v[12] = v[7];
v[13] = op->base.src.offset[1] + b->box.y1;
v[14] = b->alpha;
 
v += 15;
b++;
} while (--nbox);
}
 
#if defined(sse2) && !defined(__x86_64__)
sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = op->base.dst.x + box->x2;
v[6] = v[3] = op->base.dst.x + box->x1;
v[4] = v[1] = op->base.dst.y + box->y2;
v[7] = op->base.dst.y + box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[6] = v[3] = op->base.dst.x + b->box.x1;
v[4] = v[1] = op->base.dst.y + b->box.y2;
v[7] = op->base.dst.y + b->box.y1;
v[8] = v[5] = v[2] = b->alpha;
 
v += 9;
b++;
} while (--nbox);
}
 
sse2 fastcall static void
gen3_render_composite_spans_constant_box__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
float *v;
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen3_get_rectangles(sna, &op->base, 1);
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = box->x2;
v[6] = v[3] = box->x1;
v[4] = v[1] = box->y2;
v[7] = box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
sse2 fastcall static void
gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * 9;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
do {
v[0] = box->box.x2;
v[6] = v[3] = box->box.x1;
v[4] = v[1] = box->box.y2;
v[7] = box->box.y1;
v[8] = v[5] = v[2] = box->alpha;
v += 9;
box++;
} while (--nbox_this_time);
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = box->x2;
v[6] = v[3] = box->x1;
v[4] = v[1] = box->y2;
v[7] = box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox, float *v)
{
do {
v[0] = b->box.x2;
v[6] = v[3] = b->box.x1;
v[4] = v[1] = b->box.y2;
v[7] = b->box.y1;
v[8] = v[5] = v[2] = b->alpha;
 
v += 9;
b++;
} while (--nbox);
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
v[8] = v[3];
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
v[12] = v[7];
v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
v[14] = opacity;
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
v[8] = v[3];
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
v[12] = v[7];
v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
v[14] = b->alpha;
 
v += 15;
b++;
} while (--nbox);
}
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
PictTransform *transform = op->base.src.transform;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[6] = v[1] = op->base.dst.y + box->y2;
v[10] = v[5] = op->base.dst.x + box->x1;
v[11] = op->base.dst.y + box->y1;
v[14] = v[9] = v[4] = opacity;
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
(int)op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[2], &v[3]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
(int)op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[7], &v[8]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
(int)op->base.src.offset[1] + box->y1,
transform, op->base.src.scale,
&v[12], &v[13]);
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
PictTransform *transform = op->base.src.transform;
 
do {
v[0] = op->base.dst.x + b->box.x2;
v[6] = v[1] = op->base.dst.y + b->box.y2;
v[10] = v[5] = op->base.dst.x + b->box.x1;
v[11] = op->base.dst.y + b->box.y1;
v[14] = v[9] = v[4] = b->alpha;
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
(int)op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[2], &v[3]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
(int)op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[7], &v[8]);
 
_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
(int)op->base.src.offset[1] + b->box.y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v += 15;
b++;
} while (--nbox);
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
v[2] = op->base.src.offset[0] + box->x2;
v[3] = op->base.src.offset[1] + box->y2;
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
v[7] = op->base.src.offset[0] + box->x1;
v[8] = v[3];
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
v[12] = v[7];
v[13] = op->base.src.offset[1] + box->y1;
v[14] = opacity;
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
v[2] = op->base.src.offset[0] + b->box.x2;
v[3] = op->base.src.offset[1] + b->box.y2;
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
v[7] = op->base.src.offset[0] + b->box.x1;
v[8] = v[3];
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
v[12] = v[7];
v[13] = op->base.src.offset[1] + b->box.y1;
v[14] = b->alpha;
 
v += 15;
b++;
} while (--nbox);
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
PictTransform *transform = op->base.src.transform;
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[2], &v[3]);
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[7], &v[8]);
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
op->base.src.offset[1] + box->y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v[14] = opacity;
}
 
sse2 fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
PictTransform *transform = op->base.src.transform;
 
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[2], &v[3]);
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[7], &v[8]);
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
op->base.src.offset[1] + b->box.y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v[14] = b->alpha;
v += 15;
b++;
} while (--nbox);
}
#endif
 
fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
PictTransform *transform = op->base.src.transform;
float *v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
v[0] = op->base.dst.x + box->x2;
v[1] = op->base.dst.y + box->y2;
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[2], &v[3]);
v[4] = opacity;
 
v[5] = op->base.dst.x + box->x1;
v[6] = v[1];
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
op->base.src.offset[1] + box->y2,
transform, op->base.src.scale,
&v[7], &v[8]);
v[9] = opacity;
 
v[10] = v[5];
v[11] = op->base.dst.y + box->y1;
_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
op->base.src.offset[1] + box->y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v[14] = opacity;
}
 
fastcall static void
gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
const struct sna_opacity_box *b,
int nbox,
float *v)
{
PictTransform *transform = op->base.src.transform;
 
do {
v[0] = op->base.dst.x + b->box.x2;
v[1] = op->base.dst.y + b->box.y2;
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[2], &v[3]);
v[4] = b->alpha;
 
v[5] = op->base.dst.x + b->box.x1;
v[6] = v[1];
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
op->base.src.offset[1] + b->box.y2,
transform, op->base.src.scale,
&v[7], &v[8]);
v[9] = b->alpha;
 
v[10] = v[5];
v[11] = op->base.dst.y + b->box.y1;
_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
op->base.src.offset[1] + b->box.y1,
transform, op->base.src.scale,
&v[12], &v[13]);
v[14] = b->alpha;
v += 15;
b++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_spans_primitive(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box,
float opacity)
{
gen3_emit_composite_spans_vertex(sna, op,
box->x2, box->y2,
opacity);
gen3_emit_composite_spans_vertex(sna, op,
box->x1, box->y2,
opacity);
gen3_emit_composite_spans_vertex(sna, op,
box->x1, box->y1,
opacity);
}
 
fastcall static void
gen3_render_composite_spans_constant_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
float *v;
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen3_get_rectangles(sna, &op->base, 1);
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
v[0] = box->x2;
v[6] = v[3] = box->x1;
v[4] = v[1] = box->y2;
v[7] = box->y1;
v[8] = v[5] = v[2] = opacity;
}
 
fastcall static void
gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * 9;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
do {
v[0] = box->box.x2;
v[6] = v[3] = box->box.x1;
v[4] = v[1] = box->box.y2;
v[7] = box->box.y1;
v[8] = v[5] = v[2] = box->alpha;
v += 9;
box++;
} while (--nbox_this_time);
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen3_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen3_get_rectangles(sna, &op->base, 1);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen3_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen3_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen3_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
if (sna->render.vertex_offset)
gen3_vertex_flush(sna);
 
DBG(("%s()\n", __FUNCTION__));
 
if (op->base.src.bo)
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
 
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen3_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height, unsigned flags)
{
if (op >= ARRAY_SIZE(gen3_blend_op))
return false;
 
if (gen3_composite_fallback(sna, op, src, NULL, dst))
return false;
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
return true;
}
 
static bool
gen3_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
bool no_offset;
 
DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
src_x, src_y, dst_x, dst_y, width, height));
 
assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
if (!gen3_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height)) {
DBG(("%s: unable to set render target\n",
__FUNCTION__));
return false;
}
 
tmp->base.op = op;
tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
!gen3_check_pitch_3d(tmp->base.dst.bo)) {
if (!sna_render_composite_redirect(sna, &tmp->base,
dst_x, dst_y, width, height,
true))
return false;
}
 
tmp->base.src.u.gen3.type = SHADER_TEXTURE;
tmp->base.src.is_affine = true;
DBG(("%s: preparing source\n", __FUNCTION__));
switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
tmp->base.src.u.gen3.type = SHADER_ZERO;
break;
case 1:
gen3_composite_channel_convert(&tmp->base.src);
break;
}
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
 
if (tmp->base.src.u.gen3.type != SHADER_ZERO)
tmp->base.mask.u.gen3.type = SHADER_OPACITY;
 
no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
tmp->box = gen3_render_composite_spans_box;
tmp->boxes = gen3_render_composite_spans_boxes;
tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
tmp->done = gen3_render_composite_spans_done;
tmp->prim_emit = gen3_emit_composite_spans_primitive;
switch (tmp->base.src.u.gen3.type) {
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
if (no_offset) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
} else {
tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
}
break;
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
if (no_offset) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->box = gen3_render_composite_spans_constant_box__sse2;
tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
} else
#endif
{
tmp->box = gen3_render_composite_spans_constant_box;
tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
}
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
}
} else if (tmp->base.src.is_affine) {
tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
}
}
break;
case SHADER_TEXTURE:
if (tmp->base.src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
}
} else if (tmp->base.src.is_affine) {
tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
}
}
break;
}
if (tmp->emit_boxes == NULL)
tmp->thread_boxes = NULL;
 
tmp->base.mask.bo = NULL;
 
tmp->base.floats_per_vertex = 2;
if (!is_constant_ps(tmp->base.src.u.gen3.type))
tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
tmp->base.floats_per_vertex +=
tmp->base.mask.u.gen3.type == SHADER_OPACITY;
tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
 
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
}
 
gen3_align_vertex(sna, &tmp->base);
gen3_emit_composite_state(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
 
static void
gen3_emit_video_state(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
PixmapPtr pixmap,
struct kgem_bo *dst_bo,
int width, int height,
bool bilinear)
{
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t id, ms3, rewind;
 
gen3_emit_target(sna, dst_bo, width, height,
sna_format_for_depth(pixmap->drawable.depth));
 
/* XXX share with composite? Is it worth the effort? */
if ((state->last_shader & (1<<31)) == 0) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
2);
OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
S6_COLOR_WRITE_ENABLE);
 
state->last_blend = 0;
state->floats_per_vertex = 4;
}
 
if (!is_planar_fourcc(frame->id)) {
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
OUT_BATCH(0x0000001); /* constant 0 */
/* constant 0: brightness/contrast */
OUT_BATCH_F(video->brightness / 128.0);
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
if (state->last_constants &&
memcmp(&sna->kgem.batch[state->last_constants],
&sna->kgem.batch[rewind],
6*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_constants = rewind;
 
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
OUT_BATCH(0x00000001);
OUT_BATCH(SS2_COLORSPACE_CONVERSION |
(FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler],
&sna->kgem.batch[rewind],
5*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
 
OUT_BATCH(_3DSTATE_MAP_STATE | 3);
OUT_BATCH(0x00000001); /* texture map #1 */
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
frame->bo,
I915_GEM_DOMAIN_SAMPLER << 16,
0));
 
ms3 = MAPSURF_422;
switch (frame->id) {
case FOURCC_YUY2:
ms3 |= MT_422_YCRCB_NORMAL;
break;
case FOURCC_UYVY:
ms3 |= MT_422_YCRCB_SWAPY;
break;
}
ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
 
id = 1<<31 | 1<<1 | !!video->brightness;
if (state->last_shader != id) {
state->last_shader = id;
id = sna->kgem.nbatch++;
 
gen3_fs_dcl(FS_S0);
gen3_fs_dcl(FS_T0);
gen3_fs_texld(FS_OC, FS_S0, FS_T0);
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C0, X, X, X, ZERO));
}
 
sna->kgem.batch[id] =
_3DSTATE_PIXEL_SHADER_PROGRAM |
(sna->kgem.nbatch - id - 2);
}
} else {
/* For the planar formats, we set up three samplers --
* one for each plane, in a Y8 format. Because I
* couldn't get the special PLANAR_TO_PACKED
* shader setup to work, I did the manual pixel shader:
*
* y' = y - .0625
* u' = u - .5
* v' = v - .5;
*
* r = 1.1643 * y' + 0.0 * u' + 1.5958 * v'
* g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
* b = 1.1643 * y' + 2.017 * u' + 0.0 * v'
*
* register assignment:
* r0 = (y',u',v',0)
* r1 = (y,y,y,y)
* r2 = (u,u,u,u)
* r3 = (v,v,v,v)
* OC = (r,g,b,1)
*/
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
OUT_BATCH(0x000001f); /* constants 0-4 */
/* constant 0: normalization offsets */
OUT_BATCH_F(-0.0625);
OUT_BATCH_F(-0.5);
OUT_BATCH_F(-0.5);
OUT_BATCH_F(0.0);
/* constant 1: r coefficients */
OUT_BATCH_F(1.1643);
OUT_BATCH_F(0.0);
OUT_BATCH_F(1.5958);
OUT_BATCH_F(0.0);
/* constant 2: g coefficients */
OUT_BATCH_F(1.1643);
OUT_BATCH_F(-0.39173);
OUT_BATCH_F(-0.81290);
OUT_BATCH_F(0.0);
/* constant 3: b coefficients */
OUT_BATCH_F(1.1643);
OUT_BATCH_F(2.017);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
/* constant 4: brightness/contrast */
OUT_BATCH_F(video->brightness / 128.0);
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
if (state->last_constants &&
memcmp(&sna->kgem.batch[state->last_constants],
&sna->kgem.batch[rewind],
22*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_constants = rewind;
 
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
OUT_BATCH(0x00000007);
/* sampler 0 */
OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
/* sampler 1 */
OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
(1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
/* sampler 2 */
OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
(FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
(TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
(2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler],
&sna->kgem.batch[rewind],
11*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
 
OUT_BATCH(_3DSTATE_MAP_STATE | 9);
OUT_BATCH(0x00000007);
 
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
frame->bo,
I915_GEM_DOMAIN_SAMPLER << 16,
0));
 
ms3 = MAPSURF_8BIT | MT_8BIT_I8;
ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
OUT_BATCH(ms3);
/* Check whether Y has a pitch other than the usual double of the
* U/V pitch; e.g. i915 XvMC hardware requires at least 1K alignment,
* so the Y pitch might be the same as U/V's. */
if (frame->pitch[1])
OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
else
OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
 
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
frame->bo,
I915_GEM_DOMAIN_SAMPLER << 16,
frame->UBufOffset));
 
ms3 = MAPSURF_8BIT | MT_8BIT_I8;
ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
 
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
frame->bo,
I915_GEM_DOMAIN_SAMPLER << 16,
frame->VBufOffset));
 
ms3 = MAPSURF_8BIT | MT_8BIT_I8;
ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
 
id = 1<<31 | 2<<1 | !!video->brightness;
if (state->last_shader != id) {
state->last_shader = id;
id = sna->kgem.nbatch++;
 
/* Declare samplers */
gen3_fs_dcl(FS_S0); /* Y */
gen3_fs_dcl(FS_S1); /* U */
gen3_fs_dcl(FS_S2); /* V */
gen3_fs_dcl(FS_T0); /* normalized coords */
 
/* Load samplers to temporaries. */
gen3_fs_texld(FS_R1, FS_S0, FS_T0);
gen3_fs_texld(FS_R2, FS_S1, FS_T0);
gen3_fs_texld(FS_R3, FS_S2, FS_T0);
 
/* Move the sampled YUV data in R[123] to the first
* 3 channels of R0.
*/
gen3_fs_mov_masked(FS_R0, MASK_X,
gen3_fs_operand_reg(FS_R1));
gen3_fs_mov_masked(FS_R0, MASK_Y,
gen3_fs_operand_reg(FS_R2));
gen3_fs_mov_masked(FS_R0, MASK_Z,
gen3_fs_operand_reg(FS_R3));
 
/* Normalize the YUV data */
gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C0));
/* Dot-product the YUV data in R0 with the coefficient vectors for
 * R, G and B, storing each result in the corresponding channel of
 * the output color. The OC results are implicitly clamped at the
 * end of the program.
 */
gen3_fs_dp3(FS_OC, MASK_X,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C1));
gen3_fs_dp3(FS_OC, MASK_Y,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C2));
gen3_fs_dp3(FS_OC, MASK_Z,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C3));
/* Set the output alpha to 1.0 by wiring W to one, without
 * reading the source.
 */
gen3_fs_mov_masked(FS_OC, MASK_W,
gen3_fs_operand_one());
 
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C4, X, X, X, ZERO));
}
 
sna->kgem.batch[id] =
_3DSTATE_PIXEL_SHADER_PROGRAM |
(sna->kgem.nbatch - id - 2);
}
}
}
 
static void
gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 
if (!kgem_check_batch(&sna->kgem, 120) ||
!kgem_check_reloc(&sna->kgem, 4) ||
!kgem_check_exec(&sna->kgem, 2)) {
_kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen3.need_invariant)
gen3_emit_invariant(sna);
}
 
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
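/* Each rectangle is emitted as 3 inline vertices; keep one dword in
 * reserve for the PRIM3D_RECTLIST header.
 */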
int size = floats_per_vertex * 3;
int rem = batch_space(sna) - 1;
 
if (size * want > rem)
want = rem / size;
 
return want;
}
 
static bool
gen3_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_pixmap *priv = sna_pixmap(pixmap);
BoxPtr pbox = REGION_RECTS(dstRegion);
int nbox = REGION_NUM_RECTS(dstRegion);
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int pix_xoff, pix_yoff;
struct kgem_bo *dst_bo;
bool bilinear;
int copy = 0;
 
DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
src_width, src_height, frame->width, frame->height, dst_width, dst_height));
 
dst_bo = priv->gpu_bo;
if (dst_bo == NULL)
return false;
 
bilinear = src_width != dst_width || src_height != dst_height;
 
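/* Map destination pixel coordinates to normalized source texture
 * coordinates; the video samplers are set up with
 * SS3_NORMALIZED_COORDS.
 */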
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
__FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
!gen3_check_pitch_3d(dst_bo)) {
int bpp = pixmap->drawable.bitsPerPixel;
 
if (too_large(dst_width, dst_height))
return false;
 
dst_bo = kgem_create_2d(&sna->kgem,
dst_width, dst_height, bpp,
kgem_choose_tiling(&sna->kgem,
I915_TILING_X,
dst_width, dst_height, bpp),
0);
if (!dst_bo)
return false;
 
pix_xoff = -dstRegion->extents.x1;
pix_yoff = -dstRegion->extents.y1;
copy = 1;
} else {
/* Set up the offset for translating from the given region
* (in screen coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
dst_width = pixmap->drawable.width;
dst_height = pixmap->drawable.height;
}
 
gen3_video_get_batch(sna, dst_bo);
gen3_emit_video_state(sna, video, frame, pixmap,
dst_bo, dst_width, dst_height, bilinear);
do {
int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
if (nbox_this_time == 0) {
gen3_video_get_batch(sna, dst_bo);
gen3_emit_video_state(sna, video, frame, pixmap,
dst_bo, dst_width, dst_height, bilinear);
nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
assert(nbox_this_time);
}
nbox -= nbox_this_time;
 
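/* 3 vertices x 4 floats per rectangle = 12 dwords; the inline
 * primitive encodes its length as dword count - 1.
 */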
OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
do {
int box_x1 = pbox->x1;
int box_y1 = pbox->y1;
int box_x2 = pbox->x2;
int box_y2 = pbox->y2;
 
pbox++;
 
DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
__FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
box_x1 * src_scale_x + src_offset_x,
box_y1 * src_scale_y + src_offset_y,
box_x2 * src_scale_x + src_offset_x,
box_y2 * src_scale_y + src_offset_y));
 
/* bottom right */
OUT_BATCH_F(box_x2 + pix_xoff);
OUT_BATCH_F(box_y2 + pix_yoff);
OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
 
/* bottom left */
OUT_BATCH_F(box_x1 + pix_xoff);
OUT_BATCH_F(box_y2 + pix_yoff);
OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
 
/* top left */
OUT_BATCH_F(box_x1 + pix_xoff);
OUT_BATCH_F(box_y1 + pix_yoff);
OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
} while (--nbox_this_time);
} while (nbox);
 
if (copy) {
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
sna_blt_copy_boxes(sna, GXcopy,
dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
priv->gpu_bo, pix_xoff, pix_yoff,
pixmap->drawable.bitsPerPixel,
REGION_RECTS(dstRegion),
REGION_NUM_RECTS(dstRegion));
 
kgem_bo_destroy(&sna->kgem, dst_bo);
}
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
if ((pix_xoff | pix_yoff) == 0) {
sna_damage_add(&priv->gpu_damage, dstRegion);
sna_damage_subtract(&priv->cpu_damage, dstRegion);
} else {
sna_damage_add_boxes(&priv->gpu_damage,
REGION_RECTS(dstRegion),
REGION_NUM_RECTS(dstRegion),
pix_xoff, pix_yoff);
sna_damage_subtract_boxes(&priv->cpu_damage,
REGION_RECTS(dstRegion),
REGION_NUM_RECTS(dstRegion),
pix_xoff, pix_yoff);
}
}
 
return true;
}
 
#endif
 
 
 
2658,6 → 4565,140
 
#if 0
static bool
gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1,
int16_t x2, int16_t y2,
uint8_t alu)
{
struct sna_composite_op tmp;
 
#if NO_FILL_ONE
return gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
#endif
 
/* Prefer to use the BLT if already engaged */
if (prefer_fill_blt(sna) &&
gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
 
/* Must use the BLT if we can't RENDER... */
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height) ||
bo->pitch > MAX_3D_PITCH)
return gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
 
if (alu == GXclear)
color = 0;
 
tmp.op = color == 0 ? PictOpClear : PictOpSrc;
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = 0;
tmp.has_component_alpha = 0;
tmp.rb_reversed = 0;
 
gen3_init_solid(&tmp.src,
sna_rgba_for_color(color, dst->drawable.depth));
tmp.mask.bo = NULL;
tmp.mask.u.gen3.type = SHADER_NONE;
tmp.u.gen3.num_constants = 0;
 
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
 
if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
 
if (!kgem_check_bo(&sna->kgem, bo, NULL))
return false;
}
 
gen3_align_vertex(sna, &tmp);
gen3_emit_composite_state(sna, &tmp);
gen3_get_rectangles(sna, &tmp, 1);
DBG((" (%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
OUT_VERTEX(x2);
OUT_VERTEX(y2);
OUT_VERTEX(x1);
OUT_VERTEX(y2);
OUT_VERTEX(x1);
OUT_VERTEX(y1);
gen3_vertex_flush(sna);
 
return true;
}
#endif
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
2808,7 → 4849,7
kgem_submit(&sna->kgem);
}
 
gen3_align_vertex(sna, tmp);
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
}
/contrib/sdk/sources/Intel-2D/sna/gen4_common.c
0,0 → 1,64
/*
* Copyright © 2011-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "gen4_common.h"
#include "gen4_vertex.h"
 
void gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
void gen4_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.nvertex_reloc == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
void gen4_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
/contrib/sdk/sources/Intel-2D/sna/gen4_common.h
0,0 → 1,49
/*
* Copyright © 2011-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef GEN4_COMMON_H
#define GEN4_COMMON_H
 
#include "sna.h"
 
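/* Drop the streaming vertex bo and fall back to the embedded
 * vertex_data array until a fresh vbo is allocated.
 */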
inline static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
void gen4_render_flush(struct sna *sna);
void gen4_render_retire(struct kgem *kgem);
void gen4_render_expire(struct kgem *kgem);
 
#endif /* GEN4_COMMON_H */
 
/contrib/sdk/sources/Intel-2D/sna/gen4_render.c
41,6 → 41,7
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen4_common.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
549,9 → 550,6
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (op->need_magic_ca_pass && sna->render.vbo)
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
747,16 → 745,10
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
DBG(("aligning vertex: was %d, now %d floats per vertex\n",
sna->render_state.gen4.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
op->floats_per_vertex));
gen4_vertex_align(sna, op);
sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
}
}
1314,11 → 1306,12
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
return false;
}
 
gen4_align_vertex(sna, &tmp);
gen4_video_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
1549,33 → 1542,6
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height))
return true;
 
/* The blitter is much faster for solids */
if (sna_picture_is_solid(src, NULL))
return true;
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return picture_is_cpu(sna, src);
}
 
static bool
check_gradient(PicturePtr picture, bool precise)
{
switch (picture->pSourcePict->type) {
1803,7 → 1769,6
return false;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
1932,8 → 1897,8
goto cleanup_mask;
}
 
gen4_align_vertex(sna, tmp);
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
1990,51 → 1955,6
 
 
 
static void
gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
static void
gen4_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen4_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
 
static void gen4_render_reset(struct sna *sna)
{
sna->render_state.gen4.needs_invariant = true;
2047,8 → 1967,7
sna->render_state.gen4.drawrect_limit = -1;
sna->render_state.gen4.surface_table = -1;
 
if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
2407,8 → 2326,8
kgem_submit(&sna->kgem);
}
 
gen4_align_vertex(sna, tmp);
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.c
38,6 → 38,29
#define sse2
#endif
 
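/* Round vertex_used up to a whole vertex of the new layout and make
 * sure at least two rectangles still fit; otherwise flush the vertex
 * buffer, submitting the batch if even one rectangle cannot fit.
 */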
void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op)
{
int vertex_index;
 
assert(op->floats_per_rect == 3*op->floats_per_vertex);
 
vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
__FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
if (gen4_vertex_finish(sna) < op->floats_per_rect) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
}
 
sna->render.vertex_index = vertex_index;
sna->render.vertex_used = vertex_index * op->floats_per_vertex;
}
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
45,7 → 68,9
sna->render.vertex_index - sna->render.vertex_start));
 
assert(sna->render.vertex_offset);
assert(sna->render.vertex_offset <= sna->kgem.nbatch);
assert(sna->render.vertex_index > sna->render.vertex_start);
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
62,11 → 87,14
sna->render.vertex_used, sna->render.vertex_size));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
sna_vertex_wait__locked(&sna->render);
 
/* Note: we only need dword alignment (currently) */
 
hint = CREATE_GTT_MAP;
 
bo = sna->render.vbo;
if (bo) {
for (i = 0; i < sna->render.nvertex_reloc; i++) {
88,11 → 116,15
sna->render.vb_id = 0;
 
kgem_bo_destroy(&sna->kgem, bo);
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
} else {
if (kgem_is_idle(&sna->kgem)) {
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
return 0;
}
}
 
hint = CREATE_GTT_MAP;
if (bo)
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
 
size = 256*1024;
assert(!sna->render.active);
163,7 → 195,7
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) {
} else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) {
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices =
kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
176,9 → 208,16
 
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
int size;
 
size = sna->kgem.nbatch;
size += sna->kgem.batch_size - sna->kgem.surface;
size += sna->render.vertex_used;
 
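/* Inline the vertices into the batch only when commands, surface
 * state and vertex data together fit within 1024 dwords (4KiB).
 */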
if (size <= 1024) {
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface);
memcpy(sna->kgem.batch + sna->kgem.nbatch,
sna->render.vertex_data,
sna->render.vertex_used * 4);
186,6 → 225,37
bo = NULL;
sna->kgem.nbatch += sna->render.vertex_used;
} else {
size = 256 * 1024;
do {
bo = kgem_create_linear(&sna->kgem, size,
CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED);
} while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used);
 
sna->render.vertices = NULL;
if (bo)
sna->render.vertices = kgem_bo_map(&sna->kgem, bo);
if (sna->render.vertices != NULL) {
DBG(("%s: new vbo: %d / %d\n", __FUNCTION__,
sna->render.vertex_used, __kgem_bo_size(bo)/4));
 
assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo));
memcpy(sna->render.vertices,
sna->render.vertex_data,
sizeof(float)*sna->render.vertex_used);
 
size = __kgem_bo_size(bo)/4;
if (size >= UINT16_MAX)
size = UINT16_MAX - 1;
 
sna->render.vbo = bo;
sna->render.vertex_size = size;
} else {
DBG(("%s: tmp vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
 
if (bo)
kgem_bo_destroy(&sna->kgem, bo);
 
bo = kgem_create_linear(&sna->kgem,
4*sna->render.vertex_used,
CREATE_NO_THROTTLE);
195,11 → 265,14
kgem_bo_destroy(&sna->kgem, bo);
bo = NULL;
}
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
 
assert(sna->render.vbo == NULL);
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
}
}
}
 
assert(sna->render.nvertex_reloc);
for (i = 0; i < sna->render.nvertex_reloc; i++) {
/contrib/sdk/sources/Intel-2D/sna/gen4_vertex.h
6,6 → 6,7
#include "sna.h"
#include "sna_render.h"
 
void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op);
void gen4_vertex_flush(struct sna *sna);
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
/contrib/sdk/sources/Intel-2D/sna/gen5_render.c
42,6 → 42,7
 
#include "brw/brw.h"
#include "gen5_render.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
719,16 → 720,10
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
DBG(("aligning vertex: was %d, now %d floats per vertex\n",
sna->render_state.gen5.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
op->floats_per_vertex));
gen4_vertex_align(sna, op);
sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
}
}
942,10 → 937,14
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
#if 0
OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
#else
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
#endif
}
 
static void
1311,11 → 1310,12
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
return false;
}
 
gen5_align_vertex(sna, &tmp);
gen5_video_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
1452,7 → 1452,6
}
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
1577,8 → 1576,8
goto cleanup_mask;
}
 
gen5_align_vertex(sna, tmp);
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
1806,8 → 1805,8
goto cleanup_src;
}
 
gen5_align_vertex(sna, &tmp->base);
gen5_bind_surfaces(sna, &tmp->base);
gen5_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
1952,7 → 1951,10
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
DBG(("%s: aperture check failed\n", __FUNCTION__));
goto fallback_tiled_src;
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
goto fallback_blt;
}
}
 
1963,8 → 1965,8
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
 
gen5_align_vertex(sna, &tmp);
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
do {
int n_this_time;
1999,8 → 2001,6
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2021,17 → 2021,7
}
 
#endif
 
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen5_render_context_switch(struct kgem *kgem,
int new_mode)
{
2060,42 → 2050,6
}
}
 
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
static void
gen5_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen5_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
 
static void gen5_render_reset(struct sna *sna)
{
sna->render_state.gen5.needs_invariant = true;
2107,8 → 2061,7
sna->render_state.gen5.drawrect_limit = -1;
sna->render_state.gen5.surface_table = -1;
 
if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
2351,8 → 2304,8
return backend;
 
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
2362,7 → 2315,7
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen5_check_composite_spans;
sna->render.composite_spans = gen5_render_composite_spans;
if (sna->PciInfo->device_id == 0x0044)
if (intel_get_device_id(sna->scrn) == 0x0044)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen5_render_video;
2378,7 → 2331,7
sna->render.blit_tex = gen5_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen5_render_flush;
sna->render.flush = gen4_render_flush;
sna->render.reset = gen5_render_reset;
sna->render.fini = gen5_render_fini;
 
2466,8 → 2419,8
kgem_submit(&sna->kgem);
}
 
gen5_align_vertex(sna, tmp);
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
 
return true;
 
}
/contrib/sdk/sources/Intel-2D/sna/gen6_common.c
0,0 → 1,71
/*
* Copyright © 2011-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "gen6_common.h"
#include "gen4_vertex.h"
 
void
gen6_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (kgem->nbatch) {
DBG(("%s: from %d to %d, submit batch\n", __FUNCTION__, kgem->mode, new_mode));
_kgem_submit(kgem);
}
 
if (kgem->nexec) {
DBG(("%s: from %d to %d, reset incomplete batch\n", __FUNCTION__, kgem->mode, new_mode));
kgem_reset(kgem);
}
 
assert(kgem->nbatch == 0);
assert(kgem->nreloc == 0);
assert(kgem->nexec == 0);
 
kgem->ring = new_mode;
}
 
void gen6_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.nvertex_reloc == 0 &&
sna->render.vbo &&
!kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
/contrib/sdk/sources/Intel-2D/sna/gen6_common.h
0,0 → 1,139
/*
* Copyright © 2011-2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef GEN6_COMMON_H
#define GEN6_COMMON_H
 
#include "sna.h"
 
#define NO_RING_SWITCH 0
#define PREFER_RENDER 0
 
static inline bool is_uncached(struct sna *sna,
struct kgem_bo *bo)
{
return bo->scanout && !sna->kgem.has_wt;
}
 
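/* In this port can_switch_to_blt() is stubbed to always refuse, so
 * prefer_blt_ring() below never elects the BLT ring.
 */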
inline static bool can_switch_to_blt(struct sna *sna,
struct kgem_bo *bo,
unsigned flags)
{
return false;
}
 
inline static bool can_switch_to_render(struct sna *sna,
struct kgem_bo *bo)
{
if (sna->kgem.ring == KGEM_RENDER)
return true;
 
if (NO_RING_SWITCH)
return false;
 
if (!sna->kgem.has_semaphores)
return false;
 
if (bo && !RQ_IS_BLT(bo->rq) && !is_uncached(sna, bo))
return true;
 
return !kgem_ring_is_idle(&sna->kgem, KGEM_RENDER);
}
 
static inline bool untiled_tlb_miss(struct kgem_bo *bo)
{
if (kgem_bo_is_render(bo))
return false;
 
return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
}
 
static int prefer_blt_bo(struct sna *sna, struct kgem_bo *bo)
{
if (bo->rq)
return RQ_IS_BLT(bo->rq);
 
if (sna->flags & SNA_POWERSAVE)
return true;
 
return bo->tiling == I915_TILING_NONE || is_uncached(sna, bo);
}
 
inline static bool force_blt_ring(struct sna *sna)
{
if (sna->flags & SNA_POWERSAVE)
return true;
 
if (sna->kgem.mode == KGEM_RENDER)
return false;
 
if (sna->render_state.gt < 2)
return true;
 
return false;
}
 
inline static bool prefer_blt_ring(struct sna *sna,
struct kgem_bo *bo,
unsigned flags)
{
assert(!force_blt_ring(sna));
assert(!kgem_bo_is_render(bo));
 
return can_switch_to_blt(sna, bo, flags);
}
 
inline static bool prefer_render_ring(struct sna *sna,
struct kgem_bo *bo)
{
if (sna->flags & SNA_POWERSAVE)
return false;
 
if (sna->render_state.gt < 2)
return false;
 
return can_switch_to_render(sna, bo);
}
 
inline static bool
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
{
return false;
 
}
 
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo,
unsigned flags)
{
return false;
}
 
void gen6_render_context_switch(struct kgem *kgem, int new_mode);
void gen6_render_retire(struct kgem *kgem);
 
#endif /* GEN6_COMMON_H */
/contrib/sdk/sources/Intel-2D/sna/gen6_render.c
39,6 → 39,8
 
#include "brw/brw.h"
#include "gen6_render.h"
#include "gen6_common.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
74,6 → 76,7
int max_vs_entries;
int max_gs_entries;
} urb;
int gt;
};
 
static const struct gt_info gt1_info = {
82,6 → 85,7
.max_gs_threads = 21,
.max_wm_threads = 40,
.urb = { 32, 256, 256 },
.gt = 1,
};
 
static const struct gt_info gt2_info = {
90,6 → 94,7
.max_gs_threads = 60,
.max_wm_threads = 80,
.urb = { 64, 256, 256 },
.gt = 2,
};
 
static const uint32_t ps_kernel_packed[][4] = {
872,21 → 877,22
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall = wm_binding_table & 1;
bool need_flush, need_stall;
 
assert(op->dst.bo->exec);
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
need_flush =
gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)) &&
wm_binding_table & 1;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_vertex_elements(sna, op);
 
need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
need_stall = gen6_emit_binding_table(sna, wm_binding_table & ~1);
if (gen6_emit_drawing_rectangle(sna, op))
need_stall = false;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (need_flush || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
1317,16 → 1323,10
{
assert (sna->render.vertex_offset == 0);
if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
DBG(("aligning vertex: was %d, now %d floats per vertex\n",
sna->render_state.gen6.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
op->floats_per_vertex));
gen4_vertex_align(sna, op);
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
1657,8 → 1657,8
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, &tmp);
gen6_emit_video_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
1853,9 → 1853,9
} else
sna_render_picture_extents(dst, &box);
 
// op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
// PREFER_GPU | FORCE_GPU | RENDER_GPU,
// &box, &op->damage);
op->dst.bo = sna_drawable_use_bo(dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
1925,7 → 1925,13
return true;
 
if (gen6_composite_fallback(sna, src, mask, dst))
return false;
return (mask == NULL &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, true));
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
2051,8 → 2057,8
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, tmp);
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
2284,8 → 2290,8
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, &tmp->base);
gen6_emit_composite_state(sna, &tmp->base);
gen6_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
2351,10 → 2357,16
untiled_tlb_miss(dst_bo))
return true;
 
if (force_blt_ring(sna))
return true;
 
if (kgem_bo_is_render(dst_bo) ||
kgem_bo_is_render(src_bo))
return false;
 
if (prefer_render_ring(sna, dst_bo))
return false;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
2553,13 → 2565,17
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
DBG(("%s: too large for a single operation\n",
__FUNCTION__));
goto fallback_tiled_src;
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
goto fallback_blt;
}
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, &tmp);
gen6_emit_copy_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int16_t *v;
2596,9 → 2612,6
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2720,8 → 2733,8
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, &op->base);
gen6_emit_copy_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_copy_blt;
op->done = gen6_render_copy_done;
2760,24 → 2773,6
gen6_emit_state(sna, op, offset | dirty);
}
 
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo)
{
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (kgem_bo_is_render(bo))
return false;
 
if (untiled_tlb_miss(bo))
return true;
 
if (!prefer_blt_ring(sna, bo, 0))
return false;
 
return prefer_blt_bo(sna, bo);
}
 
static bool
gen6_render_fill_boxes(struct sna *sna,
CARD8 op,
2799,7 → 2794,8
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
!gen6_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
2874,13 → 2870,14
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_align_vertex(sna, &tmp);
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int n_this_time;
3009,12 → 3006,12
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
uint32_t color, unsigned flags,
struct sna_fill_op *op)
{
DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
 
if (prefer_blt_fill(sna, dst_bo) &&
if (prefer_blt_fill(sna, dst_bo, flags) &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
3053,13 → 3050,14
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_align_vertex(sna, &op->base);
gen6_emit_fill_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_op_fill_blt;
op->box = gen6_render_op_fill_box;
3097,7 → 3095,7
int16_t *v;
 
/* Prefer to use the BLT if already engaged */
if (prefer_blt_fill(sna, bo) &&
if (prefer_blt_fill(sna, bo, FILL_BOXES) &&
gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
3133,6 → 3131,7
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3141,8 → 3140,8
}
}
 
gen6_align_vertex(sna, &tmp);
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
3219,6 → 3218,7
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
3227,8 → 3227,8
}
}
 
gen6_align_vertex(sna, &tmp);
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
3251,60 → 3251,6
}
#endif
 
static void gen6_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen6_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (kgem->nbatch) {
DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
_kgem_submit(kgem);
}
 
kgem->ring = new_mode;
}
 
static void
gen6_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen6_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
kgem_bo_destroy(kgem, sna->render.vbo);
assert(!sna->render.active);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
3320,6 → 3266,11
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
 
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
3330,17 → 3281,17
kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
 
static bool is_gt2(struct sna *sna)
static bool is_gt2(struct sna *sna, int devid)
{
return sna->PciInfo->device_id & 0x30;
return devid & 0x30;
}
 
static bool is_mobile(struct sna *sna)
static bool is_mobile(struct sna *sna, int devid)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (devid & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
static bool gen6_render_setup(struct sna *sna, int devid)
{
struct gen6_render_state *state = &sna->render_state.gen6;
struct sna_static_stream general;
3348,8 → 3299,9
int i, j, k, l, m;
 
state->info = &gt1_info;
if (is_gt2(sna))
if (is_gt2(sna, devid))
state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
state->gt = state->info->gt;
 
sna_static_stream_init(&general);
 
3420,12 → 3372,14
 
const char *gen6_render_init(struct sna *sna, const char *backend)
{
if (!gen6_render_setup(sna))
int devid = intel_get_device_id(sna);
 
if (!gen6_render_setup(sna, devid))
return backend;
 
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
3436,7 → 3390,7
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
if (is_mobile(sna, devid))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
3465,7 → 3419,7
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;
 
sna->render.flush = gen6_render_flush;
sna->render.flush = gen4_render_flush;
sna->render.reset = gen6_render_reset;
sna->render.fini = gen6_render_fini;
 
3568,7 → 3522,7
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
3576,8 → 3530,8
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_align_vertex(sna, tmp);
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
 
return true;
 
}
/contrib/sdk/sources/Intel-2D/sna/gen7_render.c
42,10 → 42,14
 
#include "brw/brw.h"
#include "gen7_render.h"
#include "gen4_common.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
#include "gen6_common.h"
 
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
1022,28 → 1026,39
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_invalidate;
bool need_flush;
bool need_stall;
 
assert(op->dst.bo->exec);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
gen7_emit_vertex_elements(sna, op);
need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
if (ALWAYS_INVALIDATE)
need_invalidate = true;
 
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_flush =
sna->render_state.gen7.emit_flush &&
wm_binding_table & GEN7_READS_DST(op->u.gen7.flags);
if (ALWAYS_FLUSH)
need_flush = true;
 
wm_binding_table &= ~1;
 
need_stall = sna->render_state.gen7.surface_table != wm_binding_table;
need_stall &= gen7_emit_drawing_rectangle(sna, op);
if (ALWAYS_STALL)
need_stall = true;
 
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (need_invalidate) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
kgem_bo_mark_dirty(op->dst.bo);
sna->render_state.gen7.emit_flush = false;
 
need_flush = false;
need_stall = false;
}
if (sna->render_state.gen7.emit_flush) {
if (need_flush) {
gen7_emit_pipe_flush(sna, need_stall);
need_stall = false;
}
1050,6 → 1065,13
if (need_stall)
gen7_emit_pipe_stall(sna);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
gen7_emit_vertex_elements(sna, op);
gen7_emit_binding_table(sna, wm_binding_table);
 
sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
 
1404,12 → 1426,14
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
uint16_t offset, dirty;
 
gen7_get_batch(sna, op);
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
dirty = kgem_bo_is_dirty(op->dst.bo);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
1438,7 → 1462,7
offset = sna->render_state.gen7.surface_table;
}
 
gen7_emit_state(sna, op, offset);
gen7_emit_state(sna, op, offset | dirty);
}
 
static void
1445,16 → 1469,9
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen7.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
DBG(("aligning vertex: was %d, now %d floats per vertex\n",
sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
gen4_vertex_align(sna, op);
sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
}
}
1548,7 → 1565,7
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
uint16_t offset, dirty;
int n_src, n;
 
gen7_get_batch(sna, op);
1586,6 → 1603,8
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
dirty = kgem_bo_is_dirty(op->dst.bo);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
1602,7 → 1621,7
src_surf_format);
}
 
gen7_emit_state(sna, op, offset);
gen7_emit_state(sna, op, offset | dirty);
}
 
static bool
1669,12 → 1688,14
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
return false;
 
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_align_vertex(sna, &tmp);
gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
1874,7 → 1895,8
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
!gen7_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
1949,11 → 1971,17
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
return false;
}
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_align_vertex(sna, &tmp);
gen7_emit_fill_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
do {
int n_this_time;
1987,60 → 2015,6
}
#endif
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen7_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (kgem->nbatch) {
DBG(("%s: switch rings %d -> %d\n",
__FUNCTION__, kgem->mode, new_mode));
_kgem_submit(kgem);
}
 
kgem->ring = new_mode;
}
 
static void
gen7_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen7_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
kgem_bo_destroy(kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void gen7_render_reset(struct sna *sna)
{
sna->render_state.gen7.emit_flush = false;
2056,6 → 2030,11
sna->render_state.gen7.drawrect_limit = -1;
sna->render_state.gen7.surface_table = -1;
 
if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
2066,23 → 2045,23
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
static bool is_gt3(struct sna *sna)
static bool is_gt3(struct sna *sna, int devid)
{
assert(sna->kgem.gen == 075);
return sna->PciInfo->device_id & 0x20;
return devid & 0x20;
}
 
static bool is_gt2(struct sna *sna)
static bool is_gt2(struct sna *sna, int devid)
{
return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20);
return devid & (is_hsw(sna)? 0x30 : 0x20);
}
 
static bool is_mobile(struct sna *sna)
static bool is_mobile(struct sna *sna, int devid)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (devid & 0xf) == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
static bool gen7_render_setup(struct sna *sna, int devid)
{
struct gen7_render_state *state = &sna->render_state.gen7;
struct sna_static_stream general;
2091,9 → 2070,9
 
if (is_ivb(sna)) {
state->info = &ivb_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (devid & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
if (is_gt2(sna, devid))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (is_byt(sna)) {
2100,10 → 2079,10
state->info = &byt_gt_info;
} else if (is_hsw(sna)) {
state->info = &hsw_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (is_gt3(sna))
if (devid & 0xf) {
if (is_gt3(sna, devid))
state->info = &hsw_gt3_info;
else if (is_gt2(sna))
else if (is_gt2(sna, devid))
state->info = &hsw_gt2_info;
else
state->info = &hsw_gt1_info;
2111,6 → 2090,8
} else
return false;
 
state->gt = state->info->gt;
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2175,12 → 2156,14
 
const char *gen7_render_init(struct sna *sna, const char *backend)
{
if (!gen7_render_setup(sna))
int devid = intel_get_device_id(sna);
 
if (!gen7_render_setup(sna, devid))
return backend;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
2190,7 → 2173,7
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
2219,7 → 2202,7
sna->render.blit_tex = gen7_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen7_render_flush;
sna->render.flush = gen4_render_flush;
sna->render.reset = gen7_render_reset;
sna->render.fini = gen7_render_fini;
 
2312,7 → 2295,7
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
2320,7 → 2303,7
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_align_vertex(sna, tmp);
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
/contrib/sdk/sources/Intel-2D/sna/kgem.c
47,7 → 47,6
 
#include "sna_cpuid.h"
 
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
60,7 → 59,7
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_CREATE2 1
#define DBG_NO_USERPTR 0
#define DBG_NO_USERPTR 1
#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
72,7 → 71,7
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 1
#define DBG_NO_HANDLE_LUT 0
#define DBG_NO_WT 0
#define DBG_DUMP 0
 
105,10 → 104,8
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 1)
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
158,8 → 155,13
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
uint32_t mmapped : 2;
};
enum {
MMAPPED_NONE,
MMAPPED_GTT,
MMAPPED_CPU
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
252,10 → 254,11
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
}
 
static uint32_t gem_userptr(int fd, void *ptr, int size, int read_only)
{
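/* userptr objects are not supported here; returning 0 reports no handle. */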
return 0;
}
 
 
 
 
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_NO_RETIRE) {
289,24 → 292,23
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
assert(!bo->snoop);
assert(kgem_bo_can_map(kgem, bo));
assert(num_pages(bo) <= kgem->aperture_mappable / 4);
 
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
int err = 0;
 
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry_gtt;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
if (kgem_cleanup_cache(kgem))
goto retry_gtt;
}
 
printf("%s: failed to retrieve GTT offset for handle=%d\n",
__FUNCTION__, bo->handle);
ErrorF("%s: failed to retrieve GTT offset for handle=%d: %d\n",
__FUNCTION__, bo->handle, err);
return NULL;
}
 
321,7 → 323,7
return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
338,7 → 340,7
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
static int gem_write__cachealigned(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
631,7 → 633,7
static unsigned
cpu_cache_size__cpuid4(void)
{
/* Deterministic Cache Parmaeters (Function 04h)":
/* Deterministic Cache Parameters (Function 04h)":
* When EAX is initialized to a value of 4, the CPUID instruction
* returns deterministic cache information in the EAX, EBX, ECX
* and EDX registers. This function requires ECX be initialized
755,7 → 757,7
* hw acceleration.
*/
 
if (kgem->gen == 060 && dev->revision < 8) {
if (kgem->gen == 060 && dev && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
881,7 → 883,7
 
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
int count[2] = { 2, 2 };
int count[2] = { 4, 4 };
int size[2] = { 1, 2 };
int n, i;
 
911,6 → 913,7
pin.alignment = 0;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
gem_close(kgem->fd, pin.handle);
free(bo);
goto err;
}
bo->presumed_offset = pin.offset;
1028,7 → 1031,6
kgem->has_no_reloc));
 
kgem->has_handle_lut = test_has_handle_lut(kgem);
kgem->has_handle_lut = 0;
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
 
1042,6 → 1044,10
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
 
kgem->can_render_y = gen != 021 && (gen >> 3) != 4;
DBG(("%s: can render to Y-tiled surfaces? %d\n", __FUNCTION__,
kgem->can_render_y));
 
kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
1115,6 → 1121,8
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
 
kgem->aperture_mappable = 256 * 1024 * 1024;
if (dev != NULL)
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
1149,6 → 1157,14
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
 
if (kgem->aperture_high > totalram / 2) {
kgem->aperture_high = totalram / 2;
kgem->aperture_low = kgem->aperture_high / 4;
DBG(("%s: reduced aperture watermaks to fit into ram; low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
}
 
kgem->max_cpu_size = kgem->max_object_size;
 
half_gpu_max = kgem->max_gpu_size / 2;
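
Note: the clamp above keeps the batch watermarks within system memory; the very next hunk then converts them to pages. A worked example with an assumed 2 GiB of total RAM:

/* totalram      = 2 GiB
 * aperture_high = totalram / 2      = 1 GiB
 * aperture_low  = aperture_high / 4 = 256 MiB
 * (both divided by PAGE_SIZE = 4096 in the following hunk: 262144 and 65536 pages)
 */
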
1197,8 → 1213,10
kgem->max_upload_tile_size, kgem->max_copy_tile_size));
 
/* Convert the aperture thresholds to pages */
kgem->aperture_mappable /= PAGE_SIZE;
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;
kgem->aperture_total /= PAGE_SIZE;
 
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
1233,7 → 1251,7
return kgem->min_alignment;
}
 
void kgem_get_tile_size(struct kgem *kgem, int tiling,
void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
int *tile_width, int *tile_height, int *tile_size)
{
if (kgem->gen <= 030) {
1270,6 → 1288,10
*tile_size = 4096;
break;
}
 
/* Force offset alignment to tile-row */
if (tiling && kgem->gen < 033)
*tile_width = pitch;
}
 
uint32_t kgem_surface_size(struct kgem *kgem,
1400,10 → 1422,15
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
assert(bo->proxy == NULL);
 
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
if (bo->io && !list_is_empty(&bo->list))
list_move(&bo->list, &kgem->batch_buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
1461,26 → 1488,6
}
}
 
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
int type = IS_CPU_MAP(bo->map);
 
assert(!IS_USER_MAP(bo->map));
 
DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
__FUNCTION__, type ? "CPU" : "GTT",
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
list_del(&bo->vma);
kgem->vma[type].count--;
}
}
 
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
1496,22 → 1503,32
 
kgem_bo_binding_free(kgem, bo);
 
if (IS_USER_MAP(bo->map)) {
if (IS_USER_MAP(bo->map__cpu)) {
assert(bo->rq == NULL);
assert(!__kgem_busy(kgem, bo->handle));
assert(MAP(bo->map) != bo || bo->io || bo->flush);
assert(MAP(bo->map__cpu) != bo || bo->io || bo->flush);
if (!(bo->io || bo->flush)) {
DBG(("%s: freeing snooped base\n", __FUNCTION__));
assert(bo != MAP(bo->map));
free(MAP(bo->map));
assert(bo != MAP(bo->map__cpu));
free(MAP(bo->map__cpu));
}
bo->map = NULL;
bo->map__cpu = NULL;
}
if (bo->map)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
assert(bo->map == NULL);
 
DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
__FUNCTION__, bo->map__gtt, bo->map__cpu,
bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count));
 
if (!list_is_empty(&bo->vma)) {
_list_del(&bo->vma);
kgem->vma[bo->map__gtt == NULL].count--;
}
 
// if (bo->map__gtt)
// munmap(MAP(bo->map__gtt), bytes(bo));
// if (bo->map__cpu)
// munmap(MAP(bo->map__cpu), bytes(bo));
 
_list_del(&bo->list);
_list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
1546,25 → 1563,31
kgem->need_expire = true;
 
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
list_move(&bo->list, &kgem->large_inactive);
return;
if (bo->map__gtt) {
// munmap(MAP(bo->map__gtt), bytes(bo));
bo->map__gtt = NULL;
}
 
list_move(&bo->list, &kgem->large_inactive);
} else {
assert(bo->flush == false);
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
(!type && !__kgem_bo_is_mappable(kgem, bo))) {
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
if (bo->map__gtt) {
if (!kgem_bo_can_map(kgem, bo)) {
// munmap(MAP(bo->map__gtt), bytes(bo));
bo->map__gtt = NULL;
}
if (bo->map) {
list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
kgem->vma[type].count++;
if (bo->map__gtt) {
list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
kgem->vma[0].count++;
}
}
if (bo->map__cpu && !bo->map__gtt) {
list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
kgem->vma[1].count++;
}
}
}
 
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
1574,6 → 1597,10
return bo;
 
assert(!bo->snoop);
if (__kgem_freed_bo) {
base = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)base;
} else
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
1600,10 → 1627,10
list_del(&bo->list);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (bo->map) {
assert(!list_is_empty(&bo->vma));
if (!list_is_empty(&bo->vma)) {
assert(bo->map__gtt || bo->map__cpu);
list_del(&bo->vma);
kgem->vma[IS_CPU_MAP(bo->map)].count--;
kgem->vma[bo->map__gtt == NULL].count--;
}
}
 
1614,8 → 1641,10
 
list_del(&bo->list);
assert(bo->rq != NULL);
if (bo->rq == (void *)kgem)
if (RQ(bo->rq) == (void *)kgem) {
assert(bo->exec == NULL);
list_del(&bo->request);
}
assert(list_is_empty(&bo->vma));
}
 
1740,6 → 1769,7
if (kgem->nexec != 1 || bo->exec == NULL)
return;
 
assert(bo);
DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
__FUNCTION__, bo->handle));
 
1750,6 → 1780,10
bo->refcnt++;
kgem_reset(kgem);
bo->refcnt--;
 
assert(kgem->nreloc == 0);
assert(kgem->nexec == 0);
assert(bo->exec == NULL);
}
 
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
1777,7 → 1811,7
kgem_bo_move_to_snoop(kgem, bo);
return;
}
if (!IS_USER_MAP(bo->map))
if (!IS_USER_MAP(bo->map__cpu))
bo->flush = false;
 
if (bo->scanout) {
1793,9 → 1827,6
goto destroy;
}
 
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
 
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->flush == false);
1824,7 → 1855,7
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
 
if (!IS_CPU_MAP(bo->map)) {
if (bo->map__cpu == NULL || bucket(bo) >= NUM_CACHE_BUCKETS) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
 
1852,16 → 1883,18
 
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
assert(bo->base.io);
while (!list_is_empty(&bo->base.vma)) {
struct kgem_bo *cached;
 
cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
assert(cached->proxy == &bo->base);
assert(cached != &bo->base);
list_del(&cached->vma);
 
assert(*(struct kgem_bo **)cached->map == cached);
*(struct kgem_bo **)cached->map = NULL;
cached->map = NULL;
assert(*(struct kgem_bo **)cached->map__gtt == cached);
*(struct kgem_bo **)cached->map__gtt = NULL;
cached->map__gtt = NULL;
 
kgem_bo_destroy(kgem, cached);
}
1877,6 → 1910,10
struct kgem_buffer,
base.list);
 
DBG(("%s: handle=%d, busy? %d [%d]\n",
__FUNCTION__, bo->base.handle, bo->base.rq != NULL, bo->base.exec != NULL));
 
assert(bo->base.exec == NULL || RQ(bo->base.rq) == kgem->next_request);
if (bo->base.rq)
break;
 
1897,7 → 1934,7
bool retired = false;
 
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
assert(bo->rq == (void *)kgem);
assert(RQ(bo->rq) == (void *)kgem);
assert(bo->exec == NULL);
 
if (__kgem_busy(kgem, bo->handle))
1960,7 → 1997,8
DBG(("%s: moving %d to flushing\n",
__FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
bo->rq = MAKE_REQUEST(kgem, RQ_RING(bo->rq));
kgem->need_retire = true;
continue;
}
 
1985,6 → 2023,7
}
 
assert(rq->bo->rq == NULL);
assert(rq->bo->exec == NULL);
assert(list_is_empty(&rq->bo->request));
 
if (--rq->bo->refcnt == 0) {
2057,7 → 2096,7
{
bool retired = false;
 
DBG(("%s\n", __FUNCTION__));
DBG(("%s, need_retire?=%d\n", __FUNCTION__, kgem->need_retire));
 
kgem->need_retire = false;
 
2077,6 → 2116,7
{
struct kgem_request *rq;
 
assert(ring < ARRAY_SIZE(kgem->requests));
assert(!list_is_empty(&kgem->requests[ring]));
 
rq = list_last_entry(&kgem->requests[ring],
2091,10 → 2131,24
__FUNCTION__, ring, rq->bo->handle));
 
kgem_retire__requests_ring(kgem, ring);
kgem_retire__buffers(kgem);
 
assert(list_is_empty(&kgem->requests[ring]));
return true;
}
 
#ifndef NDEBUG
static void kgem_commit__check_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo;
 
list_for_each_entry(bo, &kgem->active_buffers, base.list)
assert(bo->base.exec == NULL);
}
#else
#define kgem_commit__check_buffers(kgem)
#endif
 
static void kgem_commit(struct kgem *kgem)
{
struct kgem_request *rq = kgem->next_request;
2118,6 → 2172,7
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
assert(!bo->proxy);
kgem_bo_free(kgem, bo);
continue;
}
2128,7 → 2183,6
 
if (bo->proxy) {
/* proxies are not used for domain tracking */
bo->exec = NULL;
__kgem_bo_clear_busy(bo);
}
 
2152,7 → 2206,8
kgem_retire(kgem);
assert(list_is_empty(&rq->buffers));
 
assert(rq->bo->map == NULL);
assert(rq->bo->map__gtt == NULL);
assert(rq->bo->map__cpu == NULL);
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
} else {
2161,6 → 2216,8
}
 
kgem->next_request = NULL;
 
kgem_commit__check_buffers(kgem);
}
 
static void kgem_close_list(struct kgem *kgem, struct list *head)
2182,17 → 2239,18
struct kgem_buffer *bo, *next;
 
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n",
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s, refcnt=%d\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? "cpu" : "gtt" : "no"));
bo->write, bo->mmapped == MMAPPED_CPU ? "cpu" : bo->mmapped == MMAPPED_GTT ? "gtt" : "no",
bo->base.refcnt));
 
assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
if (bo->base.refcnt > 1 && !bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d, refcnt=%d\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.refcnt));
continue;
}
 
2202,7 → 2260,7
}
 
if (bo->mmapped) {
int used;
uint32_t used;
 
assert(!bo->need_io);
 
2209,20 → 2267,21
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
(kgem->has_llc || bo->mmapped == MMAPPED_GTT || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d): used=%d, refcnt=%d\n",
__FUNCTION__, bo->used, bytes(&bo->base), used, bo->base.refcnt));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
kgem->need_retire = true;
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
__FUNCTION__, bo->used, bo->mmapped));
goto decouple;
}
 
if (!bo->used) {
if (!bo->used || !bo->base.exec) {
/* Unless we replace the handle in the execbuffer,
* then this bo will become active. So decouple it
* from the buffer list and track it in the normal
2301,7 → 2360,7
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
if (gem_write__cachealigned(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
2340,7 → 2399,7
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
gem_write__cachealigned(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;
 
2390,33 → 2449,58
 
ASSERT_IDLE(kgem, handle);
 
retry:
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
if (kgem->surface == kgem->batch_size) {
if (gem_write__cachealigned(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
kgem->batch) == 0)
return 0;
 
goto expire;
}
 
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
if (gem_write__cachealigned(kgem->fd, handle,
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
kgem->batch) == 0)
return 0;
 
goto expire;
}
 
/* Disjoint surface/batch, upload separately */
ret = gem_write(kgem->fd, handle,
if (gem_write__cachealigned(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
if (ret)
return ret;
kgem->batch))
goto expire;
 
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
if (gem_write(kgem->fd, handle,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
kgem->batch + kgem->surface))
goto expire;
 
return 0;
 
expire:
ret = errno;
assert(ret != EINVAL);
 
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry;
 
if (kgem_cleanup_cache(kgem))
goto retry;
 
ErrorF("%s: failed to write batch (handle=%d): %d\n",
__FUNCTION__, handle, ret);
return ret;
}
 
void kgem_reset(struct kgem *kgem)
2442,6 → 2526,7
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
kgem->need_retire = true;
} else
__kgem_bo_clear_busy(bo);
 
2474,6 → 2559,7
kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->aperture_max_fence = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
2599,10 → 2685,10
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
 
DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d [fenced=%d]\n",
kgem->mode, kgem->ring, kgem->batch_flags,
batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced));
 
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
2660,8 → 2746,8
{
int fd = open("/tmp1/1/batchbuffer.bin", O_CREAT|O_WRONLY|O_BINARY);
if (fd != -1) {
write(fd, kgem->batch, size);
close(fd);
ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
fd = close(fd);
}
else printf("SNA: failed to write batchbuffer\n");
asm volatile("int3");
2694,9 → 2780,9
 
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d, fenced=%d, high=%d,%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, kgem->aperture_fenced, kgem->aperture_high, kgem->aperture_total, errno);
 
for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;
2764,7 → 2850,7
}
}
 
void kgem_purge_cache(struct kgem *kgem)
static void kgem_purge_cache(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
int i;
2892,7 → 2978,7
break;
}
 
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
if (bo->map__cpu && bo->delta + MAP_PRESERVE_TIME > expire) {
idle = false;
list_move_tail(&bo->list, &preserve);
} else {
2932,7 → 3018,7
(void)size;
}
 
void kgem_cleanup_cache(struct kgem *kgem)
bool kgem_cleanup_cache(struct kgem *kgem)
{
unsigned int i;
int n;
2962,6 → 3048,9
kgem_retire(kgem);
kgem_cleanup(kgem);
 
if (!kgem->need_expire)
return false;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
while (!list_is_empty(&kgem->inactive[i]))
kgem_bo_free(kgem,
2984,6 → 3073,7
 
kgem->need_purge = false;
kgem->need_expire = false;
return true;
}
 
static struct kgem_bo *
3028,8 → 3118,10
goto discard;
 
list_del(&bo->list);
if (bo->rq == (void *)kgem)
if (RQ(bo->rq) == (void *)kgem) {
assert(bo->exec == NULL);
list_del(&bo->request);
}
 
bo->delta = 0;
assert_tiling(kgem, bo);
3083,7 → 3175,7
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(for_cpu ? bo->map__cpu : bo->map__gtt);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
3107,6 → 3199,8
continue;
 
kgem_bo_remove_from_inactive(kgem, bo);
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
3163,10 → 3257,10
bo->pitch = 0;
}
 
if (bo->map) {
if (bo->map__gtt || bo->map__cpu) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (for_cpu ? bo->map__cpu : bo->map__gtt){
if (first != NULL)
break;
 
3181,6 → 3275,9
continue;
}
} else {
if (flags & CREATE_GTT_MAP && !kgem_bo_can_map(kgem, bo))
continue;
 
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;
3202,6 → 3299,7
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(list_is_empty(&bo->vma));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
assert_tiling(kgem, bo);
3223,6 → 3321,7
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(list_is_empty(&first->vma));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
3282,11 → 3381,11
assert(kgem->gen < 040);
 
if (kgem->gen < 030)
size = 512 * 1024;
size = 512 * 1024 / PAGE_SIZE;
else
size = 1024 * 1024;
while (size < bytes(bo))
size *= 2;
size = 1024 * 1024 / PAGE_SIZE;
while (size < num_pages(bo))
size <<= 1;
 
return size;
}
3308,7 → 3407,6
if (tiling < 0)
exact = true, tiling = -tiling;
 
 
DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling, exact,
!!(flags & CREATE_INACTIVE),
3324,61 → 3422,6
size /= PAGE_SIZE;
bucket = cache_bucket(size);
 
if (flags & CREATE_SCANOUT) {
struct kgem_bo *last = NULL;
 
list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->flush);
assert_tiling(kgem, bo);
 
if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (flags & CREATE_INACTIVE && bo->rq) {
last = bo;
continue;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
 
if (last) {
list_del(&last->list);
 
last->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
last->pitch, last->tiling, last->handle, last->unique_id));
assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
assert_tiling(kgem, last);
last->refcnt = 1;
return last;
}
 
bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch);
if (bo)
return bo;
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
3428,7 → 3471,6
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
bo->flush = true;
return bo;
}
 
3488,9 → 3530,9
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(for_cpu ? bo->map__cpu : bo->map__gtt);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
assert_tiling(kgem, bo);
3520,6 → 3562,8
bo->domain = DOMAIN_NONE;
 
kgem_bo_remove_from_inactive(kgem, bo);
assert(list_is_empty(&bo->list));
assert(list_is_empty(&bo->vma));
 
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
3740,9 → 3784,6
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
3751,6 → 3792,8
}
 
kgem_bo_remove_from_inactive(kgem, bo);
assert(list_is_empty(&bo->list));
assert(list_is_empty(&bo->vma));
 
bo->pitch = pitch;
bo->tiling = tiling;
3799,12 → 3842,6
return NULL;
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}
 
bo->unique_id = kgem_get_unique_id(kgem);
if (tiling == I915_TILING_NONE ||
gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
3935,16 → 3972,21
__FUNCTION__, bo->handle, bo->proxy != NULL));
 
if (bo->proxy) {
assert(!bo->reusable);
kgem_bo_binding_free(kgem, bo);
 
assert(list_is_empty(&bo->list));
_list_del(&bo->vma);
_list_del(&bo->request);
if (bo->io && bo->exec == NULL)
 
if (bo->io && bo->domain == DOMAIN_CPU)
_kgem_bo_delete_buffer(kgem, bo);
 
kgem_bo_unref(kgem, bo->proxy);
kgem_bo_binding_free(kgem, bo);
free(bo);
return;
}
 
*(struct kgem_bo **)bo = __kgem_freed_bo;
__kgem_freed_bo = bo;
} else
__kgem_bo_destroy(kgem, bo);
}
 
3989,6 → 4031,58
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
 
static bool aperture_check(struct kgem *kgem, unsigned num_pages)
{
if (kgem->aperture) {
struct drm_i915_gem_get_aperture aperture;
 
VG_CLEAR(aperture);
aperture.aper_available_size = kgem->aperture_high;
aperture.aper_available_size *= PAGE_SIZE;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
 
DBG(("%s: aperture required %ld bytes, available %ld bytes\n",
__FUNCTION__,
(long)num_pages * PAGE_SIZE,
(long)aperture.aper_available_size));
 
/* Leave some space in case of alignment issues */
aperture.aper_available_size -= 1024 * 1024;
aperture.aper_available_size -= kgem->aperture_mappable * PAGE_SIZE / 2;
if (kgem->gen < 033)
aperture.aper_available_size -= kgem->aperture_max_fence * PAGE_SIZE;
if (!kgem->has_llc)
aperture.aper_available_size -= 2 * kgem->nexec * PAGE_SIZE;
 
DBG(("%s: num_pages=%d, estimated max usable=%ld\n",
__FUNCTION__, num_pages, (long)(aperture.aper_available_size/PAGE_SIZE)));
 
if (num_pages <= aperture.aper_available_size / PAGE_SIZE)
return true;
}
 
return false;
}
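
Note: aperture_check above only re-queries the kernel once something has already been queued (kgem->aperture != 0), then subtracts a safety margin before comparing against the request. A rough restatement of that margin arithmetic, with the fence and non-LLC terms omitted and made-up parameter names; the numbers are only an example (256 MiB mappable aperture):

#define PAGE_SIZE 4096

/* available: bytes reported by DRM_IOCTL_I915_GEM_GET_APERTURE,
 * mappable:  size of the CPU-mappable aperture in bytes. */
static int fits_in_aperture(unsigned long long available,
                            unsigned long long mappable,
                            unsigned num_pages)
{
    available -= 1024 * 1024;   /* alignment slack */
    available -= mappable / 2;  /* keep half the mappable range free, e.g. 128 MiB */
    return num_pages <= available / PAGE_SIZE;
}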
 
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
if (unlikely(kgem->wedged))
return false;
 
if (kgem->nreloc == 0)
return true;
 
if (container_of(kgem, struct sna, kgem)->flags & SNA_POWERSAVE)
return true;
 
if (kgem->flush == flush && kgem->aperture < kgem->aperture_low)
return true;
 
DBG(("%s: opportunistic flushing? flush=%d,%d, aperture=%d/%d, idle?=%d\n",
__FUNCTION__, kgem->flush, flush, kgem->aperture, kgem->aperture_low, kgem_ring_is_idle(kgem, kgem->ring)));
return !kgem_ring_is_idle(kgem, kgem->ring);
}
 
bool kgem_check_bo(struct kgem *kgem, ...)
{
va_list ap;
3996,6 → 4090,7
int num_exec = 0;
int num_pages = 0;
bool flush = false;
bool busy = true;
 
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
4004,13 → 4099,16
if (bo->exec)
continue;
 
if (needs_semaphore(kgem, bo))
if (needs_semaphore(kgem, bo)) {
DBG(("%s: flushing for required semaphore\n", __FUNCTION__));
return false;
}
 
num_pages += num_pages(bo);
num_exec++;
 
flush |= bo->flush;
busy &= bo->rq != NULL;
}
va_end(ap);
 
4020,40 → 4118,129
if (!num_pages)
return true;
 
if (kgem_flush(kgem, flush))
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
return false;
 
if (kgem->aperture > kgem->aperture_low &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
}
 
if (num_pages + kgem->aperture > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
if (!aperture_check(kgem, num_pages + kgem->aperture))
return false;
}
 
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
if (busy)
return true;
 
return kgem_flush(kgem, flush);
}
 
#if 0
bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
while (bo->proxy)
bo = bo->proxy;
assert(bo->refcnt);
 
if (bo->exec) {
if (kgem->gen < 040 &&
bo->tiling != I915_TILING_NONE &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
uint32_t size;
 
assert(bo->tiling == I915_TILING_X);
 
if (kgem->nfence >= kgem->fence_max)
return false;
 
if (kgem->aperture_fenced) {
size = 3*kgem->aperture_fenced;
if (kgem->aperture_total == kgem->aperture_mappable)
size += kgem->aperture;
if (size > kgem->aperture_mappable &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
return false;
}
}
 
size = kgem_bo_fenced_size(kgem, bo);
if (size > kgem->aperture_max_fence)
kgem->aperture_max_fence = size;
size += kgem->aperture_fenced;
if (kgem->gen < 033)
size += kgem->aperture_max_fence;
if (kgem->aperture_total == kgem->aperture_mappable)
size += kgem->aperture;
if (size > kgem->aperture_mappable) {
DBG(("%s: estimated fence space required [%d] exceed aperture [%d]\n",
__FUNCTION__, size, kgem->aperture_mappable));
return false;
}
}
 
return true;
}
 
if (kgem->nexec >= KGEM_EXEC_SIZE(kgem) - 1)
return false;
 
if (needs_semaphore(kgem, bo)) {
DBG(("%s: flushing for required semaphore\n", __FUNCTION__));
return false;
}
 
assert_tiling(kgem, bo);
if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) {
uint32_t size;
 
assert(bo->tiling == I915_TILING_X);
 
if (kgem->nfence >= kgem->fence_max)
return false;
 
if (kgem->aperture_fenced) {
size = 3*kgem->aperture_fenced;
if (kgem->aperture_total == kgem->aperture_mappable)
size += kgem->aperture;
if (size > kgem->aperture_mappable &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: opportunistic fence flush\n", __FUNCTION__));
return false;
}
}
 
size = kgem_bo_fenced_size(kgem, bo);
if (size > kgem->aperture_max_fence)
kgem->aperture_max_fence = size;
size += kgem->aperture_fenced;
if (kgem->gen < 033)
size += kgem->aperture_max_fence;
if (kgem->aperture_total == kgem->aperture_mappable)
size += kgem->aperture;
if (size > kgem->aperture_mappable) {
DBG(("%s: estimated fence space required [%d] exceed aperture [%d]\n",
__FUNCTION__, size, kgem->aperture_mappable));
return false;
}
}
 
if (kgem->aperture + kgem->aperture_fenced + num_pages(bo) > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages(bo) + kgem->aperture, kgem->aperture_high));
if (!aperture_check(kgem, num_pages(bo) + kgem->aperture + kgem->aperture_fenced))
return false;
}
 
if (bo->rq)
return true;
 
return kgem_flush(kgem, bo->flush);
}
#endif
 
 
 
4071,9 → 4258,6
 
 
 
 
 
 
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
4085,23 → 4269,14
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
 
assert(kgem->gen < 0100);
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
if( bo != NULL && bo->handle == -2)
{
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
__kgem_bo_mark_dirty(bo);
}
return 0;
};
 
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(kgem->mode != KGEM_NONE);
assert(bo->refcnt);
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
4115,6 → 4290,7
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
bo->domain = DOMAIN_GPU;
}
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty)
4133,6 → 4309,7
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(bo->tiling == I915_TILING_X);
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
4164,6 → 4341,77
return delta;
}
 
uint64_t kgem_add_reloc64(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint64_t delta)
{
int index;
 
DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
 
assert(kgem->gen >= 0100);
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(kgem->mode != KGEM_NONE);
assert(bo->refcnt);
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%ld] for handle=%d\n",
__FUNCTION__, (long)bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
bo->domain = DOMAIN_GPU;
}
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty)
__kgem_bo_mark_dirty(bo);
 
bo = bo->proxy;
assert(bo->refcnt);
}
assert(bo->refcnt);
 
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);
 
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
 
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
 
return delta;
}
 
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
4186,6 → 4434,7
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
void **ptr;
 
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
4198,15 → 4447,14
break;
 
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
assert(IS_CPU_MAP(bo->map) == type);
assert(bo->map);
__FUNCTION__, type ? "CPU" : "GTT", bo->handle));
 
ptr = type ? &bo->map__cpu : &bo->map__gtt;
assert(bo->rq == NULL);
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo)));
// munmap(MAP(*ptr), bytes(bo));
*ptr = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
 
4222,12 → 4470,11
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert_tiling(kgem, bo);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
4236,12 → 4483,9
return kgem_bo_map__cpu(kgem, bo);
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(num_pages(bo) <= kgem->aperture_mappable / 2);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
4254,7 → 4498,7
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
4265,12 → 4509,11
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert(bo->exec == NULL);
assert_tiling(kgem, bo);
 
4284,12 → 4527,9
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(num_pages(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
4303,7 → 4543,7
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
4333,20 → 4573,16
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert_tiling(kgem, bo);
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(bytes(bo) <= kgem->aperture_mappable / 4);
assert(num_pages(bo) <= kgem->aperture_mappable / 4);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
4359,7 → 4595,7
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
4368,11 → 4604,7
 
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->map)
return MAP(bo->map);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
return bo->map = __kgem_bo_map__gtt(kgem, bo);
return kgem_bo_map__async(kgem, bo);
}
 
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
4379,18 → 4611,15
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
DBG(("%s(handle=%d, size=%d, map=%p:%p)\n",
__FUNCTION__, bo->handle, bytes(bo), bo->map__gtt, bo->map__cpu));
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
if (bo->map__cpu)
return MAP(bo->map__cpu);
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
 
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
retry:
4399,17 → 4628,17
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
int err = 0;
 
 
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
if (kgem_cleanup_cache(kgem))
goto retry;
}
 
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n",
__FUNCTION__, bo->handle, bytes(bo));
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), err);
return NULL;
}
 
4416,58 → 4645,66
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
 
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
return bo->map__cpu = (void *)(uintptr_t)mmap_arg.addr_ptr;
}
 
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 
/*
struct kgem_bo *kgem_create_map(struct kgem *kgem,
void *ptr, uint32_t size,
bool read_only)
{
struct drm_i915_gem_mmap mmap_arg;
struct kgem_bo *bo;
uintptr_t first_page, last_page;
uint32_t handle;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(bo->refcnt);
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(bo->proxy == NULL);
assert(MAP(ptr) == ptr);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
if (!kgem->has_userptr)
return NULL;
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
int err = errno;
first_page = (uintptr_t)ptr;
last_page = first_page + size + PAGE_SIZE - 1;
 
assert(err != EINVAL);
first_page &= ~(PAGE_SIZE-1);
last_page &= ~(PAGE_SIZE-1);
assert(last_page > first_page);
 
if (__kgem_throttle_retire(kgem, 0))
goto retry;
handle = gem_userptr(kgem->fd,
(void *)first_page, last_page-first_page,
read_only);
if (handle == 0)
return NULL;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
bo = __kgem_bo_alloc(handle, (last_page - first_page) / PAGE_SIZE);
if (bo == NULL) {
gem_close(kgem->fd, handle);
return NULL;
}
 
ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), err);
bo->snoop = !kgem->has_llc;
debug_alloc__bo(kgem, bo);
 
if (first_page != (uintptr_t)ptr) {
struct kgem_bo *proxy;
 
proxy = kgem_create_proxy(kgem, bo,
(uintptr_t)ptr - first_page, size);
kgem_bo_destroy(kgem, bo);
if (proxy == NULL)
return NULL;
 
bo = proxy;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
if (bo->map && bo->domain == DOMAIN_CPU) {
DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle));
kgem_bo_release_map(kgem, bo);
bo->map__cpu = MAKE_USER_MAP(ptr);
 
DBG(("%s(ptr=%p, size=%d, pages=%d, read_only=%d) => handle=%d (proxy? %d)\n",
__FUNCTION__, ptr, size, NUM_PAGES(size), read_only, handle, bo->proxy != NULL));
return bo;
}
if (bo->map == NULL) {
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
}
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
*/
 
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
4500,6 → 4737,72
}
}
 
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(!bo->scanout || !write);
 
if (write || bo->needs_flush)
kgem_bo_submit(kgem, bo);
 
/* SHM pixmaps use proxies for subpage offsets */
assert(!bo->purged);
assert(bo->refcnt);
while (bo->proxy)
bo = bo->proxy;
assert(bo->refcnt);
assert(!bo->purged);
 
if (bo->domain != DOMAIN_CPU || FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
__FUNCTION__, bo->handle,
bo->needs_flush, bo->domain,
__kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_CPU;
set_domain.write_domain = write ? I915_GEM_DOMAIN_CPU : 0;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
if (bo->exec == NULL)
kgem_bo_retire(kgem, bo);
bo->domain = write ? DOMAIN_CPU : DOMAIN_NONE;
}
}
}
 
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->refcnt);
assert(bo->proxy == NULL);
 
kgem_bo_submit(kgem, bo);
 
if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
__FUNCTION__, bo->handle,
bo->needs_flush, bo->domain,
__kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
bo->gtt_dirty = true;
}
}
}
 
void kgem_clear_dirty(struct kgem *kgem)
{
struct list * const buffers = &kgem->next_request->buffers;
4542,7 → 4845,7
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
 
if (target->exec) {
if (target->exec && !bo->io) {
list_move_tail(&bo->request, &kgem->next_request->buffers);
bo->exec = &_kgem_dummy_exec;
}
4563,7 → 4866,7
 
bo->mem = NULL;
bo->need_io = false;
bo->mmapped = true;
bo->mmapped = MMAPPED_CPU;
 
return bo;
}
4638,7 → 4941,7
assert(bo->base.snoop);
assert(bo->base.tiling == I915_TILING_NONE);
assert(num_pages(&bo->base) >= alloc);
assert(bo->mmapped == true);
assert(bo->mmapped == MMAPPED_CPU);
assert(bo->need_io == false);
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
4685,7 → 4988,7
}
 
assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->mmapped == MMAPPED_CPU);
assert(bo->need_io == false);
 
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
4721,7 → 5024,7
}
 
assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->mmapped == MMAPPED_CPU);
assert(bo->need_io == false);
 
if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
4763,12 → 5066,12
DBG(("%s: created snoop handle=%d for buffer\n",
__FUNCTION__, bo->base.handle));
 
assert(bo->mmapped == true);
assert(bo->mmapped == MMAPPED_CPU);
assert(bo->need_io == false);
 
bo->base.refcnt = 1;
bo->base.snoop = true;
bo->base.map = MAKE_USER_MAP(bo->mem);
bo->base.map__cpu = MAKE_USER_MAP(bo->mem);
 
return bo;
}
4801,11 → 5104,12
/* We can reuse any write buffer which we can fit */
if (flags == KGEM_BUFFER_LAST &&
bo->write == KGEM_BUFFER_WRITE &&
bo->base.refcnt == 1 && !bo->mmapped &&
bo->base.refcnt == 1 &&
bo->mmapped == MMAPPED_NONE &&
size <= bytes(&bo->base)) {
DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
__FUNCTION__, size, bo->used, bytes(&bo->base)));
gem_write(kgem->fd, bo->base.handle,
gem_write__cachealigned(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
kgem_buffer_release(kgem, bo);
bo->need_io = 0;
4845,10 → 5149,11
list_for_each_entry(bo, &kgem->active_buffers, base.list) {
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
assert(bo->base.exec == NULL);
assert(bo->mmapped);
assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop);
assert(bo->mmapped == MMAPPED_GTT || kgem->has_llc || bo->base.snoop);
 
if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE && !bo->base.snoop) {
DBG(("%s: skip write %x buffer, need %x\n",
__FUNCTION__, bo->write, flags));
continue;
4862,8 → 5167,31
list_move(&bo->base.list, &kgem->batch_buffers);
goto done;
}
 
if (size <= bytes(&bo->base) &&
(bo->base.rq == NULL ||
!__kgem_busy(kgem, bo->base.handle))) {
DBG(("%s: reusing whole buffer? size=%d, total=%d\n",
__FUNCTION__, size, bytes(&bo->base)));
__kgem_bo_clear_busy(&bo->base);
kgem_buffer_release(kgem, bo);
 
switch (bo->mmapped) {
case MMAPPED_CPU:
kgem_bo_sync__cpu(kgem, &bo->base);
break;
case MMAPPED_GTT:
kgem_bo_sync__gtt(kgem, &bo->base);
break;
}
 
offset = 0;
bo->used = size;
list_move(&bo->base.list, &kgem->batch_buffers);
goto done;
}
}
}
#endif
 
#if !DBG_NO_MAP_UPLOAD
4875,9 → 5203,9
alloc = PAGE_ALIGN(size);
assert(alloc);
 
alloc /= PAGE_SIZE;
if (alloc > kgem->aperture_mappable / 4)
flags &= ~KGEM_BUFFER_INPLACE;
alloc /= PAGE_SIZE;
 
if (kgem->has_llc &&
(flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
4963,7 → 5291,7
CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
if (old == NULL) {
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
if (old && !__kgem_bo_is_mappable(kgem, old)) {
if (old && !kgem_bo_can_map(kgem, old)) {
_kgem_bo_destroy(kgem, old);
old = NULL;
}
4971,7 → 5299,7
if (old) {
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
assert(__kgem_bo_is_mappable(kgem, old));
assert(kgem_bo_can_map(kgem, old));
assert(!old->snoop);
assert(old->rq == NULL);
 
4987,8 → 5315,10
 
bo->mem = kgem_bo_map(kgem, &bo->base);
if (bo->mem) {
if (IS_CPU_MAP(bo->base.map))
if (bo->mem == MAP(bo->base.map__cpu))
flags &= ~KGEM_BUFFER_INPLACE;
else
bo->mmapped = MMAPPED_GTT;
goto init;
} else {
bo->base.refcnt = 0;
5107,7 → 5437,8
assert(!bo->need_io || !bo->base.needs_flush);
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
assert(bo->mem);
assert(!bo->mmapped || bo->base.map != NULL);
assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem);
assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
 
bo->used = size;
bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
5121,6 → 5452,7
 
done:
bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
assert(bo->used && bo->used <= bytes(&bo->base));
assert(bo->mem);
*ret = (char *)bo->mem + offset;
return kgem_create_proxy(kgem, &bo->base, offset, size);
5177,7 → 5509,7
bo->size.bytes -= stride;
}
 
bo->map = MAKE_CPU_MAP(*ret);
bo->map__cpu = *ret;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
5222,10 → 5554,10
struct kgem_bo **ptr)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->map == NULL || IS_CPU_MAP(bo->map));
assert(bo->map__gtt == NULL);
assert(bo->proxy);
list_add(&bo->vma, &bo->proxy->vma);
bo->map = ptr;
bo->map__gtt = ptr;
*ptr = kgem_bo_reference(bo);
}
 
5258,13 → 5590,13
bo->base.domain,
__kgem_busy(kgem, bo->base.handle)));
 
assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc);
assert(bo->mmapped == MMAPPED_GTT || bo->base.snoop || kgem->has_llc);
 
VG_CLEAR(set_domain);
set_domain.handle = bo->base.handle;
set_domain.write_domain = 0;
set_domain.read_domains =
IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
bo->mmapped == MMAPPED_CPU ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;
 
if (drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
/contrib/sdk/sources/Intel-2D/sna/kgem.h
71,9 → 71,8
struct list request;
struct list vma;
 
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
void *map__cpu;
void *map__gtt;
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
struct kgem_bo_binding {
82,11 → 81,11
uint16_t offset;
} binding;
 
uint64_t presumed_offset;
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
uint32_t target_handle;
uint32_t presumed_offset;
uint32_t delta;
union {
struct {
200,11 → 199,12
uint32_t has_handle_lut :1;
 
uint32_t can_blt_cpu :1;
uint32_t can_render_y :1;
 
uint16_t fence_max;
uint16_t half_cpu_cache_pages;
uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
uint32_t aperture, aperture_fenced;
uint32_t aperture, aperture_fenced, aperture_max_fence;
uint32_t max_upload_tile_size, max_copy_tile_size;
uint32_t max_gpu_size, max_cpu_size;
uint32_t large_object_size, max_object_size;
313,6 → 313,8
int bpp,
uint32_t flags);
 
bool kgem_bo_convert_to_gpu(struct kgem *kgem, struct kgem_bo *bo);
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
 
349,14 → 351,6
_kgem_submit(kgem);
}
 
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
if (kgem->nreloc == 0)
return false;
 
return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring);
}
 
static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->exec)
392,8 → 386,10
kgem_submit(kgem);
#endif
 
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: flushing before new bo\n", __FUNCTION__));
_kgem_submit(kgem);
}
 
if (kgem->mode == mode)
return;
466,6 → 462,11
struct kgem_bo *bo,
uint32_t read_write_domains,
uint32_t delta);
uint64_t kgem_add_reloc64(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domains,
uint64_t delta);
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
475,8 → 476,6
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write);
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
483,7 → 482,7
const void *data, int length);
 
int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo);
void kgem_get_tile_size(struct kgem *kgem, int tiling,
void kgem_get_tile_size(struct kgem *kgem, int tiling, int pitch,
int *tile_width, int *tile_height, int *tile_size);
 
static inline int __kgem_buffer_size(struct kgem_bo *bo)
498,6 → 497,12
return PAGE_SIZE * bo->size.pages.count;
}
 
static inline int __kgem_bo_num_pages(struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
return bo->size.pages.count;
}
 
static inline int kgem_bo_size(struct kgem_bo *bo)
{
if (bo->proxy)
506,7 → 511,6
return __kgem_bo_size(bo);
}
 
/*
static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
struct kgem_bo *bo)
{
533,81 → 537,7
 
return kgem_bo_blt_pitch_is_ok(kgem, bo);
}
*/
 
static inline bool __kgem_bo_is_mappable(struct kgem *kgem,
struct kgem_bo *bo)
{
if (bo->domain == DOMAIN_GTT)
return true;
 
if (kgem->gen < 040 && bo->tiling &&
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
if (kgem->has_llc && bo->tiling == I915_TILING_NONE)
return true;
 
if (!bo->presumed_offset)
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 
return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable;
}
 
static inline bool kgem_bo_is_mappable(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: domain=%d, offset: %d size: %d\n",
__FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo)));
assert(bo->refcnt);
return __kgem_bo_is_mappable(kgem, bo);
}
 
static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p, tiling=%d, domain=%d\n",
__FUNCTION__, bo->map, bo->tiling, bo->domain));
assert(bo->refcnt);
 
if (bo->map == NULL)
return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU;
 
return IS_CPU_MAP(bo->map) == !bo->tiling;
}
 
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
if (kgem_bo_mapped(kgem, bo))
return true;
 
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
struct kgem_bo *bo,
bool write)
{
if (bo->purged || (bo->scanout && write))
return false;
 
if (kgem->has_llc)
return true;
 
if (bo->domain != DOMAIN_CPU)
return false;
 
return !write || bo->exec == NULL;
}
 
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
assert(bo->refcnt);
652,9 → 582,6
if (bo->exec)
return true;
 
if (kgem_flush(kgem, bo->flush))
kgem_submit(kgem);
 
if (bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
723,6 → 650,53
} while ((bo = bo->proxy));
}
 
static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p:%p, tiling=%d, domain=%d\n",
__FUNCTION__, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain));
 
if (bo->tiling == I915_TILING_NONE && (bo->domain == DOMAIN_CPU || kgem->has_llc))
return bo->map__cpu != NULL;
 
return bo->map__gtt != NULL;
}
 
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p:%p, tiling=%d, domain=%d, offset=%ld\n",
__FUNCTION__, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset));
 
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
return true;
 
if (bo->map__gtt != NULL)
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
if (!bo->presumed_offset)
return __kgem_bo_num_pages(bo) <= kgem->aperture_mappable / 4;
 
return bo->presumed_offset / PAGE_SIZE + __kgem_bo_num_pages(bo) <= kgem->aperture_mappable;
}
 
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
struct kgem_bo *bo,
bool write)
{
if (bo->purged || (bo->scanout && write))
return false;
 
if (kgem->has_llc)
return true;
 
if (bo->domain != DOMAIN_CPU)
return false;
 
return !write || bo->exec == NULL;
}
 
#define KGEM_BUFFER_WRITE 0x1
#define KGEM_BUFFER_INPLACE 0x2
#define KGEM_BUFFER_LAST 0x4
742,8 → 716,7
void kgem_throttle(struct kgem *kgem);
#define MAX_INACTIVE_TIME 10
bool kgem_expire_cache(struct kgem *kgem);
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
bool kgem_cleanup_cache(struct kgem *kgem);
 
void kgem_clean_scanout_cache(struct kgem *kgem);
void kgem_clean_large_cache(struct kgem *kgem);
758,4 → 731,6
}
#endif
 
void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling);
 
#endif /* KGEM_H */
/contrib/sdk/sources/Intel-2D/sna/sna.c
706,7 → 706,20
 
 
 
int intel_get_device_id(struct sna *sna)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(sna->scrn, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
return devid;
}
 
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};
814,21 → 827,6
return &intel_generic_info;
}
 
int intel_get_device_id(int fd)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
 
return devid;
}
 
int drmIoctl(int fd, unsigned long request, void *arg)
{
ioctl_t io;
/contrib/sdk/sources/Intel-2D/sna/sna.h
448,7 → 448,8
unsigned flags;
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_NO_VSYNC 0x4
#define SNA_TRIPLE_BUFFER 0x8
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
#define SNA_FLUSH_GTT 0x40
490,6 → 491,7
uint32_t fill_alu;
} blt_state;
union {
unsigned gt;
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
497,6 → 499,8
struct gen7_render_state gen7;
} render_state;
 
bool dri_available;
bool dri_open;
 
/* Broken-out options. */
// OptionInfoPtr Options;
611,4 → 615,7
 
return memcmp(a, b, sizeof(*a)) == 0;
}
 
int intel_get_device_id(struct sna *sna);
 
#endif /* _SNA_H */
/contrib/sdk/sources/Intel-2D/sna/sna_reg.h
42,22 → 42,22
#define BLT_SRC_TILED (1<<15)
#define BLT_DST_TILED (1<<11)
 
#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4))
#define XY_SETUP_BLT ((2<<29)|(1<<22)|6)
#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7)
#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1)
#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1)
#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16))
#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4)
#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
#define COLOR_BLT_CMD (2<<29|0x40<<22|(0x3))
#define XY_COLOR_BLT (2<<29|0x50<<22|(0x4))
#define XY_SETUP_BLT (2<<29|0x01<<22)
#define XY_SETUP_MONO_PATTERN_SL_BLT (2<<29|0x11<<22)
#define XY_SETUP_CLIP (2<<29|0x03<<22|1)
#define XY_SCANLINE_BLT (2<<29|0x25<<22|1)
#define XY_TEXT_IMMEDIATE_BLT (2<<29|0x31<<22|(1<<16))
#define XY_SRC_COPY_BLT_CMD (2<<29|0x53<<22)
#define SRC_COPY_BLT_CMD (2<<29|0x43<<22|0x4)
#define XY_PAT_BLT (2<<29|0x51<<22)
#define XY_PAT_BLT_IMMEDIATE (2<<29|0x72<<22)
#define XY_MONO_PAT (2<<29|0x52<<22)
#define XY_MONO_SRC_COPY (2<<29|0x54<<22)
#define XY_MONO_SRC_COPY_IMM (2<<29|0x71<<22)
#define XY_FULL_MONO_PATTERN_BLT (2<<29|0x57<<22)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT (2<<29|0x58<<22)
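 
Several of the reworked opcode macros no longer carry a fixed dword count, while others (e.g. XY_COLOR_BLT with its trailing 0x4, i.e. 6 dwords minus 2) still encode it. A hedged sketch of how such an opcode is combined with the tiling flag when a batch is assembled; the b[] array and the surrounding values are illustrative, not the driver's actual emitter:
 
/* Illustrative 6-dword XY_COLOR_BLT packet; the length field (0x4) is
 * already encoded in the macro, only the tiling flag is OR'ed in. */
uint32_t b[6];
b[0] = XY_COLOR_BLT | (dst_is_tiled ? BLT_DST_TILED : 0);
b[1] = br13;              /* pitch, colour depth and raster op */
b[2] = (y1 << 16) | x1;   /* top-left corner */
b[3] = (y2 << 16) | x2;   /* bottom-right corner, exclusive */
b[4] = dst_offset;        /* a relocation in the real batch */
b[5] = fill_colour;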
 
/* FLUSH commands */
#define BRW_3D(Pipeline,Opcode,Subopcode) \
/contrib/sdk/sources/Intel-2D/sna/sna_render.h
104,6 → 104,7
uint32_t inplace :1;
uint32_t overwrites:1;
uint32_t bpp : 6;
uint32_t alu : 4;
 
uint32_t cmd;
uint32_t br13;
245,7 → 246,7
struct sna_solid_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[1024];
uint32_t color[1025];
uint32_t color[1024];
int last;
int size;
int dirty;
381,6 → 382,7
};
 
struct gen6_render_state {
unsigned gt;
const struct gt_info *info;
struct kgem_bo *general_bo;
 
430,6 → 432,7
};
 
struct gen7_render_state {
unsigned gt;
const struct gt_info *info;
struct kgem_bo *general_bo;
 
/contrib/sdk/sources/Intel-2D/uxa/uxa.c
70,7 → 70,7
// intel_debug_flush(scrn);
}
 
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
int uxa_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
{
struct intel_screen_private *intel = intel_get_screen_private();
drm_intel_bo *bo;
118,14 → 118,14
return -1;
};
 
void sna_set_bo_handle(bitmap_t *bitmap, int handle)
void uxa_set_bo_handle(bitmap_t *bitmap, int handle)
{
sna_bitmap_from_handle(bitmap, handle);
uxa_bitmap_from_handle(bitmap, handle);
};
 
 
int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y,
int w, int h, int src_x, int src_y)
int uxa_blit_tex(bitmap_t *bitmap, int scale, int vsync,
int dst_x, int dst_y, int w, int h, int src_x, int src_y)
{
// DBG("%s\n", __FUNCTION__);