Subversion Repositories Kolibri OS

Compare Revisions

Rev 4251 → Rev 4250

/drivers/video/Intel-2D/sna_transform.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen5.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen4.c
File deleted
/drivers/video/Intel-2D/sna_cpu.c
File deleted
/drivers/video/Intel-2D/kgem.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen3.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen7.c
File deleted
/drivers/video/Intel-2D/sna_cpuid.h
File deleted
/drivers/video/Intel-2D/i915_pciids.h
File deleted
/drivers/video/Intel-2D/gen3_render.c
107,15 → 107,7
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}
};
 
#define xFixedToDouble(f) pixman_fixed_to_double(f)
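/* [editor's note, hedged] xFixedToDouble() converts pixman's 16.16 fixed-point
 * values to double, i.e. a divide by 65536. A minimal sketch, assuming only the
 * standard pixman helpers:
 *
 *   pixman_fixed_t f = pixman_double_to_fixed(1.5);  // 0x00018000
 *   double d = xFixedToDouble(f);                    // 1.5
 *   // equivalent by hand: d = (double)f / 65536.0;
 */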
195,14 → 187,7
switch (format) {
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
case PICT_r5g6b5:
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a8:
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return false;
default:
return true;
222,257 → 207,15
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return BIAS | COLR_BUF_ARGB8888;
case PICT_r5g6b5:
case PICT_b5g6r5:
return BIAS | COLR_BUF_RGB565;
case PICT_a1r5g5b5:
case PICT_x1r5g5b5:
case PICT_a1b5g5r5:
case PICT_x1b5g5r5:
return BIAS | COLR_BUF_ARGB1555;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
case PICT_a2b10g10r10:
case PICT_x2b10g10r10:
return BIAS | COLR_BUF_ARGB2AAA;
case PICT_a8:
return BIAS | COLR_BUF_8BIT;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
case PICT_a4b4g4r4:
case PICT_x4b4g4r4:
return BIAS | COLR_BUF_ARGB4444;
}
#undef BIAS
}
 
 
#if 0
static bool gen3_check_repeat(PicturePtr p)
{
if (!p->repeat)
return true;
 
switch (p->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
return true;
default:
return false;
}
}
 
static uint32_t gen3_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
case PictFilterNearest:
return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
case PictFilterBilinear:
return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
}
}
 
static bool gen3_check_filter(PicturePtr p)
{
switch (p->filter) {
case PictFilterNearest:
case PictFilterBilinear:
return true;
default:
return false;
}
}
fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
int16_t dst_x, dst_y;
int16_t src_x, src_y;
 
dst_x = r->dst.x + op->dst.x;
dst_y = r->dst.y + op->dst.y;
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
 
gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
OUT_VERTEX(src_x + r->width);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y + r->height);
 
gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
OUT_VERTEX(src_x);
OUT_VERTEX(src_y);
}
 
fastcall static void
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2;
v[1] = box->y2;
v[2] = box->x2 + op->src.offset[0];
v[3] = box->y2 + op->src.offset[1];
 
v[4] = box->x1;
v[5] = box->y2;
v[6] = box->x1 + op->src.offset[0];
v[7] = box->y2 + op->src.offset[1];
 
v[8] = box->x1;
v[9] = box->y1;
v[10] = box->x1 + op->src.offset[0];
v[11] = box->y1 + op->src.offset[1];
 
v += 12;
box++;
} while (--nbox);
}
fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
const PictTransform *transform = op->src.transform;
 
do {
v[0] = box->x2;
v[1] = box->y2;
_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[2], &v[3]);
 
v[4] = box->x1;
v[5] = box->y2;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y2 + op->src.offset[1],
transform, op->src.scale,
&v[6], &v[7]);
 
v[8] = box->x1;
v[9] = box->y1;
_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
box->y1 + op->src.offset[1],
transform, op->src.scale,
&v[10], &v[11]);
 
box++;
v += 12;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[2] = v[6] + w * op->src.scale[0];
 
v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
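/* [editor's note, hedged sketch] The 12 floats written above are the three
 * vertices of a gen3 RECTLIST rectangle, each (x, y, s, t), emitted as
 * bottom-right, bottom-left, top-left (the hardware derives the fourth corner).
 * Index map, where dst = r->dst + op->dst, ox/oy = op->src.offset and
 * sx/sy = op->src.scale:
 *
 *   v[0..3]  = (dst.x+w, dst.y+h, (r->src.x+ox+w)*sx, (r->src.y+oy+h)*sy)
 *   v[4..7]  = (dst.x,   dst.y+h, (r->src.x+ox)*sx,   (r->src.y+oy+h)*sy)
 *   v[8..11] = (dst.x,   dst.y,   (r->src.x+ox)*sx,   (r->src.y+oy)*sy)
 */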
 
fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
v[0] = box->x2 + op->dst.x;
v[8] = v[4] = box->x1 + op->dst.x;
v[5] = v[1] = box->y2 + op->dst.y;
v[9] = box->y1 + op->dst.y;
 
v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 12;
box++;
} while (--nbox);
}
 
fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x;
v[9] = r->dst.y;
 
v[0] = v[4] + w;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = r->src.x * op->src.scale[0];
v[11] = r->src.y * op->src.scale[1];
 
v[2] = v[6] + w * op->src.scale[0];
v[7] = v[3] = v[11] + h * op->src.scale[1];
}
fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float w = r->width;
float h = r->height;
float *v;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
v[8] = v[4] = r->dst.x + op->dst.x;
v[0] = v[4] + w;
 
v[9] = r->dst.y + op->dst.y;
v[5] = v[1] = v[9] + h;
 
v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
v[2] = v[6] + w * op->mask.scale[0];
 
v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif
 
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
1141,7 → 884,6
 
state->current_dst = bo->unique_id;
}
assert(bo->exec);
kgem_bo_mark_dirty(bo);
}
 
1196,7 → 938,6
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->src.card_format);
map[tex_count * 2 + 0] =
op->src.card_format |
gen3_ms_tiling(op->src.bo->tiling) |
1232,7 → 973,6
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->mask.card_format);
map[tex_count * 2 + 0] =
op->mask.card_format |
gen3_ms_tiling(op->mask.bo->tiling) |
1617,8 → 1357,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
1754,674 → 1494,88
return false;
}
 
#if 0
static int
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->card_format = 0;
 
if (picture->pDrawable == NULL) {
SourcePict *source = picture->pSourcePict;
int ret = -1;
 
switch (source->type) {
case SourcePictTypeSolidFill:
DBG(("%s: solid fill [%08x], format %08x\n",
__FUNCTION__,
(unsigned)source->solidFill.color,
(unsigned)picture->format));
ret = gen3_init_solid(channel, source->solidFill.color);
break;
 
case SourcePictTypeLinear:
ret = gen3_init_linear(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
 
case SourcePictTypeRadial:
ret = gen3_init_radial(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
}
 
if (ret == -1) {
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (sna_picture_is_solid(picture, &color)) {
DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color);
}
 
if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
}
 
if (!gen3_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
if (!gen3_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
channel->pict_format = picture->format;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else {
channel->transform = picture->transform;
channel->is_affine = sna_transform_is_affine(picture->transform);
}
 
if (!gen3_composite_channel_set_format(channel, picture->format) &&
!gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
assert(channel->card_format);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
__FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height,
x, y, w, h));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
/* If it is a solid, try to use the BLT paths */
if (!picture->pDrawable)
return picture->pSourcePict->type == SourcePictTypeSolidFill;
 
if (picture->pDrawable->width == 1 &&
picture->pDrawable->height == 1 &&
picture->repeat)
return true;
 
if (too_large(picture->pDrawable->width, picture->pDrawable->height))
return true;
 
return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst,
PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
DBG(("%s: target too large for 3D pipe (%d, %d)\n",
__FUNCTION__,
dst->pDrawable->width, dst->pDrawable->height));
return true;
}
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return source_use_blt(sna, src);
}
#endif
 
static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}
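/* [editor's note, hedged] The realignment above rounds vertex_used up to a
 * multiple of the new floats_per_vertex so that vertex_index * floats_per_vertex
 * again points at the start of a whole vertex. Worked example, assuming those
 * fields:
 *
 *   vertex_used = 10, new floats_per_vertex = 4
 *   vertex_index = (10 + 4 - 1) / 4 = 3
 *   vertex_used  = 3 * 4 = 12        // two floats of padding are skipped
 */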
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
#if 0
static bool
gen3_composite_fallback(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen3_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* readback in order to upload the source, do it all
* on the cpu.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
if (mask &&
mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
op != PictOpOver) {
DBG(("%s: component-alpha mask with op=%d, should fallback\n",
__FUNCTION__, op));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask && mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
__FUNCTION__, dst_use_cpu(dst_pixmap)));
return dst_use_cpu(dst_pixmap);
}
 
static bool
gen3_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s()\n", __FUNCTION__));
 
if (op >= ARRAY_SIZE(gen3_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
/* Try to use the BLT engine unless it implies a
* 3D -> 2D context switch.
*/
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen3_composite_fallback(sna, op, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
mask_x, mask_y,
dst_x, dst_y,
width, height,
tmp);
 
if (!gen3_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height)) {
DBG(("%s: unable to set render target\n",
__FUNCTION__));
return false;
}
 
tmp->op = op;
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
if (too_large(tmp->dst.width, tmp->dst.height) ||
!gen3_check_pitch_3d(tmp->dst.bo)) {
if (!sna_render_composite_redirect(sna, tmp,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
}
 
tmp->u.gen3.num_constants = 0;
tmp->src.u.gen3.type = SHADER_TEXTURE;
tmp->src.is_affine = true;
DBG(("%s: preparing source\n", __FUNCTION__));
switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
tmp->src.u.gen3.type = SHADER_ZERO;
break;
case 1:
if (mask == NULL && tmp->src.bo &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen3_composite_channel_convert(&tmp->src);
break;
}
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
 
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->mask.is_affine = true;
tmp->need_magic_ca_pass = false;
tmp->has_component_alpha = false;
if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, mask_x, mask_y)) {
tmp->mask.u.gen3.type = SHADER_TEXTURE;
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
mask_x, mask_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
tmp->mask.u.gen3.type = SHADER_ZERO;
break;
case 1:
gen3_composite_channel_convert(&tmp->mask);
break;
}
}
DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
if (tmp->mask.u.gen3.type == SHADER_ZERO) {
if (tmp->src.bo) {
kgem_bo_destroy(&sna->kgem,
tmp->src.bo);
tmp->src.bo = NULL;
}
tmp->src.u.gen3.type = SHADER_ZERO;
tmp->mask.u.gen3.type = SHADER_NONE;
}
 
if (tmp->mask.u.gen3.type != SHADER_NONE) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
/* Check if it's component alpha that relies on a source alpha
* and on the source value. We can only get one of those
* into the single source value that we get to blend with.
*/
DBG(("%s: component-alpha mask: %d\n",
__FUNCTION__, tmp->mask.u.gen3.type));
tmp->has_component_alpha = true;
if (tmp->mask.u.gen3.type == SHADER_WHITE) {
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = false;
} else if (gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
if (op != PictOpOver)
goto cleanup_mask;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
} else {
if (tmp->mask.is_opaque) {
tmp->mask.u.gen3.type = SHADER_NONE;
} else if (is_constant_ps(tmp->src.u.gen3.type) &&
is_constant_ps(tmp->mask.u.gen3.type)) {
uint32_t v;
 
v = multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
24);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
16);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
8);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
0);
 
DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
__FUNCTION__,
tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
v));
 
tmp->src.u.gen3.type = SHADER_CONSTANT;
tmp->src.u.gen3.mode = v;
tmp->src.is_opaque = false;
 
tmp->mask.u.gen3.type = SHADER_NONE;
}
}
}
}
DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
tmp->src.is_affine, tmp->mask.is_affine));
 
tmp->prim_emit = gen3_emit_composite_primitive;
if (is_constant_ps(tmp->mask.u.gen3.type)) {
switch (tmp->src.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant;
tmp->emit_boxes = gen3_emit_composite_boxes_constant;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
}
}
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
}
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
}
break;
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
if (tmp->mask.transform == NULL) {
if (is_constant_ps(tmp->src.u.gen3.type)) {
if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
}
}
} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
}
}
}
}
 
tmp->floats_per_vertex = 2;
if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex,
tmp->prim_emit != gen3_emit_composite_primitive));
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
tmp->boxes = gen3_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen3_render_composite_boxes;
tmp->thread_boxes = gen3_render_composite_boxes__thread;
}
tmp->done = gen3_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
#endif
 
 
 
2556,6 → 1710,19
 
 
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
 
 
2658,59 → 1825,13
 
 
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
const char *gen3_render_init(struct sna *sna, const char *backend)
{
struct sna_render *render = &sna->render;
 
#if 0
#if !NO_COMPOSITE
render->composite = gen3_render_composite;
render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
render->check_composite_spans = gen3_check_composite_spans;
render->composite_spans = gen3_render_composite_spans;
render->prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
render->video = gen3_render_video;
 
render->copy_boxes = gen3_render_copy_boxes;
render->copy = gen3_render_copy;
 
render->fill_boxes = gen3_render_fill_boxes;
render->fill = gen3_render_fill;
render->fill_one = gen3_render_fill_one;
#endif
 
render->blit_tex = gen3_blit_tex;
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return "Alviso (gen3)";
}
 
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
2812,3 → 1933,39
gen3_align_vertex(sna, tmp);
return true;
}
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
bool gen3_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
 
// render->video = gen3_render_video;
 
render->blit_tex = gen3_blit_tex;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return true;
}
/drivers/video/Intel-2D/kgem-sna.c
0,0 → 1,4303
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
static inline
int user_free(void *mem)
{
int val;
__asm__ __volatile__(
"int $0x40"
:"=a"(val)
:"a"(68),"b"(12),"c"(mem));
return val;
}
 
 
unsigned int cpu_cache_size();
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
#define DBG_NO_HW 0
#define DBG_NO_TILING 1
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 1
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
 
#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif
 
#define SHOW_BATCH 1
 
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif
 
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
* offsets be aligned to a cacheline (64 bytes).
*/
#define UPLOAD_ALIGNMENT 128
 
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
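/* [editor's note, hedged] These macros tag the kind of mapping in the two low
 * bits of an (at least 4-byte aligned) pointer: untagged = GTT map, bit 0 set =
 * CPU map, bits 0-1 set = user pointer; MAP() strips the tag again. A minimal
 * sketch of the tagging:
 *
 *   void *cpu = MAKE_CPU_MAP(ptr);   // (uintptr_t)ptr | 1
 *   void *usr = MAKE_USER_MAP(ptr);  // (uintptr_t)ptr | 3
 *   // IS_USER_MAP(cpu) == 0, IS_USER_MAP(usr) != 0
 *   // MAP(cpu) == ptr,       MAP(usr) == ptr
 */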
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
 
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
struct local_i915_gem_userptr {
uint64_t user_ptr;
uint32_t user_size;
uint32_t flags;
#define I915_USERPTR_READ_ONLY (1<<0)
#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
uint32_t handle;
};
 
#define UNCACHED 0
#define SNOOPED 1
 
struct local_i915_gem_cacheing {
uint32_t handle;
uint32_t cacheing;
};
 
#define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHING
 
struct local_fbinfo {
int width;
int height;
int pitch;
int tiling;
};
 
struct kgem_buffer {
struct kgem_bo base;
void *mem;
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
 
static inline int bytes(struct kgem_bo *bo)
{
return __kgem_bo_size(bo);
}
 
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
 
#ifdef DEBUG_MEMORY
static void debug_alloc(struct kgem *kgem, size_t size)
{
kgem->debug_memory.bo_allocs++;
kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
 
static void kgem_sna_reset(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.reset(sna);
sna->blt_state.fill_bo = 0;
}
 
static void kgem_sna_flush(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.flush(sna);
 
// if (sna->render.solid_cache.dirty)
// sna_render_flush_solid(sna);
}
 
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
struct drm_i915_gem_set_tiling set_tiling;
int ret;
 
if (DBG_NO_TILING)
return false;
/*
VG_CLEAR(set_tiling);
do {
set_tiling.handle = handle;
set_tiling.tiling_mode = tiling;
set_tiling.stride = stride;
 
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
*/
return false;//ret == 0;
}
 
static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing)
{
struct local_i915_gem_cacheing arg;
 
VG_CLEAR(arg);
arg.handle = handle;
arg.cacheing = cacheing;
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0;
}
 
 
 
 
 
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: not retiring per-request\n", __FUNCTION__));
return false;
}
 
if (!kgem->need_retire) {
DBG(("%s: nothing to retire\n", __FUNCTION__));
return false;
}
 
if (kgem_retire(kgem))
return true;
 
if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
DBG(("%s: not throttling\n", __FUNCTION__));
return false;
}
 
kgem_throttle(kgem);
return kgem_retire(kgem);
}
 
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
void *ptr;
 
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
 
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
printf("%s: failed to retrieve GTT offset for handle=%d: %d\n",
__FUNCTION__, bo->handle, 0);
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry_gtt;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry_gtt;
}
 
return NULL;
}
 
retry_mmap:
ptr = (void*)(int)mmap_arg.offset;
if (ptr == NULL) {
printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
 
}
 
return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
/* align the transfer to cachelines; fortuitously this is safe! */
if ((offset | length) & 63) {
pwrite.offset = offset & ~63;
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
} else {
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
}
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
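/* [editor's note, hedged] Worked example of the cacheline widening in
 * gem_write() above, for offset=100, length=10:
 *
 *   pwrite.offset   = 100 & ~63           = 64
 *   pwrite.size     = ALIGN(110, 64) - 64 = 64
 *   pwrite.data_ptr = src + 64 - 100      = src - 36
 *
 * i.e. the kernel copies a whole 64-byte window starting 36 bytes before src;
 * per the comment this is "fortuitously safe", presumably because the staging
 * buffers handed to gem_write() carry cacheline padding around the payload.
 */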
 
 
bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
 
VG_CLEAR(busy);
busy.handle = handle;
busy.busy = !kgem->wedged;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
__FUNCTION__, handle, busy.busy, kgem->wedged));
 
return busy.busy;
}
 
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
__FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
__kgem_busy(kgem, bo->handle)));
assert(bo->exec == NULL);
assert(list_is_empty(&bo->vma));
 
if (bo->rq) {
if (!__kgem_busy(kgem, bo->handle)) {
__kgem_bo_clear_busy(bo);
kgem_retire(kgem);
}
} else {
assert(!bo->needs_flush);
ASSERT_IDLE(kgem, bo->handle);
}
}
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
assert(bo->refcnt);
assert(!bo->purged);
assert(bo->proxy == NULL);
ASSERT_IDLE(kgem, bo->handle);
 
assert(length <= bytes(bo));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return false;
 
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
if (bo->exec == NULL) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_NONE;
}
return true;
}
 
static uint32_t gem_create(int fd, int num_pages)
{
struct drm_i915_gem_create create;
 
VG_CLEAR(create);
create.handle = 0;
create.size = PAGE_SIZE * num_pages;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 
return create.handle;
}
 
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->exec == NULL);
assert(!bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = 1;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return true;
#endif
}
 
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
if (!bo->purged)
return true;
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
return madv.retained;
 
return false;
#endif
}
 
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = !madv.retained;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return false;
#endif
}
 
static void gem_close(int fd, uint32_t handle)
{
struct drm_gem_close close;
 
VG_CLEAR(close);
close.handle = handle;
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
 
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
#else
unsigned int v = 0;
 
while (word >>= 1)
v++;
 
return v;
#endif
}
 
constant inline static int cache_bucket(int num_pages)
{
return __fls(num_pages);
}
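/* [editor's note, hedged] cache_bucket() groups buffers by the most significant
 * set bit of their page count, i.e. bucket N covers [2^N, 2^(N+1)) pages:
 *
 *   cache_bucket(1) == 0   // 1 page
 *   cache_bucket(2) == 1   // 2-3 pages
 *   cache_bucket(4) == 2   // 4-7 pages
 *   cache_bucket(8) == 3   // 8-15 pages
 */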
 
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
int handle, int num_pages)
{
assert(num_pages);
memset(bo, 0, sizeof(*bo));
 
bo->refcnt = 1;
bo->handle = handle;
bo->target_handle = -1;
num_pages(bo) = num_pages;
bucket(bo) = cache_bucket(num_pages);
bo->reusable = true;
bo->domain = DOMAIN_CPU;
list_init(&bo->request);
list_init(&bo->list);
list_init(&bo->vma);
 
return bo;
}
 
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
struct kgem_bo *bo;
 
if (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
} else {
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
}
 
return __kgem_bo_init(bo, handle, num_pages);
}
 
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
struct kgem_request *rq;
 
rq = __kgem_freed_request;
if (rq) {
__kgem_freed_request = *(struct kgem_request **)rq;
} else {
rq = malloc(sizeof(*rq));
if (rq == NULL)
rq = &kgem->static_request;
}
 
list_init(&rq->buffers);
rq->bo = NULL;
rq->ring = 0;
 
return rq;
}
 
static void __kgem_request_free(struct kgem_request *rq)
{
_list_del(&rq->list);
*(struct kgem_request **)rq = __kgem_freed_request;
__kgem_freed_request = rq;
}
 
static struct list *inactive(struct kgem *kgem, int num_pages)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->inactive[cache_bucket(num_pages)];
}
 
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->active[cache_bucket(num_pages)][tiling];
}
 
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
/* XXX assume that only future chipsets are unknown and follow
* the post gen2 PCI layout.
*/
// return dev->regions[gen < 030 ? 0 : 2].size;
 
return 0;
}
 
static size_t
total_ram_size(void)
{
uint32_t data[9];
size_t size = 0;
 
asm volatile("int $0x40"
: "=a" (size)
: "a" (18),"b"(20), "c" (data)
: "memory");
 
return size != -1 ? size : 0;
}
 
static int gem_param(struct kgem *kgem, int name)
{
drm_i915_getparam_t gp;
int v = -1; /* No param uses the sign bit, reserve it for errors */
 
VG_CLEAR(gp);
gp.param = name;
gp.value = &v;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
return -1;
 
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
return v;
}
 
static bool test_has_execbuffer2(struct kgem *kgem)
{
return 1;
}
 
static bool test_has_no_reloc(struct kgem *kgem)
{
if (DBG_NO_FAST_RELOC)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
 
static bool test_has_handle_lut(struct kgem *kgem)
{
if (DBG_NO_HANDLE_LUT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
 
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
bool detected = false;
int ret;
 
if (DBG_NO_SEMAPHORES)
return false;
 
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
if (ret != -1)
return ret > 0;
 
return detected;
}
 
static bool __kgem_throttle(struct kgem *kgem)
{
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
return false;
 
return errno == EIO;
}
 
static bool is_hw_supported(struct kgem *kgem,
struct pci_device *dev)
{
if (DBG_NO_HW)
return false;
 
if (!test_has_execbuffer2(kgem))
return false;
 
if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
return kgem->has_blt;
 
/* Although pre-855gm the GMCH is fubar, it works mostly. So
* let the user decide through "NoAccel" whether or not to risk
* hw acceleration.
*/
 
if (kgem->gen == 060 && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
 
if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
return kgem->has_blt;
 
return true;
}
 
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
if (kgem->gen < 040) {
if (DBG_NO_RELAXED_FENCING)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
} else
return true;
}
 
static bool test_has_llc(struct kgem *kgem)
{
int has_llc = -1;
 
if (DBG_NO_LLC)
return false;
 
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
}
 
return has_llc;
}
 
static bool test_has_cacheing(struct kgem *kgem)
{
uint32_t handle;
bool ret;
 
if (DBG_NO_CACHE_LEVEL)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
handle = gem_create(kgem->fd, 1);
if (handle == 0)
return false;
 
ret = gem_set_cacheing(kgem->fd, handle, UNCACHED);
gem_close(kgem->fd, handle);
return ret;
}
 
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
uint32_t handle;
void *ptr;
 
if (DBG_NO_USERPTR)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
ptr = malloc(PAGE_SIZE);
handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
gem_close(kgem->fd, handle);
free(ptr);
 
return handle != 0;
#else
return false;
#endif
}
 
static bool test_has_secure_batches(struct kgem *kgem)
{
if (DBG_NO_SECURE_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}
 
static bool test_has_pinned_batches(struct kgem *kgem)
{
if (DBG_NO_PINNED_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
}
 
 
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
int count[2] = { 2, 1 };
int size[2] = { 1, 2 };
int n, i;
 
if (kgem->wedged)
return true;
 
for (n = 0; n < ARRAY_SIZE(count); n++) {
for (i = 0; i < count[n]; i++) {
struct drm_i915_gem_pin pin;
struct kgem_bo *bo;
 
VG_CLEAR(pin);
 
pin.handle = gem_create(kgem->fd, size[n]);
if (pin.handle == 0)
goto err;
 
DBG(("%s: new handle=%d, num_pages=%d\n",
__FUNCTION__, pin.handle, size[n]));
 
bo = __kgem_bo_alloc(pin.handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, pin.handle);
goto err;
}
 
pin.alignment = 0;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
gem_close(kgem->fd, pin.handle);
goto err;
}
bo->presumed_offset = pin.offset;
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
}
 
return true;
 
err:
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
 
/* For simplicity populate the lists with a single unpinned bo */
for (n = 0; n < ARRAY_SIZE(count); n++) {
struct kgem_bo *bo;
uint32_t handle;
 
handle = gem_create(kgem->fd, size[n]);
if (handle == 0)
break;
 
bo = __kgem_bo_alloc(handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, handle);
break;
}
 
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
return false;
}
 
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
unsigned half_gpu_max;
unsigned int i, j;
 
DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
 
memset(kgem, 0, sizeof(*kgem));
 
kgem->fd = fd;
kgem->gen = gen;
 
list_init(&kgem->requests[0]);
list_init(&kgem->requests[1]);
list_init(&kgem->batch_buffers);
list_init(&kgem->active_buffers);
list_init(&kgem->flushing);
list_init(&kgem->large);
list_init(&kgem->large_inactive);
list_init(&kgem->snoop);
list_init(&kgem->scanout);
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
list_init(&kgem->pinned_batches[i]);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
list_init(&kgem->active[i][j]);
}
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
list_init(&kgem->vma[i].inactive[j]);
}
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
 
kgem->has_relaxed_delta =
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));
 
kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));
 
kgem->has_llc = test_has_llc(kgem);
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
kgem->has_llc));
 
kgem->has_cacheing = test_has_cacheing(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_cacheing));
 
kgem->has_userptr = test_has_userptr(kgem);
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
 
kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
 
kgem->has_handle_lut = test_has_handle_lut(kgem);
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
 
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));
 
kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
 
kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
 
kgem->has_pinned_batches = test_has_pinned_batches(kgem);
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
kgem->has_pinned_batches));
 
if (!is_hw_supported(kgem, dev)) {
printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
kgem->wedged = 1;
} else if (__kgem_throttle(kgem)) {
printf("Detected a hung GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;
 
if (!kgem_init_pinned_batches(kgem) && gen == 020) {
printf("Unable to reserve memory for GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
 
kgem->min_alignment = 16;
if (gen < 040)
kgem->min_alignment = 64;
 
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
kgem->half_cpu_cache_pages));
 
kgem->next_request = __kgem_request_alloc(kgem);
 
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
kgem->has_llc, kgem->has_cacheing, kgem->has_userptr));
 
VG_CLEAR(aperture);
aperture.aper_size = 0;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;
 
DBG(("%s: aperture size %lld, available now %lld\n",
__FUNCTION__,
(long long)aperture.aper_size,
(long long)aperture.aper_available_size));
 
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
}
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
 
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
kgem->aperture_mappable = aperture.aper_size;
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
 
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
 
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc)
kgem->max_gpu_size = MAX_CACHE_SIZE;
 
totalram = total_ram_size();
if (totalram == 0) {
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
__FUNCTION__));
totalram = kgem->aperture_total;
}
DBG(("%s: total ram=%u\n", __FUNCTION__, totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
 
kgem->max_cpu_size = kgem->max_object_size;
 
half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;
 
if (kgem->has_llc)
kgem->max_upload_tile_size = kgem->max_copy_tile_size;
else
kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
if (kgem->max_upload_tile_size > half_gpu_max)
kgem->max_upload_tile_size = half_gpu_max;
 
kgem->large_object_size = MAX_CACHE_SIZE;
if (kgem->large_object_size > kgem->max_gpu_size)
kgem->large_object_size = kgem->max_gpu_size;
 
if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) {
if (kgem->large_object_size > kgem->max_cpu_size)
kgem->large_object_size = kgem->max_cpu_size;
} else
kgem->max_cpu_size = 0;
if (DBG_NO_CPU)
kgem->max_cpu_size = 0;
 
DBG(("%s: maximum object size=%d\n",
__FUNCTION__, kgem->max_object_size));
DBG(("%s: large object thresold=%d\n",
__FUNCTION__, kgem->large_object_size));
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
__FUNCTION__,
kgem->max_gpu_size, kgem->max_cpu_size,
kgem->max_upload_tile_size, kgem->max_copy_tile_size));
 
/* Convert the aperture thresholds to pages */
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;
 
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
 
kgem->batch_flags_base = 0;
if (kgem->has_no_reloc)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
 
/* XXX hopefully a good approximation */
static uint32_t kgem_get_unique_id(struct kgem *kgem)
{
uint32_t id;
id = ++kgem->unique_id;
if (id == 0)
id = ++kgem->unique_id;
return id;
}
 
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_PRIME)
return 256;
if (flags & CREATE_SCANOUT)
return 64;
return kgem->min_alignment;
}
 
static uint32_t kgem_untiled_pitch(struct kgem *kgem,
uint32_t width, uint32_t bpp,
unsigned flags)
{
width = ALIGN(width, 2) * bpp >> 3;
return ALIGN(width, kgem_pitch_alignment(kgem, flags));
}
 
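/* Compute the byte size and pitch of the backing storage for a 2D
 * surface. The pitch is rounded up to the tile width and the height to
 * the tile height of the requested tiling mode; gen4+ then simply
 * page-aligns the result, while older chipsets additionally need a
 * power-of-two pitch for tiled surfaces (returning 0 if the pitch
 * exceeds the blitter limits) and, without relaxed fencing, a
 * power-of-two fence region. Worked example (illustrative values only):
 * 1024x768 at 32bpp with I915_TILING_X on gen4+ gives
 * pitch = ALIGN(1024*4, 512) = 4096 and
 * size = PAGE_ALIGN(4096 * ALIGN(768, 16)) = 3 MiB.
 */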
uint32_t kgem_surface_size(struct kgem *kgem,
bool relaxed_fencing,
unsigned flags,
uint32_t width,
uint32_t height,
uint32_t bpp,
uint32_t tiling,
uint32_t *pitch)
{
uint32_t tile_width, tile_height;
uint32_t size;
 
assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
 
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
tile_width = 128;
tile_height = 32;
} else {
tile_width = 512;
tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;
 
/* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
tile_height = 64;
break;
}
 
*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);
 
/* If it is too wide for the blitter, don't even bother. */
if (tiling != I915_TILING_NONE) {
if (*pitch > 8192)
return 0;
 
for (size = tile_width; size < *pitch; size <<= 1)
;
*pitch = size;
} else {
if (*pitch >= 32768)
return 0;
}
 
size = *pitch * height;
if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);
 
/* We need to allocate a pot fence region for a tiled buffer. */
if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
while (tile_width < size)
tile_width *= 2;
return tile_width;
}
 
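/* Round a height up to a whole number of tile rows: 1 row for linear
 * surfaces, 16 rows for X-tiling and 64 for Y-tiling on gen4+, while
 * gen2 uses 32 and gen3 uses 16 rows for any tiled surface.
 */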
static uint32_t kgem_aligned_height(struct kgem *kgem,
uint32_t height, uint32_t tiling)
{
uint32_t tile_height;
 
if (kgem->gen <= 030) {
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1;
} else switch (tiling) {
/* XXX align to an even tile row */
default:
case I915_TILING_NONE:
tile_height = 1;
break;
case I915_TILING_X:
tile_height = 16;
break;
case I915_TILING_Y:
tile_height = 64;
break;
}
 
return ALIGN(height, tile_height);
}
 
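/* Append a bo to the execbuffer object list of the batch under
 * construction and account its pages against the aperture estimate.
 * With the handle-LUT extension the exec-array index is used as the
 * relocation target instead of the raw GEM handle; kgem_add_bo() below
 * additionally ties the bo to the next request for later retirement.
 */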
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_exec_object2 *exec;
 
DBG(("%s: handle=%d, index=%d\n",
__FUNCTION__, bo->handle, kgem->nexec));
 
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
 
kgem->aperture += num_pages(bo);
 
return exec;
}
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
}
 
static uint32_t kgem_end_batch(struct kgem *kgem)
{
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
if (kgem->nbatch & 1)
kgem->batch[kgem->nbatch++] = MI_NOOP;
 
return kgem->nbatch;
}
 
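/* Relocations that point back into the batch buffer itself are recorded
 * with target_handle == ~0U while the batch is assembled; once the
 * batch bo has been allocated, patch them up with its handle and
 * presumed offset.
 */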
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;
 
if (kgem->nreloc__self == 0)
return;
 
for (n = 0; n < kgem->nreloc__self; n++) {
int i = kgem->reloc__self[n];
assert(kgem->reloc[i].target_handle == ~0U);
kgem->reloc[i].target_handle = bo->target_handle;
kgem->reloc[i].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
kgem->reloc[i].delta + bo->presumed_offset;
}
 
if (n == 256) {
for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
}
		}
	}
}
 
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_bo_binding *b;
 
b = bo->binding.next;
while (b) {
struct kgem_bo_binding *next = b->next;
free (b);
b = next;
}
}
 
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
int type = IS_CPU_MAP(bo->map);
 
assert(!IS_USER_MAP(bo->map));
 
DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
__FUNCTION__, type ? "CPU" : "GTT",
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
list_del(&bo->vma);
kgem->vma[type].count--;
}
}
 
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
assert(!bo->snoop || bo->rq == NULL);
 
#ifdef DEBUG_MEMORY
kgem->debug_memory.bo_allocs--;
kgem->debug_memory.bo_bytes -= bytes(bo);
#endif
 
kgem_bo_binding_free(kgem, bo);
 
if (IS_USER_MAP(bo->map)) {
assert(bo->rq == NULL);
assert(MAP(bo->map) != bo || bo->io);
if (bo != MAP(bo->map)) {
DBG(("%s: freeing snooped base\n", __FUNCTION__));
free(MAP(bo->map));
}
bo->map = NULL;
}
if (bo->map)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
 
_list_del(&bo->list);
_list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
 
if (!bo->io) {
*(struct kgem_bo **)bo = __kgem_freed_bo;
__kgem_freed_bo = bo;
} else
free(bo);
}
 
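/* Park an idle, unreferenced bo in the inactive cache for reuse.
 * Oversized objects go onto the separate large_inactive list; a bo that
 * is still mapped is also tracked on the matching vma inactive list,
 * except that a GTT mapping of a no-longer-mappable bo is discarded.
 */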
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(bo->domain != DOMAIN_GPU);
assert(!bo->proxy);
assert(!bo->io);
assert(!bo->scanout);
assert(!bo->needs_flush);
assert(list_is_empty(&bo->vma));
ASSERT_IDLE(kgem, bo->handle);
 
kgem->need_expire = true;
 
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
list_move(&bo->list, &kgem->large_inactive);
return;
}
 
assert(bo->flush == false);
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
(!type && !__kgem_bo_is_mappable(kgem, bo))) {
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
}
if (bo->map) {
list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
kgem->vma[type].count++;
}
}
}
 
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
struct kgem_bo *base;
 
if (!bo->io)
return bo;
 
assert(!bo->snoop);
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
__FUNCTION__, bo->handle));
/* transfer the handle to a minimum bo */
memcpy(base, bo, sizeof(*base));
base->io = false;
list_init(&base->list);
list_replace(&bo->request, &base->request);
list_replace(&bo->vma, &base->vma);
free(bo);
bo = base;
} else
bo->reusable = false;
 
return bo;
}
 
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (bo->map) {
assert(!list_is_empty(&bo->vma));
list_del(&bo->vma);
kgem->vma[IS_CPU_MAP(bo->map)].count--;
}
}
 
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq != NULL);
if (bo->rq == (void *)kgem)
list_del(&bo->request);
assert(list_is_empty(&bo->vma));
}
 
static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->scanout);
assert(!bo->refcnt);
assert(bo->exec == NULL);
assert(bo->proxy == NULL);
 
DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
__FUNCTION__, bo->handle, bo->delta, bo->reusable));
if (bo->delta) {
/* XXX will leak if we are not DRM_MASTER. *shrug* */
// drmModeRmFB(kgem->fd, bo->delta);
bo->delta = 0;
}
 
bo->scanout = false;
bo->flush = false;
bo->reusable = true;
 
if (kgem->has_llc &&
!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED))
bo->reusable = false;
}
 
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
 
DBG(("%s: size=%d, offset=%d, parent used=%d\n",
__FUNCTION__, bo->size.bytes, bo->delta, io->used));
 
if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
io->used = bo->delta;
}
 
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->scanout);
assert(bo->delta);
assert(!bo->snoop);
assert(!bo->io);
 
DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
__FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
if (bo->rq)
list_move_tail(&bo->list, &kgem->scanout);
else
list_move(&bo->list, &kgem->scanout);
}
 
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
 
if (num_pages(bo) > kgem->max_cpu_size >> 13) {
DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
__FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
kgem_bo_free(kgem, bo);
return;
}
 
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
 
DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
list_add(&bo->list, &kgem->snoop);
}
 
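/* First-fit search of the snoop (CPU-cached) bo cache: take the first
 * entry of at least num_pages that is not more than twice the request,
 * remembering the first oversized candidate as a fallback. Returns NULL
 * when snooping/userptr is unavailable or nothing suitable is cached.
 */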
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
 
DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
 
if ((kgem->has_cacheing | kgem->has_userptr) == 0)
return NULL;
 
if (list_is_empty(&kgem->snoop)) {
DBG(("%s: inactive and cache empty\n", __FUNCTION__));
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
}
 
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
assert(!bo->scanout);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
 
if (num_pages > num_pages(bo))
continue;
 
if (num_pages(bo) > 2*num_pages) {
if (first == NULL)
first = bo;
continue;
}
 
list_del(&bo->list);
bo->pitch = 0;
bo->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
return bo;
}
 
if (first) {
list_del(&first->list);
first->pitch = 0;
first->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, first->handle, num_pages(first)));
return first;
}
 
return NULL;
}
 
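/* Handle the final unreference of a bo: depending on its state it is
 * moved to the snoop, scanout, active or inactive cache for reuse, or
 * freed outright if it cannot be reused (or caching is disabled).
 */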
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged);
assert(bo->proxy == NULL);
 
bo->binding.offset = 0;
 
if (DBG_NO_CACHE)
goto destroy;
 
if (bo->snoop && !bo->flush) {
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(!bo->flush);
assert(list_is_empty(&bo->list));
if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
if (bo->rq == NULL) {
assert(!bo->needs_flush);
kgem_bo_move_to_snoop(kgem, bo);
}
return;
}
 
if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
return;
}
 
if (bo->io)
bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
goto destroy;
}
 
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
 
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->snoop == false);
assert(bo->io == false);
assert(bo->scanout == false);
 
if (bo->exec && kgem->nexec == 1) {
DBG(("%s: only handle in batch, discarding last operations\n",
__FUNCTION__));
assert(bo->exec == &kgem->exec[0]);
assert(kgem->exec[0].handle == bo->handle);
assert(RQ(bo->rq) == kgem->next_request);
bo->refcnt = 1;
kgem_reset(kgem);
bo->refcnt = 0;
}
 
if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
if (bo->rq) {
struct list *cache;
 
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
if (bucket(bo) < NUM_CACHE_BUCKETS)
cache = &kgem->active[bucket(bo)][bo->tiling];
else
cache = &kgem->large;
list_add(&bo->list, cache);
return;
}
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
 
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
 
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;
 
DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}
 
kgem_bo_move_to_inactive(kgem, bo);
return;
 
destroy:
if (!bo->exec)
kgem_bo_free(kgem, bo);
}
 
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
__kgem_bo_destroy(kgem, bo);
}
 
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
while (!list_is_empty(&bo->base.vma)) {
struct kgem_bo *cached;
 
cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
assert(cached->proxy == &bo->base);
list_del(&cached->vma);
 
assert(*(struct kgem_bo **)cached->map == cached);
*(struct kgem_bo **)cached->map = NULL;
cached->map = NULL;
 
kgem_bo_destroy(kgem, cached);
}
}
 
static bool kgem_retire__buffers(struct kgem *kgem)
{
bool retired = false;
 
while (!list_is_empty(&kgem->active_buffers)) {
struct kgem_buffer *bo =
list_last_entry(&kgem->active_buffers,
struct kgem_buffer,
base.list);
 
if (bo->base.rq)
break;
 
DBG(("%s: releasing upload cache for handle=%d? %d\n",
__FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
list_del(&bo->base.list);
kgem_buffer_release(kgem, bo);
kgem_bo_unref(kgem, &bo->base);
retired = true;
}
 
return retired;
}
 
static bool kgem_retire__flushing(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
bool retired = false;
 
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
assert(bo->rq == (void *)kgem);
assert(bo->exec == NULL);
 
if (__kgem_busy(kgem, bo->handle))
break;
 
__kgem_bo_clear_busy(bo);
 
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else
kgem_bo_free(kgem, bo);
}
#if HAS_DEBUG_FULL
{
int count = 0;
list_for_each_entry(bo, &kgem->flushing, request)
count++;
printf("%s: %d bo on flushing list\n", __FUNCTION__, count);
}
#endif
 
kgem->need_retire |= !list_is_empty(&kgem->flushing);
 
return retired;
}
 
 
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
bool retired = false;
 
DBG(("%s: request %d complete\n",
__FUNCTION__, rq->bo->handle));
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
assert(RQ(bo->rq) == rq);
assert(bo->exec == NULL);
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
 
list_del(&bo->request);
 
if (bo->needs_flush)
bo->needs_flush = __kgem_busy(kgem, bo->handle);
if (bo->needs_flush) {
DBG(("%s: moving %d to flushing\n",
__FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
continue;
}
 
bo->domain = DOMAIN_NONE;
bo->rq = NULL;
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
assert(rq->bo->rq == NULL);
assert(list_is_empty(&rq->bo->request));
 
if (--rq->bo->refcnt == 0) {
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, rq->bo->handle));
kgem_bo_free(kgem, rq->bo);
}
}
 
__kgem_request_free(rq);
return retired;
}
 
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
bool retired = false;
 
while (!list_is_empty(&kgem->requests[ring])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list);
if (__kgem_busy(kgem, rq->bo->handle))
break;
 
retired |= __kgem_retire_rq(kgem, rq);
}
 
#if HAS_DEBUG_FULL
{
struct kgem_bo *bo;
int count = 0;
 
list_for_each_entry(bo, &kgem->requests[ring], request)
count++;
 
bo = NULL;
if (!list_is_empty(&kgem->requests[ring]))
bo = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list)->bo;
 
printf("%s: ring=%d, %d outstanding requests, oldest=%d\n",
__FUNCTION__, ring, count, bo ? bo->handle : 0);
}
#endif
 
return retired;
}
 
static bool kgem_retire__requests(struct kgem *kgem)
{
bool retired = false;
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
retired |= kgem_retire__requests_ring(kgem, n);
kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
}
 
return retired;
}
 
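/* Reclaim whatever the GPU has finished with: walk the flushing list,
 * the per-ring request lists and the active upload buffers, returning
 * completed bos to the caches. Returns true if anything was retired.
 */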
bool kgem_retire(struct kgem *kgem)
{
bool retired = false;
 
DBG(("%s\n", __FUNCTION__));
 
kgem->need_retire = false;
 
retired |= kgem_retire__flushing(kgem);
retired |= kgem_retire__requests(kgem);
retired |= kgem_retire__buffers(kgem);
 
DBG(("%s -- retired=%d, need_retire=%d\n",
__FUNCTION__, retired, kgem->need_retire));
 
kgem->retire(kgem);
 
return retired;
}
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
struct kgem_request *rq;
 
assert(!list_is_empty(&kgem->requests[ring]));
 
rq = list_last_entry(&kgem->requests[ring],
struct kgem_request, list);
if (__kgem_busy(kgem, rq->bo->handle)) {
DBG(("%s: last requests handle=%d still busy\n",
__FUNCTION__, rq->bo->handle));
return false;
}
 
DBG(("%s: ring=%d idle (handle=%d)\n",
__FUNCTION__, ring, rq->bo->handle));
 
kgem_retire__requests_ring(kgem, ring);
assert(list_is_empty(&kgem->requests[ring]));
return true;
}
 
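/* After submitting the batch, record the offsets reported back by the
 * kernel, mark every bo of the request as busy on the GPU and queue the
 * request for retirement. The static_request fallback (used when
 * allocating a request failed) instead synchronises immediately via
 * SET_DOMAIN and tears the request down on the spot.
 */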
static void kgem_commit(struct kgem *kgem)
{
struct kgem_request *rq = kgem->next_request;
struct kgem_bo *bo, *next;
 
list_for_each_entry_safe(bo, next, &rq->buffers, request) {
assert(next->request.prev == &bo->request);
 
DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
__FUNCTION__, bo->handle, bo->proxy != NULL,
bo->dirty, bo->needs_flush, bo->snoop,
(unsigned)bo->exec->offset));
 
assert(!bo->purged);
assert(bo->exec);
assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
 
bo->presumed_offset = bo->exec->offset;
bo->exec = NULL;
bo->target_handle = -1;
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
kgem_bo_free(kgem, bo);
continue;
}
 
bo->binding.offset = 0;
bo->domain = DOMAIN_GPU;
bo->dirty = false;
 
if (bo->proxy) {
/* proxies are not used for domain tracking */
bo->exec = NULL;
__kgem_bo_clear_busy(bo);
}
 
kgem->scanout_busy |= bo->scanout;
}
 
if (rq == &kgem->static_request) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
}
 
kgem_retire(kgem);
assert(list_is_empty(&rq->buffers));
 
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
} else {
list_add_tail(&rq->list, &kgem->requests[rq->ring]);
kgem->need_throttle = kgem->need_retire = 1;
}
 
kgem->next_request = NULL;
}
 
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
while (!list_is_empty(head))
kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
}
 
static void kgem_close_inactive(struct kgem *kgem)
{
unsigned int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
kgem_close_list(kgem, &kgem->inactive[i]);
}
 
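/* Finish the partial upload buffers attached to this batch before it is
 * submitted: mmapped buffers with space left are kept on the active
 * list for further reuse, one-shot uploads are shrunk into a smaller bo
 * where possible (rewriting any relocations that referenced the
 * original), and whatever remains is written back with gem_write.
 */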
static void kgem_finish_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo, *next;
 
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%d\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped));
 
assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
continue;
}
 
if (!bo->write) {
assert(bo->base.exec || bo->base.refcnt > 1);
goto decouple;
}
 
if (bo->mmapped) {
int used;
 
assert(!bo->need_io);
 
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
goto decouple;
}
 
if (!bo->used) {
			/* Unless we replace the handle in the execbuffer,
			 * this bo will become active. So decouple it from
			 * the buffer list and track it in the normal
			 * manner.
			 */
goto decouple;
}
 
assert(bo->need_io);
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);
 
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
unsigned alloc = NUM_PAGES(bo->used);
 
shrink = search_snoop_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
void *map;
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
map = kgem_bo_map__cpu(kgem, shrink);
if (map) {
kgem_bo_sync__cpu(kgem, shrink);
memcpy(map, bo->mem, bo->used);
 
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
 
shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
}
 
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;
 
decouple:
DBG(("%s: releasing handle=%d\n",
__FUNCTION__, bo->base.handle));
list_del(&bo->base.list);
kgem_bo_unref(kgem, &bo->base);
}
}
 
static void kgem_cleanup(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
while (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
bo->exec = NULL;
bo->dirty = false;
__kgem_bo_clear_busy(bo);
if (bo->refcnt == 0)
kgem_bo_free(kgem, bo);
}
 
__kgem_request_free(rq);
}
}
 
kgem_close_inactive(kgem);
}
 
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
int ret;
 
ASSERT_IDLE(kgem, handle);
 
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
 
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
}
 
/* Disjoint surface/batch, upload separately */
ret = gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
if (ret)
return ret;
 
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
}
 
void kgem_reset(struct kgem *kgem)
{
if (kgem->next_request) {
struct kgem_request *rq = kgem->next_request;
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo =
list_first_entry(&rq->buffers,
struct kgem_bo,
request);
list_del(&bo->request);
 
assert(RQ(bo->rq) == rq);
 
bo->binding.offset = 0;
bo->exec = NULL;
bo->target_handle = -1;
bo->dirty = false;
 
if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
} else
__kgem_bo_clear_busy(bo);
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
DBG(("%s: discarding handle=%d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
if (rq != &kgem->static_request) {
list_init(&rq->list);
__kgem_request_free(rq);
}
}
 
kgem->nfence = 0;
kgem->nexec = 0;
kgem->nreloc = 0;
kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
kgem->batch_flags = kgem->batch_flags_base;
 
kgem->next_request = __kgem_request_alloc(kgem);
 
kgem_sna_reset(kgem);
}
 
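/* The surface state grows downwards from the end of the batch towards
 * the commands; when the kernel supports relaxed delta relocations the
 * two can be packed together. Compute the compacted upload size and
 * adjust the deltas/offsets of the relocations that referenced the old
 * surface location.
 */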
static int compact_batch_surface(struct kgem *kgem)
{
int size, shrink, n;
 
if (!kgem->has_relaxed_delta)
return kgem->batch_size;
 
/* See if we can pack the contents into one or two pages */
n = ALIGN(kgem->batch_size, 1024);
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
 
shrink = n - size;
if (shrink) {
DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
 
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
 
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
kgem->reloc[n].offset -= shrink;
}
}
 
return size * sizeof(uint32_t);
}
 
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;
 
if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
struct kgem_bo,
list);
if (!bo->rq) {
out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_4096;
}
}
 
if (size <= 16384) {
bo = list_first_entry(&kgem->pinned_batches[1],
struct kgem_bo,
list);
if (!bo->rq) {
out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_16384;
}
}
 
if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);
 
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
struct kgem_bo,
list);
list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
 
DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
return NULL;
}
 
kgem_retire(kgem);
assert(bo->rq == NULL);
return kgem_bo_reference(bo);
}
 
return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
 
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
int size;
 
assert(!DBG_NO_HW);
assert(!kgem->wedged);
 
assert(kgem->nbatch);
assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
assert(kgem->nbatch <= kgem->surface);
 
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
 
DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
 
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
assert(kgem->nfence <= kgem->fence_max);
 
kgem_finish_buffers(kgem);
 
#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif
 
rq = kgem->next_request;
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;
 
assert(!rq->bo->needs_flush);
 
i = kgem->nexec++;
kgem->exec[i].handle = handle;
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
 
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
 
kgem_fixup_self_relocs(kgem, rq->bo);
 
if (kgem_batch_write(kgem, handle, size) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret, retry = 3;
 
VG_CLEAR(execbuf);
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_start_offset = 0;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
execbuf.DR4 = 0;
execbuf.flags = kgem->ring | kgem->batch_flags;
execbuf.rsvd1 = 0;
execbuf.rsvd2 = 0;
 
 
 
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
while (ret == -1 && errno == EBUSY && retry--) {
__kgem_throttle(kgem);
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
if (DEBUG_SYNC && ret == 0) {
struct drm_i915_gem_set_domain set_domain;
 
VG_CLEAR(set_domain);
set_domain.handle = handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
if (ret == -1) {
// DBG(("%s: GPU hang detected [%d]\n",
// __FUNCTION__, errno));
kgem_throttle(kgem);
kgem->wedged = true;
 
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
 
for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request) {
if (bo->handle == kgem->exec[i].handle) {
found = bo;
break;
}
}
ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
i,
kgem->exec[i].handle,
(int)kgem->exec[i].offset,
found ? kgem_bo_size(found) : -1,
found ? found->tiling : -1,
(int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
found ? found->snoop : -1,
found ? found->purged : -1);
}
for (i = 0; i < kgem->nreloc; i++) {
ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
i,
(int)kgem->reloc[i].offset,
kgem->reloc[i].target_handle,
kgem->reloc[i].delta,
kgem->reloc[i].read_domains,
kgem->reloc[i].write_domain,
(int)kgem->reloc[i].presumed_offset);
}
 
if (DEBUG_SYNC) {
int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd != -1) {
write(fd, kgem->batch, batch_end*sizeof(uint32_t));
close(fd);
}
 
FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
#endif
}
}
 
kgem_commit(kgem);
}
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem_reset(kgem);
 
assert(kgem->next_request != NULL);
}
 
void kgem_throttle(struct kgem *kgem)
{
kgem->need_throttle = 0;
if (kgem->wedged)
return;
 
kgem->wedged = __kgem_throttle(kgem);
if (kgem->wedged) {
printf("Detected a hung GPU, disabling acceleration.\n");
printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
}
}
 
void kgem_purge_cache(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
if (!kgem_bo_is_retained(kgem, bo)) {
DBG(("%s: purging %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
}
 
kgem->need_purge = false;
}
 
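/* Periodic cache maintenance: release deferred bo/request frees, drop
 * idle scanouts and large inactive buffers, and age the snoop and
 * inactive caches. Entries idle for longer than MAX_INACTIVE_TIME
 * (half of that for the snoop cache) are freed, while recently mapped
 * buffers are preserved a little longer. Returns true if the caches
 * still hold objects for a later pass to expire.
 */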
bool kgem_expire_cache(struct kgem *kgem)
{
time_t now, expire;
struct kgem_bo *bo;
unsigned int size = 0, count = 0;
bool idle;
unsigned int i;
 
time(&now);
 
while (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
while (__kgem_freed_request) {
struct kgem_request *rq = __kgem_freed_request;
__kgem_freed_request = *(struct kgem_request **)rq;
free(rq);
}
 
while (!list_is_empty(&kgem->large_inactive)) {
kgem_bo_free(kgem,
list_first_entry(&kgem->large_inactive,
struct kgem_bo, list));
 
}
 
while (!list_is_empty(&kgem->scanout)) {
bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
if (__kgem_busy(kgem, bo->handle))
break;
 
list_del(&bo->list);
kgem_bo_clear_scanout(kgem, bo);
__kgem_bo_destroy(kgem, bo);
}
 
expire = 0;
list_for_each_entry(bo, &kgem->snoop, list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME/2;
break;
}
 
bo->delta = now;
}
if (expire) {
while (!list_is_empty(&kgem->snoop)) {
bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
 
if (bo->delta > expire)
break;
 
kgem_bo_free(kgem, bo);
}
}
#ifdef DEBUG_MEMORY
{
long snoop_size = 0;
int snoop_count = 0;
list_for_each_entry(bo, &kgem->snoop, list)
snoop_count++, snoop_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
__FUNCTION__, snoop_count, snoop_size);
}
#endif
 
kgem_retire(kgem);
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem->expire(kgem);
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
expire = 0;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
idle &= list_is_empty(&kgem->inactive[i]);
list_for_each_entry(bo, &kgem->inactive[i], list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME;
break;
}
 
bo->delta = now;
}
}
if (idle) {
DBG(("%s: idle\n", __FUNCTION__));
kgem->need_expire = false;
return false;
}
if (expire == 0)
return true;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
struct list preserve;
 
list_init(&preserve);
while (!list_is_empty(&kgem->inactive[i])) {
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
 
if (bo->delta > expire) {
idle = false;
break;
}
 
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
idle = false;
list_move_tail(&bo->list, &preserve);
} else {
count++;
size += bytes(bo);
kgem_bo_free(kgem, bo);
DBG(("%s: expiring %d\n",
__FUNCTION__, bo->handle));
}
}
if (!list_is_empty(&preserve)) {
preserve.prev->next = kgem->inactive[i].next;
kgem->inactive[i].next->prev = preserve.prev;
kgem->inactive[i].next = preserve.next;
preserve.next->prev = &kgem->inactive[i];
}
}
 
#ifdef DEBUG_MEMORY
{
long inactive_size = 0;
int inactive_count = 0;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_for_each_entry(bo, &kgem->inactive[i], list)
inactive_count++, inactive_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
__FUNCTION__, inactive_count, inactive_size);
}
#endif
 
DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
__FUNCTION__, count, size, idle));
 
kgem->need_expire = !idle;
return !idle;
(void)count;
(void)size;
}
 
void kgem_cleanup_cache(struct kgem *kgem)
{
unsigned int i;
int n;
 
/* sync to the most recent request */
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
if (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
struct drm_i915_gem_set_domain set_domain;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
 
DBG(("%s: sync on cleanup\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
(void)drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN,
&set_domain);
}
}
 
kgem_retire(kgem);
kgem_cleanup(kgem);
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
while (!list_is_empty(&kgem->inactive[i]))
kgem_bo_free(kgem,
list_last_entry(&kgem->inactive[i],
struct kgem_bo, list));
}
 
while (!list_is_empty(&kgem->snoop))
kgem_bo_free(kgem,
list_last_entry(&kgem->snoop,
struct kgem_bo, list));
 
while (__kgem_freed_bo) {
struct kgem_bo *bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
kgem->need_purge = false;
kgem->need_expire = false;
}
 
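/* Look for a reusable untiled bo of at least num_pages pages in the
 * active or inactive cache (chosen by CREATE_INACTIVE). When a CPU or
 * GTT mapping is requested the per-vma inactive lists are tried first;
 * a candidate with the wrong tiling is switched to linear via
 * set_tiling, and a near-miss (e.g. wrong map type) is remembered as a
 * fallback.
 */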
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;
 
DBG(("%s: num_pages=%d, flags=%x, use_active? %d\n",
__FUNCTION__, num_pages, flags, use_active));
 
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE)
return NULL;
 
if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: inactive and cache bucket empty\n",
__FUNCTION__));
 
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: can not retire\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
DBG(("%s: active cache bucket empty\n", __FUNCTION__));
return NULL;
}
 
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: active cache bucket still empty after retire\n",
__FUNCTION__));
return NULL;
}
}
 
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
DBG(("%s: searching for inactive %s map\n",
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), num_pages));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling &&
!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (flags & CREATE_EXACT)
return NULL;
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
return NULL;
}
 
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
list_for_each_entry(bo, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo))
continue;
 
if (use_active &&
kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
continue;
 
if (first)
continue;
 
if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}
 
if (bo->map) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;
 
first = bo;
continue;
}
}
 
if (use_active)
kgem_bo_remove_from_active(kgem, bo);
else
kgem_bo_remove_from_inactive(kgem, bo);
 
assert(bo->tiling == I915_TILING_NONE);
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (first) {
assert(first->tiling == I915_TILING_NONE);
 
if (use_active)
kgem_bo_remove_from_active(kgem, first);
else
kgem_bo_remove_from_inactive(kgem, first);
 
first->pitch = 0;
first->delta = 0;
DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}
 
return NULL;
}
 
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
struct kgem_bo *bo;
uint32_t handle;
 
DBG(("%s(%d)\n", __FUNCTION__, size));
 
if (flags & CREATE_GTT_MAP && kgem->has_llc) {
flags &= ~CREATE_GTT_MAP;
flags |= CREATE_CPU_MAP;
}
 
size = (size + PAGE_SIZE - 1) / PAGE_SIZE;
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
if (bo) {
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_CACHED)
return NULL;
 
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
bo = __kgem_bo_alloc(handle, size);
if (bo == NULL) {
gem_close(kgem->fd, handle);
return NULL;
}
 
debug_alloc__bo(kgem, bo);
return bo;
}
 
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
unsigned int size;
 
assert(bo->tiling);
assert(kgem->gen < 040);
 
if (kgem->gen < 030)
size = 512 * 1024;
else
size = 1024 * 1024;
while (size < bytes(bo))
size *= 2;
 
return size;
}
 
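/* Allocate, or preferably reuse, a 2D buffer of the requested geometry.
 * The search order is: the scanout cache (for CREATE_SCANOUT), the
 * dedicated lists for very large objects, the inactive vma cache when a
 * mapping is wanted, the active caches (exact tiling first, then
 * convertible and near-miss matches), the inactive cache, and finally a
 * fresh GEM allocation. A negative tiling value requests exactly that
 * tiling (CREATE_EXACT).
 */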
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags)
{
struct list *cache;
struct kgem_bo *bo;
uint32_t pitch, untiled_pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
 
if (tiling < 0)
tiling = -tiling, flags |= CREATE_EXACT;
 
DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling,
!!(flags & CREATE_EXACT),
!!(flags & CREATE_INACTIVE),
!!(flags & CREATE_CPU_MAP),
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT),
!!(flags & CREATE_PRIME),
!!(flags & CREATE_TEMPORARY)));
 
size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
size /= PAGE_SIZE;
bucket = cache_bucket(size);
 
if (flags & CREATE_SCANOUT) {
assert((flags & CREATE_INACTIVE) == 0);
list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->purged);
 
if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
 
if (flags & CREATE_INACTIVE)
goto large_inactive;
 
tiled_height = kgem_aligned_height(kgem, height, tiling);
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
 
list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->flush == true);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch || bo->tiling != tiling) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
bo->tiling = tiling;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
large_inactive:
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
 
if (size > num_pages(bo))
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->delta = 0;
DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
goto create;
}
 
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (kgem->has_llc && tiling == I915_TILING_NONE)
for_cpu = 1;
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
cache = &kgem->vma[for_cpu].inactive[bucket];
do {
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
DBG(("inactive vma with wrong tiling: %d < %d\n",
bo->tiling, tiling));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
 
kgem_bo_remove_from_inactive(kgem, bo);
 
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
goto create;
}
 
if (flags & CREATE_INACTIVE)
goto skip_active_search;
 
/* Best active match */
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
retry = 3;
search_again:
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->active[bucket][tiling];
if (tiling) {
tiled_height = kgem_aligned_height(kgem, height, tiling);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert(!bo->scanout);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch) {
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} else {
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->tiling == tiling);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->pitch = pitch;
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (--retry && flags & CREATE_EXACT) {
if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;
 
cache = &kgem->active[bucket][i];
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = tiling;
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
bucket++;
goto search_again;
}
 
if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
i = tiling;
while (--i >= 0) {
tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
cache = active(kgem, tiled_height / PAGE_SIZE, i);
tiled_height = kgem_aligned_height(kgem, height, i);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
} else
bo->pitch = untiled_pitch;
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
skip_active_search:
bucket = cache_bucket(size);
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3)
retry = 3;
search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->pitch = pitch;
bo->tiling = tiling;
 
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
assert(bo->pitch);
DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_INACTIVE &&
!list_is_empty(&kgem->active[bucket][tiling]) &&
__kgem_throttle_retire(kgem, flags)) {
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
if (--retry) {
bucket++;
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
create:
if (bucket >= NUM_CACHE_BUCKETS)
size = ALIGN(size, 1024);
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
bo = __kgem_bo_alloc(handle, size);
if (!bo) {
gem_close(kgem->fd, handle);
return NULL;
}
 
bo->domain = DOMAIN_CPU;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
if (tiling != I915_TILING_NONE &&
gem_set_tiling(kgem->fd, handle, tiling, pitch))
bo->tiling = tiling;
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}
 
assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
 
debug_alloc__bo(kgem, bo);
 
DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id,
size, num_pages(bo), bucket(bo)));
return bo;
}
 
#if 0
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags)
{
struct kgem_bo *bo;
int stride, size;
 
if (DBG_NO_CPU)
return NULL;
 
DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
 
if (kgem->has_llc) {
bo = kgem_create_2d(kgem, width, height, bpp,
I915_TILING_NONE, flags);
if (bo == NULL)
return bo;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
return bo;
}
 
assert(width > 0 && height > 0);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
size = stride * ALIGN(height, 2);
assert(size >= PAGE_SIZE);
 
DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));
 
bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
if (bo) {
assert(bo->tiling == I915_TILING_NONE);
assert(bo->snoop);
bo->refcnt = 1;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_cacheing) {
bo = kgem_create_linear(kgem, size, flags);
if (bo == NULL)
return NULL;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
bo->snoop = true;
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_userptr) {
void *ptr;
 
/* XXX */
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
return NULL;
 
bo = kgem_create_map(kgem, ptr, size, false);
if (bo == NULL) {
free(ptr);
return NULL;
}
 
bo->map = MAKE_USER_MAP(ptr);
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
return NULL;
}
 
 
#endif
 
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, proxy? %d\n",
__FUNCTION__, bo->handle, bo->proxy != NULL));
 
if (bo->proxy) {
_list_del(&bo->vma);
_list_del(&bo->request);
if (bo->io && bo->exec == NULL)
_kgem_bo_delete_buffer(kgem, bo);
kgem_bo_unref(kgem, bo->proxy);
kgem_bo_binding_free(kgem, bo);
free(bo);
return;
}
 
__kgem_bo_destroy(kgem, bo);
}
 
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->rq);
assert(bo->exec == NULL);
assert(bo->needs_flush);
 
/* The kernel will emit a flush *and* update its own flushing lists. */
if (!__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
DBG(("%s: handle=%d, busy?=%d\n",
__FUNCTION__, bo->handle, bo->rq != NULL));
}
 
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
 
bool kgem_check_bo(struct kgem *kgem, ...)
{
va_list ap;
struct kgem_bo *bo;
int num_exec = 0;
int num_pages = 0;
bool flush = false;
 
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
while (bo->proxy)
bo = bo->proxy;
if (bo->exec)
continue;
 
if (needs_semaphore(kgem, bo))
return false;
 
num_pages += num_pages(bo);
num_exec++;
 
flush |= bo->flush;
}
va_end(ap);
 
DBG(("%s: num_pages=+%d, num_exec=+%d\n",
__FUNCTION__, num_pages, num_exec));
 
if (!num_pages)
return true;
 
if (kgem_flush(kgem, flush))
return false;
 
if (kgem->aperture > kgem->aperture_low &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
}
 
if (num_pages + kgem->aperture > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
return false;
}
 
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
return false;
}
 
return true;
}
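
A minimal sketch of the intended calling pattern for kgem_check_bo(), assuming the kgem API declared in kgem.h; the gen4 code further down follows the same check/submit/recheck sequence before binding surfaces. The helper name and its dst/src/mask parameters are illustrative only.

static void example_reserve_render_bos(struct sna *sna,
                                       struct kgem_bo *dst,
                                       struct kgem_bo *src,
                                       struct kgem_bo *mask)
{
	/* kgem_check_bo() is variadic and NULL-terminated; it fails when the
	 * aperture watermarks or the exec-slot limit would be exceeded, or
	 * when a cross-ring semaphore would be needed.
	 */
	if (!kgem_check_bo(&sna->kgem, dst, src, mask, NULL)) {
		kgem_submit(&sna->kgem);
		/* an empty batch is expected to have room for three bos */
		assert(kgem_check_bo(&sna->kgem, dst, src, mask, NULL));
	}
}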
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint32_t delta)
{
int index;
 
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
 
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
if( bo != NULL && bo->handle == -2)
{
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
return 0;
};
 
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(bo->refcnt);
assert(!bo->purged);
 
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
__FUNCTION__, bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
}
 
if (read_write_domain & 0x7fff && !bo->dirty)
__kgem_bo_mark_dirty(bo);
 
bo = bo->proxy;
assert(bo->refcnt);
assert(!bo->purged);
}
 
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);
 
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
kgem->nfence++;
}
bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
}
 
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
 
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
 
return delta;
}
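
For orientation, a sketch of how an emitter consumes kgem_add_reloc(); this is illustrative only, the real emitters below go through OUT_BATCH-style helpers and surface-state offsets such as ss[1]. The dword index being written is passed as pos so the kernel can patch it, and the returned presumed offset plus delta is what gets written into the batch now.

static void example_emit_bo_address(struct sna *sna, struct kgem_bo *bo,
                                    uint32_t read_write_domain,
                                    uint32_t delta)
{
	struct kgem *kgem = &sna->kgem;

	/* Record a relocation against the dword we are about to write;
	 * kgem_add_reloc() returns bo->presumed_offset + delta, the
	 * best-guess address to place in the batch.
	 */
	kgem->batch[kgem->nbatch] =
		kgem_add_reloc(kgem, kgem->nbatch, bo, read_write_domain, delta);
	kgem->nbatch++;
}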
 
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
 
DBG(("%s: type=%d, count=%d (bucket: %d)\n",
__FUNCTION__, type, kgem->vma[type].count, bucket));
if (kgem->vma[type].count <= 0)
return;
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
/* VMAs are limited on a per-process basis to around 64k.
* This includes all malloc arenas as well as other file
* mappings. In order to be fair and not hog the cache,
* and more importantly not to exhaust that limit and
* start failing mappings, we keep our own number of open
* VMAs within a conservative value.
*/
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
 
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
j++) {
struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
if (!list_is_empty(head))
bo = list_last_entry(head, struct kgem_bo, vma);
}
if (bo == NULL)
break;
 
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
assert(IS_CPU_MAP(bo->map) == type);
assert(bo->map);
assert(bo->rq == NULL);
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
 
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: freeing unpurgeable old mapping\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
}
}
}
 
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
return kgem_bo_map__cpu(kgem, bo);
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(bo->exec == NULL);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
(kgem->has_llc || bo->domain == DOMAIN_CPU)) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
ptr = kgem_bo_map__cpu(kgem, bo);
kgem_bo_sync__cpu(kgem, bo);
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
if (bo->domain != DOMAIN_GTT) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
/* XXX use PROT_READ to avoid the write flush? */
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
}
}
 
return ptr;
}
 
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(bytes(bo) <= kgem->aperture_mappable / 4);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem_bo_map__async(kgem, bo);
}
 
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(!bo->scanout);
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
 
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
printf("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
}
 
return NULL;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
 
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
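
The pointer stored in bo->map above is tagged rather than raw: the low bit marks a CPU mmap as opposed to a GTT mapping, which is what the IS_CPU_MAP()/MAP() macros in kgem.h decode. A small illustration of the scheme; the EXAMPLE_ names are stand-ins, and MAKE_CPU_MAP() is assumed to set the low bit in the same way.

/* Page-aligned mappings leave the low pointer bits free, so a single
 * void *map field can carry both the address and the kind of mapping.
 * uintptr_t comes from <stdint.h>, which kgem.h already includes.
 */
#define EXAMPLE_MAKE_CPU_MAP(ptr)  ((void *)((uintptr_t)(ptr) | 1))
#define EXAMPLE_IS_CPU_MAP(ptr)    ((uintptr_t)(ptr) & 1)
#define EXAMPLE_MAP(ptr)           ((void *)((uintptr_t)(ptr) & ~3))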
 
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
kgem_bo_submit(kgem, bo);
 
if (bo->domain != DOMAIN_CPU) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_CPU;
set_domain.write_domain = I915_GEM_DOMAIN_CPU;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_CPU;
}
}
}
 
void kgem_clear_dirty(struct kgem *kgem)
{
struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
 
list_for_each_entry(bo, buffers, request) {
if (!bo->dirty)
break;
 
bo->dirty = false;
}
}
 
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length)
{
struct kgem_bo *bo;
 
DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
__FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
offset, length, target->io));
 
bo = __kgem_bo_alloc(target->handle, length);
if (bo == NULL)
return NULL;
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->reusable = false;
bo->size.bytes = length;
 
bo->io = target->io && target->proxy == NULL;
bo->dirty = target->dirty;
bo->tiling = target->tiling;
bo->pitch = target->pitch;
 
assert(!bo->scanout);
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
 
if (target->exec) {
list_move_tail(&bo->request, &kgem->next_request->buffers);
bo->exec = &_kgem_dummy_exec;
}
bo->rq = target->rq;
 
return bo;
}
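
A hedged usage sketch for kgem_create_proxy(): a proxy is a window into its target, sharing the target's handle but carrying its own delta, which kgem_add_reloc() above folds into every relocation made through it; destroying the proxy only drops the reference on the target (see _kgem_bo_destroy()). The helper below is illustrative.

static struct kgem_bo *example_proxy_row(struct kgem *kgem,
                                         struct kgem_bo *parent,
                                         int row)
{
	/* Expose one row of the parent as a standalone bo; offset and length
	 * are in bytes, and the proxy inherits the parent's tiling and pitch.
	 */
	return kgem_create_proxy(kgem, parent,
				 row * parent->pitch, parent->pitch);
}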
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b && b->offset; b = b->next)
if (format == b->format)
return b->offset;
 
return 0;
}
 
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b; b = b->next) {
if (b->offset)
continue;
 
b->offset = offset;
b->format = format;
 
if (b->next)
b->next->offset = 0;
 
return;
}
 
b = malloc(sizeof(*b));
if (b) {
b->next = bo->binding.next;
b->format = format;
b->offset = offset;
bo->binding.next = b;
}
}
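
These two helpers cache the surface-state offset already emitted for a (bo, format) pair; the gen4_bind_bo()/gen5_bind_bo() paths below use them in a lookup-then-populate pattern, sketched here with emit_surface_state() as a hypothetical stand-in for the real emitter.

static uint32_t emit_surface_state(struct sna *sna, struct kgem_bo *bo,
                                   uint32_t format); /* hypothetical emitter */

static uint32_t example_bind(struct sna *sna, struct kgem_bo *bo,
                             uint32_t format)
{
	uint32_t offset = kgem_bo_get_binding(bo, format);
	if (offset)
		return offset;  /* surface state already emitted for this format */

	offset = emit_surface_state(sna, bo, format);
	kgem_bo_set_binding(bo, format, offset);
	return offset;
}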
 
 
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
bo = __kgem_bo_alloc(-2, size);
if (!bo) {
return 0;
}
 
bo->domain = DOMAIN_GTT;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = fb->pitch;
bo->tiling = I915_TILING_NONE;
bo->scanout = 1;
fb->fb_bo = bo;
 
// printf("fb width %d height %d pitch %d bo %p\n",
// fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
};
 
 
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
bo = fb->fb_bo;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
fb->fb_bo = bo;
 
size = fb->pitch * fb->height / PAGE_SIZE;
 
if((size != bo->size.pages.count) ||
(fb->pitch != bo->pitch))
{
bo->size.pages.count = size;
bo->pitch = fb->pitch;
 
printf("fb width %d height %d pitch %d bo %p\n",
fb->width, fb->height, fb->pitch, fb->fb_bo);
 
return 1;
}
 
return 0;
};
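
A short sketch of how kgem_init_fb()/kgem_update_fb() are meant to be driven (illustrative; the field names follow struct sna_fb as used above): initialise once, then re-query after a mode switch and rebuild anything derived from fb->fb_bo when the pitch or size changed.

static void example_handle_mode_change(struct kgem *kgem, struct sna_fb *fb)
{
	/* kgem_update_fb() refreshes the framebuffer info via SRV_FBINFO and
	 * returns nonzero when the pitch or the size in pages changed, i.e.
	 * when cached state referencing fb->fb_bo must be rebuilt.
	 */
	if (kgem_update_fb(kgem, fb)) {
		/* re-create render targets bound to fb->fb_bo here */
	}
}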
 
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_destroy(kgem, bo);
kgem_bo_free(kgem, bo);
}
 
 
void kgem_close_batches(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
};
 
 
 
/drivers/video/Intel-2D/kgem.h
28,10 → 28,12
#ifndef KGEM_H
#define KGEM_H
 
#define HAS_DEBUG_FULL 1
 
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdio.h>
 
#include <i915_drm.h>
 
38,24 → 40,12
#include "compiler.h"
#include "intel_list.h"
 
#include <limits.h>
#if !defined(MAXSHORT) || !defined(MINSHORT) || \
!defined(MAXINT) || !defined(MININT)
/*
* Some implementations #define these through <math.h>, so preclude
* #include'ing it later.
*/
#undef DBG
 
#include <math.h>
#undef MAXSHORT
#define MAXSHORT SHRT_MAX
#undef MINSHORT
#define MINSHORT SHRT_MIN
#undef MAXINT
#define MAXINT INT_MAX
#undef MININT
#define MININT INT_MIN
 
#if HAS_DEBUG_FULL
#define DBG(x) printf x
#else
#define DBG(x)
#endif
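
The DBG() definition relies on the double-parenthesis idiom, so the whole printf-style argument list is a single macro argument and vanishes in non-debug builds; an illustrative expansion:

/*
 *	DBG(("bind handle=%d at %x\n", bo->handle, offset));
 *
 * expands, with HAS_DEBUG_FULL set, to
 *
 *	printf ("bind handle=%d at %x\n", bo->handle, offset);
 *
 * and to nothing otherwise.
 */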
 
struct kgem_bo {
74,7 → 64,6
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
struct kgem_bo_binding {
struct kgem_bo_binding *next;
101,8 → 90,7
uint32_t pitch : 18; /* max 128k */
uint32_t tiling : 2;
uint32_t reusable : 1;
uint32_t gpu_dirty : 1;
uint32_t gtt_dirty : 1;
uint32_t dirty : 1;
uint32_t domain : 2;
uint32_t needs_flush : 1;
uint32_t snoop : 1;
185,7 → 173,6
uint32_t scanout_busy:1;
uint32_t busy:1;
 
uint32_t has_create2 :1;
uint32_t has_userptr :1;
uint32_t has_blt :1;
uint32_t has_relaxed_fencing :1;
193,9 → 180,8
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
uint32_t has_pinned_batches :1;
uint32_t has_caching :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
 
214,23 → 200,10
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
 
#if 0
void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
#endif
 
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[4096];
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[384] page_aligned;
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;
 
#ifdef DEBUG_MEMORY
struct {
240,11 → 213,9
#endif
};
 
#define KGEM_MAX_DEFERRED_VBO 16
 
#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO)
#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO)
#define KGEM_RELOC_RESERVED 4
#define KGEM_EXEC_RESERVED 1
 
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
262,8 → 233,6
bool read_only);
 
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size);
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo);
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags);
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
270,7 → 239,6
struct kgem_bo *target,
int offset, int length);
 
void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr);
 
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
280,8 → 248,6
#define KGEM_CAN_CREATE_LARGE 0x4
#define KGEM_CAN_CREATE_GTT 0x8
 
uint32_t kgem_get_unique_id(struct kgem *kgem);
 
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
315,6 → 281,7
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_retire(struct kgem *kgem);
 
337,11 → 304,6
return kgem_ring_is_idle(kgem, kgem->ring);
}
 
static inline bool __kgem_ring_empty(struct kgem *kgem)
{
return list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]);
}
 
void _kgem_submit(struct kgem *kgem);
static inline void kgem_submit(struct kgem *kgem)
{
363,8 → 325,21
_kgem_submit(kgem);
}
 
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
 
if (!bo->needs_flush)
return;
 
/* If the kernel fails to emit the flush, then it will be forced when
* we assume direct access. And as the usual failure is EIO, we do
* not actually care.
*/
__kgem_flush(kgem, bo);
}
 
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
{
assert(bo->refcnt);
392,9 → 367,6
kgem_submit(kgem);
#endif
 
if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
_kgem_submit(kgem);
 
if (kgem->mode == mode)
return;
 
447,11 → 419,6
 
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
if (kgem->nreloc) {
unsigned mode = kgem->mode;
_kgem_submit(kgem);
_kgem_set_mode(kgem, mode);
}
 
return kgem->batch + kgem->nbatch;
}
545,12 → 512,6
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
if (kgem->has_llc && bo->tiling == I915_TILING_NONE)
return true;
 
if (!bo->presumed_offset)
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 
583,7 → 544,7
if (kgem_bo_mapped(kgem, bo))
return true;
 
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
if (!bo->tiling && kgem->has_llc)
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
592,22 → 553,6
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
struct kgem_bo *bo,
bool write)
{
if (bo->purged || (bo->scanout && write))
return false;
 
if (kgem->has_llc)
return true;
 
if (bo->domain != DOMAIN_CPU)
return false;
 
return !write || bo->exec == NULL;
}
 
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
assert(bo->refcnt);
616,8 → 561,6
return bo->snoop;
}
 
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo);
 
bool __kgem_busy(struct kgem *kgem, int handle);
 
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
627,12 → 570,10
 
inline static void __kgem_bo_clear_busy(struct kgem_bo *bo)
{
bo->needs_flush = false;
list_del(&bo->request);
bo->rq = NULL;
list_del(&bo->request);
 
bo->domain = DOMAIN_NONE;
bo->needs_flush = false;
bo->gtt_dirty = false;
}
 
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
643,6 → 584,8
return bo->rq;
}
 
/*
 
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
661,24 → 604,8
return kgem_bo_is_busy(bo);
}
 
static inline bool kgem_bo_is_render(struct kgem_bo *bo)
{
DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__,
bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq)));
assert(bo->refcnt);
return bo->rq && RQ_RING(bo->rq) == I915_EXEC_RENDER;
}
*/
 
static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo)
{
while (bo->proxy) {
bo->flush = true;
bo = bo->proxy;
}
bo->flush = true;
bo->reusable = false;
}
 
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
{
if (bo == NULL)
685,7 → 612,7
return false;
 
assert(bo->refcnt);
return bo->gpu_dirty;
return bo->dirty;
}
 
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
705,7 → 632,7
bo->handle, bo->proxy != NULL));
 
bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
bo->needs_flush = bo->gpu_dirty = true;
bo->needs_flush = bo->dirty = true;
list_move(&bo->request, &RQ(bo->rq)->buffers);
}
 
716,7 → 643,7
assert(bo->exec);
assert(bo->rq);
 
if (bo->gpu_dirty)
if (bo->dirty)
return;
 
__kgem_bo_mark_dirty(bo);
745,9 → 672,6
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
 
void kgem_clean_scanout_cache(struct kgem *kgem);
void kgem_clean_large_cache(struct kgem *kgem);
 
#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
/drivers/video/Intel-2D/kgem_debug.c
37,6 → 37,22
 
#include "kgem_debug.h"
 
#include <kos32sys.h>
 
/*
void
ErrorF(const char *f, ...)
{
va_list args;
 
va_start(args, f);
VErrorF(f, args);
va_end(args);
}
*/
 
#define ErrorF printf
 
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset)
{
372,6 → 388,8
 
static int (*decode_3d(int gen))(struct kgem*, uint32_t)
{
return kgem_gen6_decode_3d;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_decode_3d;
383,12 → 401,18
return kgem_gen4_decode_3d;
} else if (gen >= 030) {
return kgem_gen3_decode_3d;
} else if (gen >= 020) {
return kgem_gen2_decode_3d;
}
assert(0);
*/
}
 
static void (*finish_state(int gen))(struct kgem*)
{
 
return kgem_gen6_finish_state;
/*
if (gen >= 0100) {
} else if (gen >= 070) {
return kgem_gen7_finish_state;
400,8 → 424,11
return kgem_gen4_finish_state;
} else if (gen >= 030) {
return kgem_gen3_finish_state;
} else if (gen >= 020) {
return kgem_gen2_finish_state;
}
assert(0);
*/
}
 
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
/drivers/video/Intel-2D/kgem_debug_gen6.c
39,6 → 39,8
 
#include "kgem_debug.h"
 
#define ErrorF printf
 
static struct state {
struct vertex_buffer {
int handle;
/drivers/video/Intel-2D/gen4_render.c
51,7 → 51,6
*/
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
62,8 → 61,6
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
 
#define MAX_FLUSH_VERTICES 6
 
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
289,24 → 286,8
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN4_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
318,22 → 299,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
426,7 → 393,7
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
441,10 → 408,9
GEN4_SURFACE_BLEND_ENABLED |
format << GEN4_SURFACE_FORMAT_SHIFT);
 
if (is_dst) {
ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
455,7 → 421,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
510,7 → 476,7
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
unsigned int id = 1 << op->u.gen4.ve_id;
int id = op->u.gen4.ve_id;
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
518,14 → 484,13
 
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
if ((sna->render.vb_id & id) == 0)
if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
ndwords += 2*FORCE_FLUSH;
 
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
if ((sna->render.vb_id & (1 << id)) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);
543,8 → 508,7
return rem;
}
 
if (!kgem_check_batch(&sna->kgem,
2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
570,27 → 534,6
int rem;
 
assert(want);
#if FORCE_FLUSH
rem = sna->render.vertex_offset;
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
rem = sna->kgem.nbatch - 5;
if (rem) {
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
if (rem <= 0) {
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
rem = MAX_FLUSH_VERTICES;
}
} else
rem = MAX_FLUSH_VERTICES;
if (want > rem)
want = rem;
#endif
 
start:
rem = vertex_space(sna);
609,8 → 552,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
730,7 → 673,7
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
961,8 → 904,6
{
bool flush;
 
assert(op->dst.bo->exec);
 
flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
1043,450 → 984,23
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen4_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen4_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
#if !FORCE_FLUSH
static void
gen4_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen4_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen4_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN4_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
gen4_get_batch(sna, op);
 
binding_table = gen4_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen4_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen4_emit_state(sna, op, offset | dirty);
}
 
static bool
gen4_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen4.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen4.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen4_video_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
do {
int n;
 
n = gen4_get_rectangles(sna, &tmp, nbox,
gen4_video_bind_surfaces);
assert(n);
nbox -= n;
 
do {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
} while (--n);
} while (nbox);
gen4_vertex_flush(sna);
 
return true;
}
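
A worked example of the scale/offset arithmetic above, with illustrative numbers rather than values from this revision:

/* Scaling a 320x240 source (frame->src starting at 0,0; frame->width = 320)
 * onto a 640-wide destination region whose extents begin at x1 = 100:
 *
 *	src_scale_x  = 320 / 640 / 320        = 1/640
 *	src_offset_x = 0/320 - 100 * (1/640)  = -0.15625
 *
 * so destination x = 100 maps to texture u = 100/640 - 0.15625 = 0.0 and
 * x = 740 maps to u = 740/640 - 0.15625 = 1.0, i.e. the 640 destination
 * pixels sweep the whole source normalized to [0, 1].
 */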
 
static int
gen4_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->is_solid = false;
channel->card_format = -1;
 
if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);
 
if (picture->pDrawable == NULL) {
int ret;
 
if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);
 
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_repeat(picture)) {
DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen4_check_filter(picture)) {
DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;
 
channel->pict_format = picture->format;
channel->card_format = gen4_get_card_format(picture->format);
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
__FUNCTION__,
channel->repeat, gen4_repeat(channel->repeat),
channel->filter, gen4_repeat(channel->filter)));
channel->repeat = gen4_repeat(channel->repeat);
channel->filter = gen4_filter(channel->filter);
if (channel->card_format == (unsigned)-1)
channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif
 
static void
gen4_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
1500,409 → 1014,113
 
}
 
#if 0
static bool
gen4_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
try_blt(struct sna *sna,
PicturePtr dst, PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}
 
if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}
 
if (too_large(dst->pDrawable->width, dst->pDrawable->height))
return true;
 
/* The blitter is much faster for solids */
if (sna_picture_is_solid(src, NULL))
return true;
 
/* is the source picture only in cpu memory e.g. a shm pixmap? */
return picture_is_cpu(sna, src);
}
 
static bool
check_gradient(PicturePtr picture, bool precise)
{
switch (picture->pSourcePict->type) {
case SourcePictTypeSolidFill:
case SourcePictTypeLinear:
return false;
default:
return precise;
}
}
 
static bool
has_alphamap(PicturePtr p)
{
return p->alphaMap != NULL;
}
 
static bool
need_upload(struct sna *sna, PicturePtr p)
{
return p->pDrawable && untransformed(p) &&
!is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
 
static bool
source_is_busy(PixmapPtr pixmap)
{
struct sna_pixmap *priv = sna_pixmap(pixmap);
if (priv == NULL)
return false;
 
if (priv->clear)
return false;
 
if (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))
return true;
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
return priv->gpu_damage && !priv->cpu_damage;
}
 
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
if (sna_picture_is_solid(p, NULL))
return false;
 
if (p->pSourcePict)
return check_gradient(p, precise);
 
if (!gen4_check_repeat(p) || !gen4_check_format(p->format))
return true;
 
/* soft errors: prefer to upload/compute rather than read back */
if (pixmap && source_is_busy(pixmap))
return false;
 
return has_alphamap(p) || !gen4_check_filter(p) || need_upload(sna, p);
}
 
static bool
gen4_composite_fallback(struct sna *sna,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;
 
if (!gen4_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}
 
dst_pixmap = get_drawable_pixmap(dst->pDrawable);
 
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(sna, src, src_pixmap,
dst->polyMode == PolyModePrecise);
 
if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(sna, mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}
 
/* If we are using the destination as a source and need to
* read it back in order to upload the source, do it all
* on the CPU.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}
 
/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
 
/* However, if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}
 
if (mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}
 
if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}
 
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
return dst_use_cpu(dst_pixmap);
}
 
static int
reuse_source(struct sna *sna,
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
uint32_t color;
 
if (src_x != msk_x || src_y != msk_y)
return false;
 
if (src == mask) {
DBG(("%s: mask is source\n", __FUNCTION__));
*mc = *sc;
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
if (sna_picture_is_solid(mask, &color))
return gen4_channel_init_solid(sna, mc, color);
 
if (sc->is_solid)
return false;
 
if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
return false;
 
DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
 
if (!sna_transform_equal(src->transform, mask->transform))
return false;
 
if (!sna_picture_alphamap_equal(src, mask))
return false;
 
if (!gen4_check_repeat(mask))
return false;
 
if (!gen4_check_filter(mask))
return false;
 
if (!gen4_check_format(mask->format))
return false;
 
DBG(("%s: reusing source channel for mask with a twist\n",
__FUNCTION__));
 
*mc = *sc;
mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
mc->filter = gen4_filter(mask->filter);
mc->pict_format = mask->format;
mc->card_format = gen4_get_card_format(mask->format);
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
static bool
gen4_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
width, height, sna->kgem.ring));
 
if (op >= ARRAY_SIZE(gen4_blend_op))
return false;
tmp->op = PictOpSrc;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
if (gen4_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
if (!gen4_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->op = op;
switch (gen4_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen4_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen4_blend_op[op].src_alpha &&
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s -- fallback: unhandled component alpha blend\n",
__FUNCTION__));
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
goto cleanup_src;
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen4_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen4_composite_channel_convert(&tmp->mask);
break;
}
}
 
tmp->is_affine &= tmp->mask.is_affine;
}
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
1909,17 → 1127,9
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
tmp->boxes = gen4_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
}
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
1926,70 → 1136,13
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void
gen4_render_flush(struct sna *sna)
{
2077,7 → 1230,7
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
uint32_t kernel)
int gen, uint32_t kernel)
{
struct gen4_sf_unit_state *sf;
 
2231,7 → 1384,7
}
 
state->vs = gen4_create_vs_unit_state(&general);
state->sf = gen4_create_sf_state(&general, sf);
state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);
 
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
2268,52 → 1421,20
return state->general_bo != NULL;
}
 
const char *gen4_render_init(struct sna *sna, const char *backend)
 
bool gen4_render_init(struct sna *sna)
{
if (!gen4_render_setup(sna))
return backend;
return false;
 
sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen4_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen4_check_composite_spans;
sna->render.composite_spans = gen4_render_composite_spans;
if (0)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
 
#if !NO_VIDEO
sna->render.video = gen4_render_video;
#endif
sna->render.blit_tex = gen4_blit_tex;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen4_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen4_render_fill_one;
#endif
 
#endif
 
sna->render.blit_tex = gen4_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
sna->render.fini = gen4_render_fini;
2320,95 → 1441,8
 
sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
/drivers/video/Intel-2D/gen5_render.c
279,24 → 279,8
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN5_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
308,22 → 292,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
typedef struct gen5_surface_state_padded {
414,7 → 384,7
 
/* After the first bind, we manage the cache domains within the batch */
if (!DBG_NO_SURFACE_CACHE) {
offset = kgem_bo_get_binding(bo, format | is_dst << 31);
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
430,10 → 400,9
GEN5_SURFACE_BLEND_ENABLED |
format << GEN5_SURFACE_FORMAT_SHIFT);
 
if (is_dst) {
ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
444,7 → 413,7
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format | is_dst << 31, offset);
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
572,8 → 541,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
680,13 → 649,8
*
* However, the kernel flushes the pipeline between batches,
* so we should be safe....
*
* On the other hand, after using BLT we must use a non-pipelined
* operation...
* OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
*/
if (sna->kgem.nreloc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
 
OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
gen5_emit_state_base_address(sna);
768,15 → 732,12
kernel);
bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
 
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
__FUNCTION__, sp, bp, key,
sna->render_state.gen5.last_pipelined_pointers & 0xffff,
(sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
sna->render_state.gen5.last_pipelined_pointers));
if (key == sna->render_state.gen5.last_pipelined_pointers)
return false;
 
 
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen5.vs);
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
785,15 → 746,11
OUT_BATCH(sna->render_state.gen5.wm + sp);
OUT_BATCH(sna->render_state.gen5.cc + bp);
 
bp = (sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
sna->render_state.gen5.last_pipelined_pointers = key;
 
gen5_emit_urb(sna);
 
return bp;
return true;
}
 
static bool
static void
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
805,7 → 762,7
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.drawrect_limit == limit &&
sna->render_state.gen5.drawrect_offset == offset)
return false;
return;
 
sna->render_state.gen5.drawrect_offset = offset;
sna->render_state.gen5.drawrect_limit = limit;
814,7 → 771,6
OUT_BATCH(0x00000000);
OUT_BATCH(limit);
OUT_BATCH(offset);
return true;
}
 
static void
936,35 → 892,11
}
}
 
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
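/* Minimal 4-dword PIPE_CONTROL with only the write-cache flush bit set */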
OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
bool flush = false;
 
assert(op->dst.bo->exec);
 
/* drawrect must be first for Ironlake BLT workaround */
if (gen5_emit_drawing_rectangle(sna, op))
offset &= ~1;
gen5_emit_binding_table(sna, offset & ~1);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
DBG(("%s: changed blend state, flush required? %d\n",
__FUNCTION__, (offset & 1) && op->op > PictOpSrc));
flush = (offset & 1) && op->op > PictOpSrc;
}
gen5_emit_vertex_elements(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
972,18 → 904,19
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
if (flush) {
DBG(("%s: forcing flush\n", __FUNCTION__));
gen5_emit_pipe_flush(sna);
 
/* drawrect must be first for Ironlake BLT workaround */
gen5_emit_drawing_rectangle(sna, op);
gen5_emit_binding_table(sna, offset);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
}
}
 
static void gen5_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
 
1020,7 → 953,7
offset = sna->render_state.gen5.surface_table;
}
 
gen5_emit_state(sna, op, offset | dirty);
gen5_emit_state(sna, op, offset);
}
 
fastcall static void
1039,335 → 972,8
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen5_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen5_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen5_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen5_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t gen5_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen5_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN5_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen5_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
gen5_get_batch(sna, op);
 
binding_table = gen5_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen5_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen5_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen5_emit_state(sna, op, offset | dirty);
}
 
static bool
gen5_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;
 
DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen5.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;
 
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}
 
gen5_video_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
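/* Scale factors convert destination pixels into normalised source texture
 * coordinates; the offsets anchor the region extents at frame->src.x1/y1. */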
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
 
gen4_vertex_flush(sna);
return true;
}
#endif
 
static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
1380,175 → 986,69
 
}
 
#if 0
static bool
gen5_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;
 
return true;
}
 
static bool
gen5_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
if (op >= ARRAY_SIZE(gen5_blend_op)) {
DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
return false;
}
tmp->op = PictOpSrc;
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
if (gen5_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
if (!gen5_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
DBG(("%s: preparing source\n", __FUNCTION__));
tmp->op = op;
switch (gen5_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen5_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen5_blend_op[op].src_alpha &&
(gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
goto cleanup_src;
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
}
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen5_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->mask);
break;
}
}
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
tmp->is_affine &= tmp->mask.is_affine;
}
 
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
1555,23 → 1055,15
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
tmp->boxes = gen5_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen5_render_composite_boxes;
tmp->thread_boxes = gen5_render_composite_boxes__thread;
}
// tmp->box = gen5_render_composite_box;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
goto cleanup_mask;
}
 
gen5_bind_surfaces(sna, tmp);
1578,448 → 1070,11
gen5_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen5_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
 
DBG(("%s()\n", __FUNCTION__));
 
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen5_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height,
unsigned flags)
{
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
__FUNCTION__, op, width, height, flags));
 
if (op >= ARRAY_SIZE(gen5_blend_op))
return false;
 
if (gen5_composite_fallback(sna, src, NULL, dst)) {
DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
}
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
assert(priv);
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
 
if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
dst->format == PICT_a8)
return false;
 
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
return true;
}
 
static bool
gen5_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));
 
assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
tmp->base.op = op;
if (!gen5_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height,
true))
return false;
 
switch (gen5_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->base.src);
break;
}
 
tmp->base.mask.bo = NULL;
 
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.has_component_alpha = false;
tmp->base.need_magic_ca_pass = false;
 
tmp->base.u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
tmp->base.u.gen5.wm_kernel = WM_KERNEL_OPACITY | !tmp->base.is_affine;
 
tmp->box = gen5_render_composite_spans_box;
tmp->boxes = gen5_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
tmp->done = gen5_render_composite_spans_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
}
 
gen5_bind_surfaces(sna, &tmp->base);
gen5_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
 
 
static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
 
DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
__FUNCTION__, alu,
src->drawable.serialNumber, src_bo->handle,
dst->drawable.serialNumber, dst_bo->handle,
n, box->x1, box->y1, box->x2, box->y2,
flags));
 
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
memset(&tmp, 0, sizeof(tmp));
 
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen5_check_format(tmp.src.pict_format)) {
DBG(("%s: unsupported source format, %x, use BLT\n",
__FUNCTION__, tmp.src.pict_format));
goto fallback_blt;
}
 
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
 
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.x = tmp.dst.y = 0;
tmp.dst.bo = dst_bo;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;
}
 
tmp.src.filter = SAMPLER_FILTER_NEAREST;
tmp.src.repeat = SAMPLER_EXTEND_NONE;
tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled_dst;
} else {
tmp.src.bo = kgem_bo_reference(src_bo);
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
tmp.src.offset[0] = tmp.src.offset[1] = 0;
tmp.src.scale[0] = 1.f/src->drawable.width;
tmp.src.scale[1] = 1.f/src->drawable.height;
}
 
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen5.wm_kernel = WM_KERNEL;
tmp.u.gen5.ve_id = 2;
 
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
DBG(("%s: aperture check failed\n", __FUNCTION__));
goto fallback_tiled_src;
}
}
 
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
tmp.dst.x = tmp.dst.y = 0;
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
 
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
do {
int n_this_time;
 
n_this_time = gen5_get_rectangles(sna, &tmp, n,
gen5_copy_bind_surfaces);
n -= n_this_time;
 
do {
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
DBG(("%s: tiled fallback\n", __FUNCTION__));
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
#endif
 
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
2342,38 → 1397,16
return state->general_bo != NULL;
}
 
const char *gen5_render_init(struct sna *sna, const char *backend)
bool gen5_render_init(struct sna *sna)
{
if (!gen5_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen5_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen5_check_composite_spans;
sna->render.composite_spans = gen5_render_composite_spans;
if (sna->PciInfo->device_id == 0x0044)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen5_render_video;
 
sna->render.copy_boxes = gen5_render_copy_boxes;
sna->render.copy = gen5_render_copy;
 
sna->render.fill_boxes = gen5_render_fill_boxes;
sna->render.fill = gen5_render_fill;
sna->render.fill_one = gen5_render_fill_one;
#endif
 
sna->render.blit_tex = gen5_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen5_render_flush;
sna->render.reset = gen5_render_reset;
2381,89 → 1414,7
 
sna->render.max_3d_size = MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return "Ironlake (gen5)";
};
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);
 
tmp->blt = gen5_render_composite_blt;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
}
 
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen6_render.c
30,6 → 30,9
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
51,7 → 54,7
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 0
#define NO_RING_SWITCH 1
#define PREFER_RENDER 0
 
#define USE_8_PIXEL_DISPATCH 1
65,7 → 68,6
#define GEN6_MAX_SIZE 8192
 
struct gt_info {
const char *name;
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
77,7 → 79,6
};
 
static const struct gt_info gt1_info = {
.name = "Sandybridge (gen6, gt1)",
.max_vs_threads = 24,
.max_gs_threads = 21,
.max_wm_threads = 40,
85,7 → 86,6
};
 
static const struct gt_info gt2_info = {
.name = "Sandybridge (gen6, gt2)",
.max_vs_threads = 60,
.max_gs_threads = 60,
.max_wm_threads = 80,
254,6 → 254,18
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
};
 
/*
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
273,6 → 285,7
case PICT_a4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
*/
}
 
static uint32_t gen6_get_dest_format(PictFormat format)
390,6 → 403,7
int base;
 
if (has_mask) {
 
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
base = GEN6_WM_KERNEL_MASKSA;
397,6 → 411,7
base = GEN6_WM_KERNEL_MASKCA;
} else
base = GEN6_WM_KERNEL_MASK;
 
} else
base = GEN6_WM_KERNEL_NOMASK;
 
874,8 → 889,6
{
bool need_stall = wm_binding_table & 1;
 
assert(op->dst.bo->exec);
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
889,7 → 902,7
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
1026,6 → 1039,8
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
return 0;
/*
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
1032,6 → 1047,7
case I915_TILING_X: return GEN6_SURFACE_TILED;
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
}
*/
}
 
/**
1052,7 → 1068,7
uint32_t is_scanout = is_dst && bo->scanout;
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
1068,10 → 1084,9
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst) {
ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
1080,9 → 1095,9
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;
ss[5] = is_scanout ? 0 : 3 << 16;
 
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1195,8 → 1210,6
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
1207,15 → 1220,10
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen6_rectangle_begin(sna, op))
if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
goto flush;
else
goto start;
}
 
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1332,6 → 1340,7
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
 
fastcall static void
gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
1342,6 → 1351,7
}
 
#if 0
 
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
1452,6 → 1462,7
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#endif
 
#ifndef MAX
1491,6 → 1502,7
}
 
#if 0
 
static uint32_t gen6_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
1594,24 → 1606,21
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__, src_w, src_h, drw_w, drw_h,
REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
1635,7 → 1644,7
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_width == dst_width && src_height == dst_height)
if (src_w == drw_w && src_h == drw_h)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
1671,12 → 1680,10
pix_yoff = 0;
#endif
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
1690,16 → 1697,16
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
1707,6 → 1714,7
}
box++;
}
priv->clear = false;
 
gen4_vertex_flush(sna);
return true;
1814,6 → 1822,7
channel->card_format = gen6_get_card_format(channel->pict_format);
assert(channel->card_format != (unsigned)-1);
}
 
#endif
 
static void gen6_render_composite_done(struct sna *sna,
1828,15 → 1837,16
}
 
 
// sna_render_composite_redirect_done(sna, op);
}
 
#if 0
 
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
int x, int y, int w, int h)
{
BoxRec box;
 
1879,23 → 1889,7
return true;
}
 
static bool
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
{
if (untiled_tlb_miss(tmp->dst.bo) ||
untiled_tlb_miss(tmp->src.bo))
return true;
 
if (kgem_bo_is_render(tmp->dst.bo) ||
kgem_bo_is_render(tmp->src.bo))
return false;
 
if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
return false;
 
return prefer_blt_bo(sna, tmp->dst.bo) || prefer_blt_bo(sna, tmp->src.bo);
}
 
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
1914,33 → 1908,11
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen6_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->src,
2028,15 → 2000,15
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(sna, tmp));
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
tmp->boxes = gen6_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen6_render_composite_boxes;
tmp->thread_boxes = gen6_render_composite_boxes__thread;
}
// tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
// tmp->boxes = gen6_render_composite_boxes__blt;
// if (tmp->emit_boxes) {
// tmp->boxes = gen6_render_composite_boxes;
// tmp->thread_boxes = gen6_render_composite_boxes__thread;
// }
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
2067,6 → 2039,7
return false;
}
 
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
2234,7 → 2207,7
 
tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height, true))
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->base.src,
2264,7 → 2237,7
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(sna, tmp));
gen4_choose_spans_emitter(tmp));
 
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
2298,6 → 2271,7
}
#endif
 
 
static void
gen6_emit_copy_state(struct sna *sna,
const struct sna_composite_op *op)
2330,6 → 2304,7
gen6_emit_state(sna, op, offset | dirty);
}
 
 
static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *src_bo,
struct kgem_bo *dst_bo,
2351,14 → 2326,11
untiled_tlb_miss(dst_bo))
return true;
 
if (kgem_bo_is_render(dst_bo) ||
kgem_bo_is_render(src_bo))
return false;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
return prefer_blt_bo(sna, src_bo) || prefer_blt_bo(sna, dst_bo);
return (prefer_blt_bo(sna, src_bo) >= 0 &&
prefer_blt_bo(sna, dst_bo) > 0);
}
 
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
2492,8 → 2464,7
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
extents.y2 - extents.y1))
goto fallback_tiled;
 
dst_dx += tmp.dst.x;
2508,14 → 2479,14
 
extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
if (extents.x1 < box[i].x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
if (extents.y1 < box[i].y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
if (extents.x2 > box[i].x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
if (extents.y2 > box[i].y2)
extents.y2 = box[i].y2;
}
 
2661,35 → 2632,10
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
 
if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op))
return true;
 
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
too_large(src->drawable.width, src->drawable.height) ||
too_large(dst->drawable.width, dst->drawable.height)) {
fallback:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy(sna, alu, src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op);
}
 
if (dst->drawable.depth == src->drawable.depth) {
op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
op->base.dst.format = PIXMAN_a8r8g8b8;
op->base.src.pict_format = op->base.dst.format;
} else {
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(op->base.src.pict_format))
goto fallback;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
2729,7 → 2675,119
}
#endif
 
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
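/* Use nearest filtering for a 1:1 blit, bilinear when the source is scaled */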
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
 
 
 
#if 0
 
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
2768,16 → 2826,10
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (kgem_bo_is_render(bo))
return false;
 
if (untiled_tlb_miss(bo))
return true;
 
if (!prefer_blt_ring(sna, bo, 0))
return false;
 
return prefer_blt_bo(sna, bo);
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
 
static bool
2858,8 → 2910,7
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
extents.y2 - extents.y1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
3334,12 → 3385,12
 
static bool is_gt2(struct sna *sna)
{
return sna->PciInfo->device_id & 0x30;
return DEVICE_ID(sna->PciInfo) & 0x30;
}
 
static bool is_mobile(struct sna *sna)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
3420,52 → 3471,28
return state->general_bo != NULL;
}
 
const char *gen6_render_init(struct sna *sna, const char *backend)
bool gen6_render_init(struct sna *sna)
{
if (!gen6_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
// sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
 
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;
// sna->render.copy_boxes = gen6_render_copy_boxes;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen6_render_copy;
#endif
sna->render.blit_tex = gen6_blit_tex;
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen6_render_clear;
#endif
#endif
// sna->render.copy = gen6_render_copy;
 
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;
// sna->render.fill_boxes = gen6_render_fill_boxes;
// sna->render.fill = gen6_render_fill;
// sna->render.fill_one = gen6_render_fill_one;
// sna->render.clear = gen6_render_clear;
 
sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
3473,113 → 3500,9
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen6.info->name;
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
return true;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
/drivers/video/Intel-2D/gen7_render.c
45,8 → 45,6
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define ALWAYS_FLUSH 0
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
77,7 → 75,6
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
struct gt_info {
const char *name;
uint32_t max_vs_threads;
uint32_t max_gs_threads;
uint32_t max_wm_threads;
85,107 → 82,57
int size;
int max_vs_entries;
int max_gs_entries;
int push_ps_size; /* in 1KBs */
} urb;
int gt;
};
 
static const struct gt_info ivb_gt_info = {
.name = "Ivybridge (gen7)",
.max_vs_threads = 16,
.max_gs_threads = 16,
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 64, 64, 8 },
.gt = 0,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info ivb_gt1_info = {
.name = "Ivybridge (gen7, gt1)",
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
.gt = 1,
.urb = { 128, 512, 192 },
};
 
static const struct gt_info ivb_gt2_info = {
.name = "Ivybridge (gen7, gt2)",
.max_vs_threads = 128,
.max_gs_threads = 128,
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 256, 704, 320, 8 },
.gt = 2,
.urb = { 256, 704, 320 },
};
 
static const struct gt_info byt_gt_info = {
.name = "Baytrail (gen7)",
.urb = { 128, 64, 64 },
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192, 8 },
.gt = 1,
};
 
static const struct gt_info hsw_gt_info = {
.name = "Haswell (gen7.5)",
.max_vs_threads = 8,
.max_gs_threads = 8,
.max_wm_threads =
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 64, 64, 8 },
.gt = 0,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info hsw_gt1_info = {
.name = "Haswell (gen7.5, gt1)",
.max_vs_threads = 70,
.max_gs_threads = 70,
.max_wm_threads =
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 640, 256, 8 },
.gt = 1,
.urb = { 128, 640, 256 },
};
 
static const struct gt_info hsw_gt2_info = {
.name = "Haswell (gen7.5, gt2)",
.max_vs_threads = 140,
.max_gs_threads = 140,
.max_wm_threads =
(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640, 8 },
.gt = 2,
};
 
static const struct gt_info hsw_gt3_info = {
.name = "Haswell (gen7.5, gt3)",
.max_vs_threads = 280,
.max_gs_threads = 280,
.max_wm_threads =
(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 512, 3328, 1280, 16 },
.gt = 3,
.urb = { 256, 1664, 640 },
};
 
inline static bool is_ivb(struct sna *sna)
{
return sna->kgem.gen == 070;
}
 
inline static bool is_byt(struct sna *sna)
{
return sna->kgem.gen == 071;
}
 
inline static bool is_hsw(struct sna *sna)
{
return sna->kgem.gen == 075;
}
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
347,24 → 294,8
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
376,22 → 307,8
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
}
}
 
418,7 → 335,7
gen7_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
OUT_BATCH(8); /* in 1KBs */
 
/* num of VS entries must be divisible by 8 if size < 9 */
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
442,10 → 359,6
static void
gen7_emit_state_base_address(struct sna *sna)
{
uint32_t mocs;
 
mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
 
OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
453,17 → 366,17
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
mocs | BASE_ADDRESS_MODIFY));
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
mocs | BASE_ADDRESS_MODIFY));
BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
992,17 → 905,10
}
 
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
gen7_emit_pipe_flush(struct sna *sna)
{
unsigned stall;
 
stall = 0;
if (need_stall)
stall = (GEN7_PIPE_CONTROL_CS_STALL |
GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
 
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
1024,7 → 930,8
{
bool need_stall;
 
assert(op->dst.bo->exec);
if (sna->render_state.gen7.emit_flush)
gen7_emit_pipe_flush(sna);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
1035,18 → 942,13
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_stall &= gen7_emit_drawing_rectangle(sna, op);
 
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
sna->render_state.gen7.emit_flush = false;
need_stall = false;
}
if (sna->render_state.gen7.emit_flush) {
gen7_emit_pipe_flush(sna, need_stall);
need_stall = false;
}
if (need_stall)
gen7_emit_pipe_stall(sna);
 
1189,7 → 1091,7
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
1202,12 → 1104,9
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
gen7_tiling_bits(bo->tiling) |
format << GEN7_SURFACE_FORMAT_SHIFT);
if (bo->tiling == I915_TILING_Y)
ss[0] |= GEN7_SURFACE_VALIGN_4;
if (is_dst) {
ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
} else
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
1214,13 → 1113,13
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
ss[5] = is_scanout ? 0 : 3 << 16;
ss[6] = 0;
ss[7] = 0;
if (is_hsw(sna))
if (sna->kgem.gen == 075)
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
1343,8 → 1242,8
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
1499,266 → 1398,114
return sna_static_stream_offsetof(stream, base);
}
 
#if 0
static uint32_t gen7_bind_video_source(struct sna *sna,
struct kgem_bo *bo,
uint32_t offset,
int width,
int height,
int pitch,
uint32_t format)
{
uint32_t *ss, bind;
 
bind = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
 
assert(bo->tiling == I915_TILING_NONE);
 
ss = sna->kgem.batch + bind;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
format << GEN7_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
I915_GEM_DOMAIN_SAMPLER << 16,
offset);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = 0;
ss[6] = 0;
ss[7] = 0;
if (is_hsw(sna))
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
bind, bo->handle, ss[1],
format, width, height, pitch, offset));
 
return bind * sizeof(uint32_t);
}
 
static void gen7_emit_video_state(struct sna *sna,
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;
 
gen7_get_batch(sna, op);
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen7_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen7_emit_state(sna, op, offset);
}
 
static bool
gen7_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
tmp->op = PictOpSrc;
 
memset(&tmp, 0, sizeof(tmp));
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
 
tmp.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
__FUNCTION__,
frame->src.x1, frame->src.y1,
src_width, src_height,
dst_width, dst_height,
frame->width, frame->height));
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
 
src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
 
DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
__FUNCTION__,
src_scale_x, src_scale_y,
src_offset_x, src_offset_y));
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
__FUNCTION__,
box->x1, box->y1,
box->x2, box->y2,
pix_xoff, pix_yoff,
box->x1 * src_scale_x + src_offset_x,
box->y1 * src_scale_y + src_offset_y,
box->x2 * src_scale_x + src_offset_x,
box->y2 * src_scale_y + src_offset_y));
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
box++;
}
 
gen4_vertex_flush(sna);
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
#endif
 
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
}
 
 
 
1850,143 → 1597,6
 
 
 
 
 
#if 0
static bool
gen7_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;
 
DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));
 
if (op >= ARRAY_SIZE(gen7_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}
 
if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;
 
if (!gen7_check_dst_format(format))
return false;
}
 
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;
 
DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;
 
boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
 
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen7.flags = FILL_FLAGS(op, format);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen7_emit_fill_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);
 
do {
int n_this_time;
int16_t *v;
 
n_this_time = gen7_get_rectangles(sna, &tmp, n,
gen7_emit_fill_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
 
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
#endif
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
2066,20 → 1676,14
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
static bool is_gt3(struct sna *sna)
{
assert(sna->kgem.gen == 075);
return sna->PciInfo->device_id & 0x20;
}
 
static bool is_gt2(struct sna *sna)
{
return sna->PciInfo->device_id & (is_hsw(sna)? 0x30 : 0x20);
return DEVICE_ID(sna->PciInfo) & 0x20;
}
 
static bool is_mobile(struct sna *sna)
{
return (sna->PciInfo->device_id & 0xf) == 0x6;
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
2089,24 → 1693,19
struct gen7_sampler_state *ss;
int i, j, k, l, m;
 
if (is_ivb(sna)) {
if (sna->kgem.gen == 070) {
state->info = &ivb_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (is_byt(sna)) {
state->info = &byt_gt_info;
} else if (is_hsw(sna)) {
} else if (sna->kgem.gen == 075) {
state->info = &hsw_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (is_gt3(sna))
state->info = &hsw_gt3_info;
else if (is_gt2(sna))
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &hsw_gt1_info;
if (is_gt2(sna))
state->info = &hsw_gt2_info;
else
state->info = &hsw_gt1_info;
}
} else
return false;
2173,51 → 1772,16
return state->general_bo != NULL;
}
 
const char *gen7_render_init(struct sna *sna, const char *backend)
bool gen7_render_init(struct sna *sna)
{
if (!gen7_render_setup(sna))
return backend;
return false;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen7_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen7_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen7_render_clear;
#endif
#endif
 
sna->render.blit_tex = gen7_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen7_render_flush;
sna->render.reset = gen7_render_reset;
2225,102 → 1789,9
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen7.info->name;
}
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
return true;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
 
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(sna, tmp));
 
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
/drivers/video/Intel-2D/sna.c
1,51 → 1,11
/**************************************************************************
 
Copyright 2001 VA Linux Systems Inc., Fremont, California.
Copyright © 2002 by David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors: Jeff Hartmann <jhartmann@valinux.com>
* Abraham van der Merwe <abraham@2d3d.co.za>
* David Dawes <dawes@xfree86.org>
* Alan Hourihane <alanh@tungstengraphics.com>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <memory.h>
#include <malloc.h>
#include <kos32sys.h>
#include <pixlib2.h>
#include "i915_pciids.h"
 
#include "compiler.h"
#include "sna.h"
#include "intel_driver.h"
 
#define to_surface(x) (surface_t*)((x)->handle)
 
86,8 → 46,6
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing,
unsigned flags, uint32_t width, uint32_t height,
uint32_t bpp, uint32_t tiling, uint32_t *pitch);
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
int pitch, int height);
 
void kgem_close_batches(struct kgem *kgem);
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
95,6 → 53,8
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci);
 
//struct kgem_bo *create_bo(bitmap_t *bitmap);
 
static bool sna_solid_cache_init(struct sna *sna);
 
struct sna *sna_device;
106,46 → 66,8
(void)sna;
}
 
static void no_render_flush(struct sna *sna)
void no_render_init(struct sna *sna)
{
(void)sna;
}
 
static void
no_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (!kgem->nbatch)
return;
 
if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
 
(void)new_mode;
}
 
static void
no_render_retire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_expire(struct kgem *kgem)
{
(void)kgem;
}
 
static void
no_render_fini(struct sna *sna)
{
(void)sna;
}
 
const char *no_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
memset (render,0, sizeof (*render));
155,19 → 77,27
render->vertices = render->vertex_data;
render->vertex_size = ARRAY_SIZE(render->vertex_data);
 
// render->composite = no_render_composite;
 
// render->copy_boxes = no_render_copy_boxes;
// render->copy = no_render_copy;
 
// render->fill_boxes = no_render_fill_boxes;
// render->fill = no_render_fill;
// render->fill_one = no_render_fill_one;
// render->clear = no_render_clear;
 
render->reset = no_render_reset;
render->flush = no_render_flush;
render->fini = no_render_fini;
// render->flush = no_render_flush;
// render->fini = no_render_fini;
 
sna->kgem.context_switch = no_render_context_switch;
sna->kgem.retire = no_render_retire;
sna->kgem.expire = no_render_expire;
// sna->kgem.context_switch = no_render_context_switch;
// sna->kgem.retire = no_render_retire;
 
sna->kgem.mode = KGEM_RENDER;
if (sna->kgem.gen >= 60)
sna->kgem.ring = KGEM_RENDER;
 
sna_vertex_init(sna);
return "generic";
}
 
void sna_vertex_init(struct sna *sna)
181,27 → 111,47
{
const char *backend;
 
backend = no_render_init(sna);
if (sna->info->gen >= 0100)
(void)backend;
else if (sna->info->gen >= 070)
backend = gen7_render_init(sna, backend);
else if (sna->info->gen >= 060)
backend = gen6_render_init(sna, backend);
else if (sna->info->gen >= 050)
backend = gen5_render_init(sna, backend);
else if (sna->info->gen >= 040)
backend = gen4_render_init(sna, backend);
else if (sna->info->gen >= 030)
backend = gen3_render_init(sna, backend);
// list_init(&sna->deferred_free);
// list_init(&sna->dirty_pixmaps);
// list_init(&sna->active_pixmaps);
// list_init(&sna->inactive_clock[0]);
// list_init(&sna->inactive_clock[1]);
 
// sna_accel_install_timers(sna);
 
 
backend = "no";
no_render_init(sna);
 
if (sna->info->gen >= 0100) {
} else if (sna->info->gen >= 070) {
if (gen7_render_init(sna))
backend = "IvyBridge";
} else if (sna->info->gen >= 060) {
if (gen6_render_init(sna))
backend = "SandyBridge";
} else if (sna->info->gen >= 050) {
if (gen5_render_init(sna))
backend = "Ironlake";
} else if (sna->info->gen >= 040) {
if (gen4_render_init(sna))
backend = "Broadwater/Crestline";
} else if (sna->info->gen >= 030) {
if (gen3_render_init(sna))
backend = "gen3";
}
 
DBG(("%s(backend=%s, prefer_gpu=%x)\n",
__FUNCTION__, backend, sna->render.prefer_gpu));
 
kgem_reset(&sna->kgem);
 
// if (!sna_solid_cache_init(sna))
// return false;
 
sna_device = sna;
 
 
return kgem_init_fb(&sna->kgem, &sna_fb);
}
 
236,17 → 186,33
 
memset(sna, 0, sizeof(*sna));
 
sna->cpu_features = sna_cpu_detect();
sna->PciInfo = &device;
 
sna->PciInfo = &device;
sna->info = intel_detect_chipset(sna->PciInfo);
sna->scrn = service;
 
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
 
/*
if (!xf86ReturnOptValBool(sna->Options,
OPTION_RELAXED_FENCING,
sna->kgem.has_relaxed_fencing)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED,
"Disabling use of relaxed fencing\n");
sna->kgem.has_relaxed_fencing = 0;
}
if (!xf86ReturnOptValBool(sna->Options,
OPTION_VMAP,
sna->kgem.has_vmap)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_vmap ? X_CONFIG : X_PROBED,
"Disabling use of vmap\n");
sna->kgem.has_vmap = 0;
}
*/
 
/* Disable tiling by default */
sna->tiling = 0;
sna->tiling = SNA_TILING_DISABLE;
 
/* Default fail-safe value of 75 Hz */
// sna->vblank_interval = 1000 * 1000 * 1000 / 75;
524,47 → 490,6
return -1;
};
 
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
{
surface_t *sf;
struct kgem_bo *bo;
 
sf = malloc(sizeof(*sf));
if(sf == NULL)
goto err_1;
 
__lock_acquire_recursive(__sna_lock);
 
bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height);
 
__lock_release_recursive(__sna_lock);
 
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->data = NULL;
sf->pitch = bo->pitch;
sf->bo = bo;
sf->bo_size = PAGE_SIZE * bo->size.pages.count;
sf->flags = bitmap->flags;
 
bitmap->handle = (uint32_t)sf;
 
return 0;
 
err_2:
__lock_release_recursive(__sna_lock);
free(sf);
err_1:
return -1;
};
 
void sna_set_bo_handle(bitmap_t *bitmap, int handle)
{
surface_t *sf = to_surface(bitmap);
struct kgem_bo *bo = sf->bo;
bo->handle = handle;
}
 
int sna_destroy_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
761,7 → 686,7
 
VG_CLEAR(update);
update.handle = mask_bo->handle;
update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo);
update.bo_map = (__u32)MAP(mask_bo->map);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
mask_bo->pitch = update.bo_pitch;
 
831,6 → 756,8
 
 
 
 
 
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};
880,37 → 807,100
 
static const struct pci_id_match intel_device_match[] = {
 
INTEL_I915G_IDS(&intel_i915_info),
INTEL_I915GM_IDS(&intel_i915_info),
INTEL_I945G_IDS(&intel_i945_info),
INTEL_I945GM_IDS(&intel_i945_info),
 
INTEL_G33_IDS(&intel_g33_info),
INTEL_PINEVIEW_IDS(&intel_g33_info),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ),
 
INTEL_I965G_IDS(&intel_i965_info),
INTEL_I965GM_IDS(&intel_i965_info),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ),
/* Another marketing win: Q35 is another g33 device not a gen4 part
* like its G35 brethren.
*/
INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ),
 
INTEL_G45_IDS(&intel_g4x_info),
INTEL_GM45_IDS(&intel_g4x_info),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ),
 
INTEL_IRONLAKE_D_IDS(&intel_ironlake_info),
INTEL_IRONLAKE_M_IDS(&intel_ironlake_info),
INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ),
 
INTEL_SNB_D_IDS(&intel_sandybridge_info),
INTEL_SNB_M_IDS(&intel_sandybridge_info),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ),
 
INTEL_IVB_D_IDS(&intel_ivybridge_info),
INTEL_IVB_M_IDS(&intel_ivybridge_info),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ),
 
INTEL_HSW_D_IDS(&intel_haswell_info),
INTEL_HSW_M_IDS(&intel_haswell_info),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
 
INTEL_VLV_D_IDS(&intel_valleyview_info),
INTEL_VLV_M_IDS(&intel_valleyview_info),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
 
INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
 
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
 
{ 0, 0, 0 },
};
 
958,21 → 948,7
 
}
 
int intel_get_device_id(int fd)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
 
return devid;
}
 
int drmIoctl(int fd, unsigned long request, void *arg)
{
ioctl_t io;
/drivers/video/Intel-2D/sna.h
37,83 → 37,26
#ifndef _SNA_H_
#define _SNA_H_
 
#include <stdint.h>
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "compiler.h"
 
 
#include <stdint.h>
#include <memory.h>
#include <malloc.h>
#include <errno.h>
#include <kos32sys.h>
 
#include "intel_driver.h"
#include "pciaccess.h"
 
#include <drm.h>
#include <i915_drm.h>
#include "compiler.h"
 
#ifdef HAVE_DRI2_H
#include <dri2.h>
#endif
//#define DBG(x)
//#define DBG(x) ErrorF x
 
#if HAVE_UDEV
#include <libudev.h>
#endif
#define assert(x)
 
#if 0
#include <xorg-server.h>
 
#include <xf86Crtc.h>
#if XF86_CRTC_VERSION >= 5
#define HAS_PIXMAP_SHARING 1
#endif
 
#include <xf86str.h>
#include <windowstr.h>
#include <glyphstr.h>
#include <picturestr.h>
#include <gcstruct.h>
#include <xvdix.h>
 
#include <pciaccess.h>
 
#include <xf86drmMode.h>
 
#include "../compat-api.h"
 
#endif
 
#include <assert.h>
 
#define ErrorF printf
 
#if HAS_DEBUG_FULL
#define DBG(x) ErrorF x
#else
#define DBG(x)
#endif
 
#define DEBUG_NO_BLT 0
 
#define DEBUG_FLUSH_BATCH 0
 
#define TEST_ALL 0
#define TEST_ACCEL (TEST_ALL || 0)
#define TEST_BATCH (TEST_ALL || 0)
#define TEST_BLT (TEST_ALL || 0)
#define TEST_COMPOSITE (TEST_ALL || 0)
#define TEST_DAMAGE (TEST_ALL || 0)
#define TEST_GRADIENT (TEST_ALL || 0)
#define TEST_GLYPHS (TEST_ALL || 0)
#define TEST_IO (TEST_ALL || 0)
#define TEST_KGEM (TEST_ALL || 0)
#define TEST_RENDER (TEST_ALL || 0)
 
int drmIoctl(int fd, unsigned long request, void *arg);
 
 
151,69 → 94,12
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0)
 
/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
 
/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
 
PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
 
/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
 
PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
 
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
 
PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
 
PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
/* YUV formats */
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
 
} pixman_format_code_t;
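The format codes above are plain bit-packed integers. A worked example of the encoding, assuming the standard pixman layout (bpp << 24 | type << 16 | a << 12 | r << 8 | g << 4 | b, with PIXMAN_TYPE_ARGB == 2), which is the layout the PIXMAN_FORMAT_* accessor macros below decode:

/* PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32, PIXMAN_TYPE_ARGB, 8, 8, 8, 8)
 *                 = (32 << 24) | (2 << 16) | (8 << 12) | (8 << 8) | (8 << 4) | 8
 *                 = 0x20028888
 * so, for instance, PIXMAN_FORMAT_A(PIXMAN_a8r8g8b8) recovers the
 * 8-bit alpha width. */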
 
typedef enum _PictFormatShort {
 
PICT_a2r10g10b10 = PIXMAN_a2r10g10b10,
PICT_x2r10g10b10 = PIXMAN_x2r10g10b10,
PICT_a2b10g10r10 = PIXMAN_a2b10g10r10,
PICT_x2b10g10r10 = PIXMAN_x2b10g10r10,
 
PICT_a8r8g8b8 = PIXMAN_a8r8g8b8,
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8,
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8,
221,52 → 107,10
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8,
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8,
 
/* 24bpp formats */
PICT_r8g8b8 = PIXMAN_r8g8b8,
PICT_b8g8r8 = PIXMAN_b8g8r8,
 
/* 16bpp formats */
PICT_r5g6b5 = PIXMAN_r5g6b5,
PICT_b5g6r5 = PIXMAN_b5g6r5,
 
PICT_a1r5g5b5 = PIXMAN_a1r5g5b5,
PICT_x1r5g5b5 = PIXMAN_x1r5g5b5,
PICT_a1b5g5r5 = PIXMAN_a1b5g5r5,
PICT_x1b5g5r5 = PIXMAN_x1b5g5r5,
PICT_a4r4g4b4 = PIXMAN_a4r4g4b4,
PICT_x4r4g4b4 = PIXMAN_x4r4g4b4,
PICT_a4b4g4r4 = PIXMAN_a4b4g4r4,
PICT_x4b4g4r4 = PIXMAN_x4b4g4r4,
 
/* 8bpp formats */
PICT_a8 = PIXMAN_a8,
PICT_r3g3b2 = PIXMAN_r3g3b2,
PICT_b2g3r3 = PIXMAN_b2g3r3,
PICT_a2r2g2b2 = PIXMAN_a2r2g2b2,
PICT_a2b2g2r2 = PIXMAN_a2b2g2r2,
 
PICT_c8 = PIXMAN_c8,
PICT_g8 = PIXMAN_g8,
 
PICT_x4a4 = PIXMAN_x4a4,
 
PICT_x4c4 = PIXMAN_x4c4,
PICT_x4g4 = PIXMAN_x4g4,
 
/* 4bpp formats */
PICT_a4 = PIXMAN_a4,
PICT_r1g2b1 = PIXMAN_r1g2b1,
PICT_b1g2r1 = PIXMAN_b1g2r1,
PICT_a1r1g1b1 = PIXMAN_a1r1g1b1,
PICT_a1b1g1r1 = PIXMAN_a1b1g1r1,
 
PICT_c4 = PIXMAN_c4,
PICT_g4 = PIXMAN_g4,
 
/* 1bpp formats */
PICT_a1 = PIXMAN_a1,
 
PICT_g1 = PIXMAN_g1
} PictFormatShort;
 
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
422,22 → 266,8
#define PictOpMaximum 13
 
 
#define SNA_CURSOR_X 64
#define SNA_CURSOR_Y SNA_CURSOR_X
 
struct sna_client {
int is_compositor; /* only 4 bits used */
};
 
 
//#define assert(x)
 
 
struct sna {
struct kgem kgem;
 
unsigned scrn;
 
unsigned flags;
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
444,31 → 274,21
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
#define SNA_FLUSH_GTT 0x40
#define SNA_IS_HOSTED 0x80
#define SNA_PERFORMANCE 0x100
#define SNA_POWERSAVE 0x200
#define SNA_REPROBE 0x80000000
 
unsigned cpu_features;
#define MMX 0x1
#define SSE 0x2
#define SSE2 0x4
#define SSE3 0x8
#define SSSE3 0x10
#define SSE4_1 0x20
#define SSE4_2 0x40
#define AVX 0x80
#define AVX2 0x100
 
struct list flush_pixmaps;
struct list active_pixmaps;
 
 
 
// int vblank_interval;
 
// struct list deferred_free;
// struct list dirty_pixmaps;
// struct list active_pixmaps;
// struct list inactive_clock[2];
 
unsigned int tiling;
#define SNA_TILING_DISABLE 0x0
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
#define SNA_TILING_ALL (~0)
483,6 → 303,7
uint32_t fill_alu;
} blt_state;
union {
// struct gen2_render_state gen2;
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
497,11 → 318,12
/* Driver phase/state information */
// Bool suspended;
 
struct kgem kgem;
struct sna_render render;
 
#if DEBUG_MEMORY
struct {
int pixmap_allocs;
int shadow_pixels_allocs;
int cpu_bo_allocs;
size_t shadow_pixels_bytes;
size_t cpu_bo_bytes;
530,78 → 352,4
#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif
static inline bool
_sna_transform_point(const PictTransform *transform,
int64_t x, int64_t y, int64_t result[3])
{
int j;
 
for (j = 0; j < 3; j++)
result[j] = (transform->matrix[j][0] * x +
transform->matrix[j][1] * y +
transform->matrix[j][2]);
 
return result[2] != 0;
}
 
static inline void
_sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out)
{
 
int64_t result[3];
 
_sna_transform_point(transform, x, y, result);
*x_out = result[0] / (double)result[2];
*y_out = result[1] / (double)result[2];
}
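The helpers above work in homogeneous coordinates with 16.16 fixed-point PictTransform entries; for an affine transform result[2] is pixman_fixed_1, so the division cancels the fixed-point scale. A standalone sketch replaying the same arithmetic for a pure translation (values chosen for illustration only, not taken from the driver):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const int64_t one = 1 << 16;            /* pixman_fixed_1 */
        const int64_t m[3][3] = {               /* translate by (+5, -3) */
                { one,   0,  5 * one },
                {   0, one, -3 * one },
                {   0,   0,      one }
        };
        int64_t x = 10, y = 20, r[3];
        int j;

        /* same per-row sum as _sna_transform_point() */
        for (j = 0; j < 3; j++)
                r[j] = m[j][0] * x + m[j][1] * y + m[j][2];

        /* r[2] == pixman_fixed_1, so dividing restores pixel units:
         * prints 15.000000 17.000000 */
        printf("%f %f\n", r[0] / (double)r[2], r[1] / (double)r[2]);
        return 0;
}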
 
static inline void
_sna_get_transformed_scaled(int x, int y,
const PictTransform *transform, const float *sf,
float *x_out, float *y_out)
{
*x_out = sf[0] * (transform->matrix[0][0] * x +
transform->matrix[0][1] * y +
transform->matrix[0][2]);
 
*y_out = sf[1] * (transform->matrix[1][0] * x +
transform->matrix[1][1] * y +
transform->matrix[1][2]);
}
 
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out);
 
void
sna_get_transformed_coordinates_3d(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out, float *z_out);
 
bool sna_transform_is_affine(const PictTransform *t);
bool sna_transform_is_integer_translation(const PictTransform *t,
int16_t *tx, int16_t *ty);
bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
static inline bool
sna_affine_transform_is_rotation(const PictTransform *t)
{
assert(sna_transform_is_affine(t));
return t->matrix[0][1] | t->matrix[1][0];
}
 
static inline bool
sna_transform_equal(const PictTransform *a, const PictTransform *b)
{
if (a == b)
return true;
 
if (a == NULL || b == NULL)
return false;
 
return memcmp(a, b, sizeof(*a)) == 0;
}
#endif /* _SNA_H */
/drivers/video/Intel-2D/sna_render.h
30,7 → 30,11
struct sna_composite_op {
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
 
fastcall void (*box)(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
 
struct sna_damage **damage;
188,6 → 192,9
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap);
 
bool (*fill_boxes)(struct sna *sna,
351,10 → 358,10
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
uint16_t last_pipelined_pointers;
 
bool needs_invariant;
};
498,15 → 505,17
uint32_t format);
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
const char *no_render_init(struct sna *sna);
const char *gen2_render_init(struct sna *sna, const char *backend);
const char *gen3_render_init(struct sna *sna, const char *backend);
const char *gen4_render_init(struct sna *sna, const char *backend);
const char *gen5_render_init(struct sna *sna, const char *backend);
const char *gen6_render_init(struct sna *sna, const char *backend);
const char *gen7_render_init(struct sna *sna, const char *backend);
void no_render_init(struct sna *sna);
 
bool gen2_render_init(struct sna *sna);
bool gen3_render_init(struct sna *sna);
bool gen4_render_init(struct sna *sna);
bool gen5_render_init(struct sna *sna);
bool gen6_render_init(struct sna *sna);
bool gen7_render_init(struct sna *sna);
 
#if 0
 
bool sna_tiling_composite(uint32_t op,
PicturePtr src,
PicturePtr mask,
674,8 → 683,7
bool
sna_render_composite_redirect(struct sna *sna,
struct sna_composite_op *op,
int x, int y, int width, int height,
bool partial);
int x, int y, int width, int height);
 
void
sna_render_composite_redirect_done(struct sna *sna,
683,8 → 691,8
 
bool
sna_composite_mask_is_opaque(PicturePtr mask);
 
#endif
 
void sna_vertex_init(struct sna *sna);
 
static inline void sna_vertex_lock(struct sna_render *r)
718,69 → 726,4
return was_active;
}
 
#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
PICT_FORMAT_TYPE(format), \
0, \
PICT_FORMAT_R(format), \
PICT_FORMAT_G(format), \
PICT_FORMAT_B(format))
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask, struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
#endif /* SNA_RENDER_H */
/drivers/video/Intel-2D/brw/brw_eu.h
2239,6 → 2239,8
struct brw_reg src0,
struct brw_reg src1);
 
void brw_print_reg(struct brw_reg reg);
 
static inline void brw_math_invert(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
/drivers/video/Intel-2D/compiler.h
37,48 → 37,21
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define tightly_packed __attribute__((__packed__))
#define __packed__ __attribute__((__packed__))
#define flatten __attribute__((flatten))
#define page_aligned __attribute__((aligned(4096)))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline inline
#define force_inline
#define fastcall
#define must_check
#define constant
#define pure
#define tightly_packed
#define __packed__
#define flatten
#define page_aligned
#endif
 
#define HAS_GCC(major, minor) defined(__GNUC__) && (__GNUC__ > (major) || __GNUC__ == (major) && __GNUC_MINOR__ >= (minor))
 
#if HAS_GCC(4, 5)
#define sse2 __attribute__((target("sse2,fpmath=sse")))
#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 7)
#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast __attribute__((optimize("Ofast")))
#else
#define fast
#endif
 
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))
#else
#define fast_memcpy
#endif
 
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
/drivers/video/Intel-2D/sna_render_inline.h
17,7 → 17,7
dst->drawable.height > sna->render.max_3d_size);
}
 
static force_inline float pack_2s(int16_t x, int16_t y)
static inline float pack_2s(int16_t x, int16_t y)
{
union {
struct sna_coordinate p;
28,21 → 28,21
return u.f;
}
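pack_2s reinterprets two signed 16-bit coordinates as the 32 bits of one float, so a vertex position costs a single dword; the float value itself is never used as a number. A sketch of an equivalent explicit packing, assuming struct sna_coordinate lays out x in the low half-word and y in the high half-word on a little-endian target (an assumption, not taken from the header):

#include <stdint.h>
#include <string.h>

static float pack_2s_bits(int16_t x, int16_t y)
{
        uint32_t bits = (uint16_t)x | ((uint32_t)(uint16_t)y << 16);
        float f;

        memcpy(&f, &bits, sizeof(f));   /* reinterpret the bits, no conversion */
        return f;
}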
 
static force_inline int vertex_space(struct sna *sna)
static inline int vertex_space(struct sna *sna)
{
return sna->render.vertex_size - sna->render.vertex_used;
}
static force_inline void vertex_emit(struct sna *sna, float v)
static inline void vertex_emit(struct sna *sna, float v)
{
assert(sna->render.vertex_used < sna->render.vertex_size);
sna->render.vertices[sna->render.vertex_used++] = v;
}
static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
vertex_emit(sna, pack_2s(x, y));
}
 
static force_inline int batch_space(struct sna *sna)
static inline int batch_space(struct sna *sna)
{
assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);
49,7 → 49,7
return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED;
}
 
static force_inline void batch_emit(struct sna *sna, uint32_t dword)
static inline void batch_emit(struct sna *sna, uint32_t dword)
{
assert(sna->kgem.mode != KGEM_NONE);
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);
56,7 → 56,7
sna->kgem.batch[sna->kgem.nbatch++] = dword;
}
 
static force_inline void batch_emit_float(struct sna *sna, float f)
static inline void batch_emit_float(struct sna *sna, float f)
{
union {
uint32_t dw;
67,4 → 67,5
}
 
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/gen4_vertex.c
34,10 → 34,6
#include "sna_render_inline.h"
#include "gen4_vertex.h"
 
#ifndef sse2
#define sse2
#endif
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
227,601 → 223,7
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
/* specialised vertex emission routines */
 
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
force_inline static float
compute_linear(const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
return ((x+channel->offset[0]) * channel->u.linear.dx +
(y+channel->offset[1]) * channel->u.linear.dy +
channel->u.linear.offset);
}
 
sse2 inline static void
emit_texcoord(struct sna *sna,
const struct sna_composite_channel *channel,
int16_t x, int16_t y)
{
if (channel->is_solid) {
OUT_VERTEX_F(x);
return;
}
 
x += channel->offset[0];
y += channel->offset[1];
 
if (channel->is_affine) {
float s, t;
 
sna_get_transformed_coordinates(x, y,
channel->transform,
&s, &t);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
} else {
float s, t, w;
 
sna_get_transformed_coordinates_3d(x, y,
channel->transform,
&s, &t, &w);
OUT_VERTEX_F(s * channel->scale[0]);
OUT_VERTEX_F(t * channel->scale[1]);
OUT_VERTEX_F(w);
}
}
 
sse2 force_inline static void
emit_vertex(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
}
 
sse2 fastcall static void
emit_primitive(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
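/*
 * Note: emit_primitive() writes three vertices per composite rectangle,
 * (x2,y2), (x1,y2) and (x1,y1); the hardware RECTLIST primitive derives
 * the fourth corner itself.  Hence op->floats_per_rect is always
 * 3 * op->floats_per_vertex, as set at the end of
 * gen4_choose_composite_emitter() below.
 */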
 
sse2 force_inline static void
emit_vertex_mask(struct sna *sna,
const struct sna_composite_op *op,
int16_t srcX, int16_t srcY,
int16_t mskX, int16_t mskY,
int16_t dstX, int16_t dstY)
{
OUT_VERTEX(dstX, dstY);
emit_texcoord(sna, &op->src, srcX, srcY);
emit_texcoord(sna, &op->mask, mskX, mskY);
}
 
sse2 fastcall static void
emit_primitive_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
emit_vertex_mask(sna, op,
r->src.x + r->width, r->src.y + r->height,
r->mask.x + r->width, r->mask.y + r->height,
r->dst.x + r->width, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y + r->height,
r->mask.x, r->mask.y + r->height,
r->dst.x, r->dst.y + r->height);
emit_vertex_mask(sna, op,
r->src.x, r->src.y,
r->mask.x, r->mask.y,
r->dst.x, r->dst.y);
}
 
sse2 fastcall static void
emit_primitive_solid(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_solid(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[5] = v[3] = v[1] = .5;
box++;
v += 6;
} while (--nbox);
}
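/*
 * Note: for a solid source only the packed destination coordinate varies,
 * so the emitters above write 2 floats per vertex (6 per rect, matching
 * the asserts).  The constant .5 texcoord presumably samples the centre
 * of the 1x1 solid-colour surface bound for the channel.
 */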
 
sse2 fastcall static void
emit_primitive_linear(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse2 fastcall static void
emit_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse2 fastcall static void
emit_boxes_simple_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
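/*
 * Note: the "simple source" emitters handle affine transforms without
 * rotation or shear, so each texture coordinate reduces to a per-axis
 * scale plus offset (the xx/x0 and yy/y0 terms read straight from the
 * transform matrix above), normalised by op->src.scale[].
 */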
 
sse2 fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = r->dst.x;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = r->dst.y;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[7], &v[8]);
}
 
sse2 fastcall static void
emit_boxes_affine_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = box->x1;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = box->y1;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y1,
op->src.transform, op->src.scale,
&v[7], &v[8]);
box++;
v += 9;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
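/*
 * Note: the *_identity_mask emitters write 4 floats per vertex (12 per
 * rect): the packed destination coordinate, one source value (the
 * constant .5 for solid, or the computed gradient value for linear) and
 * two mask texcoords.  This matches floats_per_vertex = 4 and
 * vb = 1 | 2 << 2 chosen in gen4_choose_composite_emitter().
 */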
 
sse2 fastcall static void
fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
870,831 → 272,8
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_simple_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = ((r->src.x + tx) * xx + x0) * sx;
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = ((r->src.y + ty) * yy + y0) * sy;
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_affine_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[6], &v[7]);
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[11], &v[12]);
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
/* SSE4_2 */
#if defined(sse4_2)
 
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
 
/* AVX2 */
#if defined(avx2)
 
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
assert(op->floats_per_rect == 6);
assert((sna->render.vertex_used % 2) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[2] = dst.f;
dst.p.y = r->dst.y;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
union {
struct sna_coordinate p;
float f;
} dst;
 
do {
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[2] = dst.f;
dst.p.y = box->y1;
v[4] = dst.f;
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[3] = compute_linear(&op->src, box->x1, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y1);
 
v += 6;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
avx2 fastcall static void
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
avx2 fastcall static void
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
#endif
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
{
unsigned vb;
 
if (tmp->mask.bo) {
1701,38 → 280,8
if (tmp->mask.transform == NULL) {
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear_identity_mask;
}
tmp->floats_per_vertex = 4;
vb = 1 | 2 << 2;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source_mask;
1739,130 → 288,17
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_simple_source_identity;
} else {
DBG(("%s: affine src, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source_identity;
}
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else {
DBG(("%s: projective source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 6;
vb = 2 << 2 | 3;
}
} else {
tmp->prim_emit = emit_primitive_mask;
tmp->floats_per_vertex = 1;
vb = 0;
if (tmp->mask.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1 << 2;
} else if (tmp->mask.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2 << 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3 << 2;
}
if (tmp->src.is_solid) {
tmp->floats_per_vertex += 1;
vb |= 1;
} else if (tmp->src.is_affine) {
tmp->floats_per_vertex += 2;
vb |= 2;
} else {
tmp->floats_per_vertex += 3;
vb |= 3;
}
DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
__FUNCTION__, tmp->floats_per_vertex, vb));
}
} else {
#if 0
if (tmp->src.is_solid) {
DBG(("%s: solid, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_solid;
if (tmp->src.is_opaque && tmp->op == PictOpOver)
tmp->op = PictOpSrc;
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.is_linear) {
DBG(("%s: linear, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_linear__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_linear__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_linear;
}
tmp->floats_per_vertex = 2;
vb = 1;
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_identity_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_identity_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_identity_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
DBG(("%s: simple src, no mask\n", __FUNCTION__));
#if defined(avx2)
if (sna->cpu_features & AVX2) {
tmp->prim_emit = emit_primitive_simple_source__avx2;
} else
#endif
#if defined(sse4_2)
if (sna->cpu_features & SSE4_2) {
tmp->prim_emit = emit_primitive_simple_source__sse4_2;
} else
#endif
{
tmp->prim_emit = emit_primitive_simple_source;
}
} else {
DBG(("%s: affine src, no mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_affine_source;
}
tmp->floats_per_vertex = 3;
vb = 2;
} else {
DBG(("%s: projective src, no mask\n", __FUNCTION__));
assert(!tmp->src.is_solid);
tmp->prim_emit = emit_primitive;
tmp->floats_per_vertex = 4;
vb = 3;
}
#endif
}
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
return vb;
}
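/*
 * The returned "vb" value packs the per-channel vertex-element widths
 * chosen above: bits [1:0] give the number of floats emitted for the
 * source texcoord (1 = solid/linear, 2 = affine s/t, 3 = projective
 * s/t/w) and bits [3:2] the same for the mask.  Together with the packed
 * destination dword this accounts for floats_per_vertex; a small sketch
 * (not part of the driver):
 */
static int floats_per_vertex_from_vb(unsigned vb)
{
	unsigned src_comps  = vb & 3;		/* 0 when no source texcoord is emitted */
	unsigned mask_comps = (vb >> 2) & 3;	/* 0 when there is no mask channel */

	return 1 + src_comps + mask_comps;	/* +1 for the packed (x,y) dst dword */
}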
 
 
 
/drivers/video/Intel-2D/gen7_render.h
1224,8 → 1224,6
 
/* Surface state DW0 */
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILED (1 << 14)
#define GEN7_SURFACE_TILED_Y (1 << 13)
#define GEN7_SURFACE_FORMAT_SHIFT 18
/drivers/video/Intel-2D/gen4_vertex.h
10,6 → 10,7
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp);
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
 
#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/intel_driver.h
9,52 → 9,180
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL
 
#ifndef PCI_CHIP_I810
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I810_BRIDGE 0x7120
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
#define PCI_CHIP_I810_E_BRIDGE 0x7124
#define PCI_CHIP_I815_BRIDGE 0x1130
#endif
 
#ifndef PCI_CHIP_I830_M
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_I830_M_BRIDGE 0x3575
#endif
 
#ifndef PCI_CHIP_845_G
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_845_G_BRIDGE 0x2560
#endif
 
#ifndef PCI_CHIP_I854
#define PCI_CHIP_I854 0x358E
#define PCI_CHIP_I854_BRIDGE 0x358C
#endif
 
#ifndef PCI_CHIP_I855_GM
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I855_GM_BRIDGE 0x3580
#endif
 
#ifndef PCI_CHIP_I865_G
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I865_G_BRIDGE 0x2570
#endif
 
#ifndef PCI_CHIP_I915_G
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_I915_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I915_GM
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I915_GM_BRIDGE 0x2590
#endif
 
#ifndef PCI_CHIP_E7221_G
#define PCI_CHIP_E7221_G 0x258A
/* Same as I915_G_BRIDGE */
#define PCI_CHIP_E7221_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I945_G
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_G_BRIDGE 0x2770
#endif
 
#ifndef PCI_CHIP_I945_GM
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
#endif
 
#ifndef PCI_CHIP_I945_GME
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC
#endif
 
#ifndef PCI_CHIP_PINEVIEW_M
#define PCI_CHIP_PINEVIEW_M 0xA011
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010
#define PCI_CHIP_PINEVIEW_G 0xA001
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000
#endif
 
#ifndef PCI_CHIP_G35_G
#define PCI_CHIP_G35_G 0x2982
#define PCI_CHIP_G35_G_BRIDGE 0x2980
#endif
 
#ifndef PCI_CHIP_I965_Q
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_Q_BRIDGE 0x2990
#endif
 
#ifndef PCI_CHIP_I965_G
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_G_BRIDGE 0x29A0
#endif
 
#ifndef PCI_CHIP_I946_GZ
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970
#endif
 
#ifndef PCI_CHIP_I965_GM
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00
#endif
 
#ifndef PCI_CHIP_I965_GME
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10
#endif
 
#ifndef PCI_CHIP_G33_G
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_G33_G_BRIDGE 0x29C0
#endif
 
#ifndef PCI_CHIP_Q35_G
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0
#endif
 
#ifndef PCI_CHIP_Q33_G
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0
#endif
 
#ifndef PCI_CHIP_GM45_GM
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_GM45_BRIDGE 0x2A40
#endif
 
#ifndef PCI_CHIP_G45_E_G
#define PCI_CHIP_G45_E_G 0x2E02
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00
#endif
 
#ifndef PCI_CHIP_G45_G
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G45_G_BRIDGE 0x2E20
#endif
 
#ifndef PCI_CHIP_Q45_G
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10
#endif
 
#ifndef PCI_CHIP_G41_G
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_G41_G_BRIDGE 0x2E30
#endif
 
#ifndef PCI_CHIP_B43_G
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G_BRIDGE 0x2E40
#endif
 
#ifndef PCI_CHIP_B43_G1
#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90
#endif
 
#ifndef PCI_CHIP_IRONLAKE_D_G
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040
#endif
 
#ifndef PCI_CHIP_IRONLAKE_M_G
#define PCI_CHIP_IRONLAKE_M_G 0x0046
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044
#endif
 
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
 
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156
66,52 → 194,88
 
#define PCI_CHIP_HASWELL_D_GT1 0x0402
#define PCI_CHIP_HASWELL_D_GT2 0x0412
#define PCI_CHIP_HASWELL_D_GT3 0x0422
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT3 0x0426
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT3 0x042A
#define PCI_CHIP_HASWELL_B_GT1 0x040B
#define PCI_CHIP_HASWELL_B_GT2 0x041B
#define PCI_CHIP_HASWELL_B_GT3 0x042B
#define PCI_CHIP_HASWELL_E_GT1 0x040E
#define PCI_CHIP_HASWELL_E_GT2 0x041E
#define PCI_CHIP_HASWELL_E_GT3 0x042E
 
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_D_GT3 0x0A22
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A
#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B
#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B
#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B
#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E
#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E
#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
 
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D02
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT3 0x0D22
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A
#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B
#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B
#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B
#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E
#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E
#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
#define PCI_CHIP_VALLEYVIEW_1 0x0f31
#define PCI_CHIP_VALLEYVIEW_2 0x0f32
#define PCI_CHIP_VALLEYVIEW_3 0x0f33
 
#endif
 
#define I85X_CAPID 0x44
#define I85X_VARIANT_MASK 0x7
#define I85X_VARIANT_SHIFT 5
#define I855_GME 0x0
#define I855_GM 0x4
#define I852_GME 0x2
#define I852_GM 0x5
 
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr
#define VENDOR_ID(p) (p)->vendor_id
#define DEVICE_ID(p) (p)->device_id
#define SUBVENDOR_ID(p) (p)->subvendor_id
#define SUBSYS_ID(p) (p)->subdevice_id
#define CHIP_REVISION(p) (p)->revision
 
#define INTEL_INFO(intel) ((intel)->info)
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
#define IS_GEN4(intel) IS_GENx(intel, 4)
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
/* Some chips have specific errata (or limits) that we need to work around. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
 
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
 
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
 
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
 
struct intel_device_info {
int gen;
};
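/*
 * intel_device_info.gen encodes the generation as 8*major + minor,
 * conventionally written in octal so the digits read as "major.minor":
 * 040 is gen4.0, 060 is gen6.0 and 075 is Haswell (gen7.5).  That is why
 * IS_GENx() tests the range [8*X, 8*(X+1)) and IS_HSW() compares against
 * 075.  A small sketch (not part of the driver) of how the encoding
 * decomposes:
 */
static int gen_major(const struct intel_device_info *info)
{
	return info->gen >> 3;		/* 075 >> 3 == 7 */
}

static int gen_minor(const struct intel_device_info *info)
{
	return info->gen & 7;		/* 075 & 7 == 5 */
}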
/drivers/video/Intel-2D/intel_list.h
107,7 → 107,6
* There are no requirements for a list head; any struct list can be a list
* head.
*/
 
struct list {
struct list *next, *prev;
};