WebSVN – Kolibri OS – Path Comparison – / – /drivers/ Rev 3298 and /drivers/ Rev 3299

Regard whitespace Rev 3298 → Rev 3299

 /drivers/video/Intel-2D/brw/brw_sf.c
 ,0 → 1,54
+#include "brw.h"
+/*
+ * Assemble the SF kernel variant used when there is no mask channel.
+ *
+ * The emitted program:
+ *   - runs brw_math_invert() from g1.11 into g6,
+ *   - copies v0 (g3) to message register 3,
+ *   - writes (v1 - v2) * g6.0 to message register 1,
+ *   - writes (v2 - v0) * g6.2 to message register 2,
+ *   - finishes with a transposed URB write.
+ *
+ * Always returns true.
+ */
+bool brw_sf_kernel__nomask(struct brw_compile *p)
+{
+        const struct brw_reg v0 = brw_vec4_grf(3, 0);
+        const struct brw_reg v1 = brw_vec4_grf(4, 0);
+        const struct brw_reg v2 = brw_vec4_grf(5, 0);
+        const struct brw_reg delta = brw_vec8_grf(7, 0);
+        const struct brw_reg inv = brw_vec4_grf(6, 0);
+        brw_math_invert(p, inv, brw_vec4_grf(1, 11));
+        /* Attribute value at v0 goes out unchanged. */
+        brw_MOV(p, brw_message_reg(3), v0);
+        /* First difference, scaled by component 0 of the inverted register. */
+        brw_ADD(p, delta, v1, brw_negate(v2));
+        brw_MUL(p, brw_message_reg(1), delta, brw_vec1_grf(6, 0));
+        /* Second difference, scaled by component 2 of the inverted register. */
+        brw_ADD(p, delta, v2, brw_negate(v0));
+        brw_MUL(p, brw_message_reg(2), delta, brw_vec1_grf(6, 2));
+        brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0),
+                      false, true, 4, 0, true, true, 0,
+                      BRW_URB_SWIZZLE_TRANSPOSE);
+        return true;
+}
+/*
+ * Assemble the SF kernel variant used when a mask channel is present.
+ *
+ * Same instruction sequence as brw_sf_kernel__nomask, except that the
+ * vertex registers g3..g5 are addressed 8-wide instead of 4-wide.
+ *
+ * Always returns true.
+ */
+bool brw_sf_kernel__mask(struct brw_compile *p)
+{
+        const struct brw_reg v0 = brw_vec8_grf(3, 0);
+        const struct brw_reg v1 = brw_vec8_grf(4, 0);
+        const struct brw_reg v2 = brw_vec8_grf(5, 0);
+        const struct brw_reg inv = brw_vec4_grf(6, 0);
+        /* Scratch register holding each vertex difference before scaling. */
+        const struct brw_reg delta = brw_vec8_grf(7, 0);
+        brw_math_invert(p, inv, brw_vec4_grf(1, 11));
+        brw_MOV(p, brw_message_reg(3), v0);
+        brw_ADD(p, delta, v1, brw_negate(v2));
+        brw_MUL(p, brw_message_reg(1), delta, brw_vec1_grf(6, 0));
+        brw_ADD(p, delta, v2, brw_negate(v0));
+        brw_MUL(p, brw_message_reg(2), delta, brw_vec1_grf(6, 2));
+        brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0, 0),
+                      false, true, 4, 0, true, true, 0,
+                      BRW_URB_SWIZZLE_TRANSPOSE);
+        return true;
+}

 /drivers/video/Intel-2D/gen3_render.c
 ,0 → 1,1967
+/*
+ * Copyright © 2010-2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "sna.h"
+#include "sna_render.h"
+#include "sna_render_inline.h"
+#include "sna_reg.h"
+//#include "sna_video.h"
+#include "gen3_render.h"
+#define NO_COMPOSITE 0
+#define NO_COMPOSITE_SPANS 0
+#define NO_COPY 0
+#define NO_COPY_BOXES 0
+#define NO_FILL 0
+#define NO_FILL_ONE 0
+#define NO_FILL_BOXES 0
+#define PREFER_BLT_FILL 1
+enum { /* Kinds of per-channel shader input, compared against channel->u.gen3.type. */
+        SHADER_NONE = 0, /* channel absent; a mask of this type is treated as "no mask" */
+        SHADER_ZERO, /* unmasked source: output colour is all zero */
+        SHADER_BLACK, /* unmasked source: (0, 0, 0, 1), or all ones for an alpha-only target */
+        SHADER_WHITE, /* unmasked source: output colour is all ones */
+        SHADER_CONSTANT, /* value comes from a declared input register (FS_T8 for source, FS_T9 for mask) */
+        SHADER_LINEAR, /* texture lookup at a coordinate produced by gen3_linear_coord() */
+        SHADER_RADIAL, /* texture lookup at a coordinate produced by gen3_radial_coord() */
+        SHADER_TEXTURE, /* direct texture lookup; projective variant when the channel is not affine */
+        SHADER_OPACITY, /* mask only: scalar read from .x of a texcoord register; asserted invalid as a source */
+};
+#define MAX_3D_SIZE 2048
+#define MAX_3D_PITCH 8192
+#define OUT_BATCH(v) batch_emit(sna, v)
+#define OUT_BATCH_F(v) batch_emit_float(sna, v)
+#define OUT_VERTEX(v) vertex_emit(sna, v)
+enum gen3_radial_mode { /* Selects which formula gen3_radial_coord() emits (channel->u.gen3.mode). */
+        RADIAL_ONE, /* single rsq-based expression: t = r^2/sqrt(r^2) - r1/dr */
+        RADIAL_TWO /* quadratic form: t = (-B + sqrt(B*B - 4*A*C)) / (2*A) */
+};
+static const struct blendinfo { /* One row per blend operator; indexed by the operator number (see gen3_blend_op[blend] users). */
+        bool dst_alpha; /* operator's factors reference destination alpha */
+        bool src_alpha; /* operator's factors reference source alpha; also folded into the shader id */
+        uint32_t src_blend; /* BLENDFACT_* applied to the source colour */
+        uint32_t dst_blend; /* BLENDFACT_* applied to the destination colour */
+} gen3_blend_op[] = { /* NOTE(review): row order presumably follows the Render PictOp numbering - confirm */
+        /* Clear */     {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
+        /* Src */       {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
+        /* Dst */       {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
+        /* Over */      {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
+        /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
+        /* In */        {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
+        /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
+        /* Out */       {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
+        /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
+        /* Atop */      {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+        /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
+        /* Xor */       {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
+        /* Add */       {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
+};
+#define S6_COLOR_WRITE_ONLY \
+        (S6_COLOR_WRITE_ENABLE | \
+         BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \
+         BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \
+         BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT)
+static const struct formatinfo { /* Picture format -> sampler format table. NOTE(review): its consumers are outside this view. */
+        unsigned int fmt, xfmt; /* fmt is a PICT_* code; xfmt is 0 in every row below */
+        uint32_t card_fmt; /* MAPSURF_* | MT_* bits for the matching hardware surface type */
+        bool rb_reversed; /* false in every row below; presumably flags a red/blue swap - confirm */
+} gen3_tex_formats[] = {
+        {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
+        {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
+        {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
+        {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
+        {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}
+};
+#define xFixedToDouble(f) pixman_fixed_to_double(f)
+/* Report whether either dimension exceeds MAX_3D_SIZE. */
+static inline bool too_large(int width, int height)
+{
+        if (width > MAX_3D_SIZE)
+                return true;
+        return height > MAX_3D_SIZE;
+}
+/*
+ * Translate an I915_TILING_* mode into BUF_3D_* tiling bits.
+ * Y tiling sets both the tile-walk and tiled-surface bits, X tiling only
+ * the tiled-surface bit; any other value yields 0.
+ */
+static inline uint32_t gen3_buf_tiling(uint32_t tiling)
+{
+        if (tiling == I915_TILING_Y)
+                return BUF_3D_TILE_WALK_Y | BUF_3D_TILED_SURFACE;
+        if (tiling == I915_TILING_X)
+                return BUF_3D_TILED_SURFACE;
+        return 0;
+}
+/*
+ * Build the blend-control dword for a composite operation.
+ *
+ * NOTE: the per-operator adjustment code is compiled out (#if 0 below), so
+ * op, has_component_alpha and dst_format are currently ignored.  The result
+ * always enables blending and colour writes with BLENDFUNC_ADD, a source
+ * factor of BLENDFACT_ONE and a destination factor of
+ * BLENDFACT_INV_SRC_ALPHA.
+ */
+static uint32_t gen3_get_blend_cntl(int op,
+                                    bool has_component_alpha,
+                                    uint32_t dst_format)
+{
+        uint32_t sblend = BLENDFACT_ONE;
+        uint32_t dblend = BLENDFACT_INV_SRC_ALPHA;
+        uint32_t cntl;
+#if 0
+        if (op <= PictOpSrc) /* for clear and src disable blending */
+                return S6_COLOR_WRITE_ONLY;
+        /* If there's no dst alpha channel, adjust the blend op so that we'll
+         * treat it as always 1.
+         */
+        if (gen3_blend_op[op].dst_alpha) {
+                if (PICT_FORMAT_A(dst_format) == 0) {
+                        if (sblend == BLENDFACT_DST_ALPHA)
+                                sblend = BLENDFACT_ONE;
+                        else if (sblend == BLENDFACT_INV_DST_ALPHA)
+                                sblend = BLENDFACT_ZERO;
+                }
+                /* gen3 engine reads 8bit color buffer into green channel
+                 * in cases like color buffer blending etc., and also writes
+                 * back green channel.  So with dst_alpha blend we should use
+                 * color factor. See spec on "8-bit rendering".
+                 */
+                if (dst_format == PICT_a8) {
+                        if (sblend == BLENDFACT_DST_ALPHA)
+                                sblend = BLENDFACT_DST_COLR;
+                        else if (sblend == BLENDFACT_INV_DST_ALPHA)
+                                sblend = BLENDFACT_INV_DST_COLR;
+                }
+        }
+        /* If the source alpha is being used, then we should only be in a case
+         * where the source blend factor is 0, and the source blend value is the
+         * mask channels multiplied by the source picture's alpha.
+         */
+        if (has_component_alpha && gen3_blend_op[op].src_alpha) {
+                if (dblend == BLENDFACT_SRC_ALPHA)
+                        dblend = BLENDFACT_SRC_COLR;
+                else if (dblend == BLENDFACT_INV_SRC_ALPHA)
+                        dblend = BLENDFACT_INV_SRC_COLR;
+        }
+#endif
+        /* Assemble the dword one field at a time. */
+        cntl = S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE;
+        cntl |= BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT;
+        cntl |= sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT;
+        cntl |= dblend << S6_CBUF_DST_BLEND_FACT_SHIFT;
+        return cntl;
+}
+/*
+ * Report whether a destination format needs red and blue swapped.
+ * Only PICT_a8r8g8b8, PICT_x8r8g8b8 and PICT_a8 are treated as being in
+ * native order; every other format returns true.
+ */
+static bool gen3_dst_rb_reversed(uint32_t format)
+{
+        const bool native_order = format == PICT_a8r8g8b8 ||
+                                  format == PICT_x8r8g8b8 ||
+                                  format == PICT_a8;
+        return !native_order;
+}
+#define DSTORG_HORT_BIAS(x)             ((x)<<20)
+#define DSTORG_VERT_BIAS(x)             ((x)<<16)
+/*
+ * Pick the colour-buffer format bits for a destination picture format.
+ * PICT_a8 selects the 8-bit buffer; everything else (including unknown
+ * formats) selects ARGB8888.  Both results carry a horizontal and vertical
+ * origin bias of 0x8.
+ */
+static uint32_t gen3_get_dst_format(uint32_t format)
+{
+#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
+        if (format == PICT_a8)
+                return BIAS | COLR_BUF_8BIT;
+        return BIAS | COLR_BUF_ARGB8888;
+#undef BIAS
+}
+/*
+ * Append one rectangle (three vertices, six floats each) to the vertex
+ * buffer for an op whose source and mask are both sampled without a
+ * transform.
+ *
+ * Each vertex is laid out as: dst x, dst y, src u, src v, mask u, mask v.
+ * Texture coordinates are the pixel position plus the channel offset,
+ * multiplied by the channel scale.  The vertices are emitted in the order
+ * (x + w, y + h), (x, y + h), (x, y).
+ */
+fastcall static void
+gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
+                                                   const struct sna_composite_op *op,
+                                                   const struct sna_composite_rectangles *r)
+{
+        const float dst_x = r->dst.x + op->dst.x;
+        const float dst_y = r->dst.y + op->dst.y;
+        const float src_x = r->src.x + op->src.offset[0];
+        const float src_y = r->src.y + op->src.offset[1];
+        const float msk_x = r->mask.x + op->mask.offset[0];
+        const float msk_y = r->mask.y + op->mask.offset[1];
+        const float w = r->width;
+        const float h = r->height;
+        /* Claim 18 floats at the current end of the vertex buffer. */
+        float *v = sna->render.vertices + sna->render.vertex_used;
+        sna->render.vertex_used += 18;
+        /* Vertex 0: (x + w, y + h). */
+        v[0] = dst_x + w;
+        v[1] = dst_y + h;
+        v[2] = (src_x + w) * op->src.scale[0];
+        v[3] = (src_y + h) * op->src.scale[1];
+        v[4] = (msk_x + w) * op->mask.scale[0];
+        v[5] = (msk_y + h) * op->mask.scale[1];
+        /* Vertex 1: (x, y + h); the y/v components are copied from vertex 0. */
+        v[6] = dst_x;
+        v[7] = v[1];
+        v[8] = src_x * op->src.scale[0];
+        v[9] = v[3];
+        v[10] = msk_x * op->mask.scale[0];
+        v[11] = v[5];
+        /* Vertex 2: (x, y); the x/u components are copied from vertex 1. */
+        v[12] = v[6];
+        v[13] = dst_y;
+        v[14] = v[8];
+        v[15] = src_y * op->src.scale[1];
+        v[16] = v[10];
+        v[17] = msk_y * op->mask.scale[1];
+}
+static inline void /* Emit the perspective divide for a 2D coordinate:
+                    * out = rcp(in.w), then out = in.(x, y, 0, 1) * out.
+                    * NOTE(review): sna is not referenced directly here;
+                    * presumably the gen3_fs_* macros use it - confirm.
+                    */
+gen3_2d_perspective(struct sna *sna, int in, int out)
+{
+        gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W)); /* reciprocal of in.w */
+        gen3_fs_mul(out,
+                    gen3_fs_operand(in, X, Y, ZERO, ONE),
+                    gen3_fs_operand_reg(out)); /* scale (x, y, 0, 1) by that reciprocal */
+}
+/*
+ * Emit fragment-shader instructions that turn a 2D coordinate into the
+ * lookup coordinate of a linear gradient.
+ *
+ * out is first cleared to zero, then out.x receives the dp3 of
+ * (in.x, in.y, 1) with the constant register named by
+ * channel->u.gen3.constants.  A non-affine channel is first run through
+ * gen3_2d_perspective() into FS_U0, and that register is used as input.
+ */
+static inline void
+gen3_linear_coord(struct sna *sna,
+                  const struct sna_composite_channel *channel,
+                  int in, int out)
+{
+        const int gradient_consts = channel->u.gen3.constants;
+        int coord = in;
+        if (!channel->is_affine) {
+                gen3_2d_perspective(sna, coord, FS_U0);
+                coord = FS_U0;
+        }
+        gen3_fs_mov(out, gen3_fs_operand_zero());
+        gen3_fs_dp3(out, MASK_X,
+                    gen3_fs_operand(coord, X, Y, ONE, ZERO),
+                    gen3_fs_operand_reg(gradient_consts));
+}
+static void /* Emit fragment-shader instructions that turn a 2D coordinate
+             * into the lookup coordinate t of a radial gradient, written to
+             * out.  channel->u.gen3.mode picks the formula and
+             * channel->u.gen3.constants names the first constant register
+             * (RADIAL_TWO also reads the next one, c+1).  FS_U0 and FS_U1
+             * are used as scratch registers.
+             */
+gen3_radial_coord(struct sna *sna,
+                  const struct sna_composite_channel *channel,
+                  int in, int out)
+{
+        int c = channel->u.gen3.constants;
+        if (!channel->is_affine) {
+                gen3_2d_perspective(sna, in, FS_U0); /* perspective divide first */
+                in = FS_U0;
+        }
+        switch (channel->u.gen3.mode) {
+        case RADIAL_ONE:
+                /*
+                   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
+                   r^2 = pdx*pdx + pdy*pdy
+                   t = r^2/sqrt(r^2) - r1/dr;
+                   */
+                gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
+                            gen3_fs_operand(in, X, Y, ZERO, ZERO),
+                            gen3_fs_operand(c, Z, Z, ZERO, ZERO),
+                            gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO)); /* u0.xy = in.xy * c.z - c.xy */
+                gen3_fs_dp2add(FS_U0, MASK_X,
+                               gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+                               gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
+                               gen3_fs_operand_zero()); /* u0.x = r^2 */
+                gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X)); /* out.x = 1/sqrt(r^2) */
+                gen3_fs_mad(out, 0,
+                            gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
+                            gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+                            gen3_fs_operand(c, W, ZERO, ZERO, ZERO)); /* out = r^2 * rsq + c.w */
+                break;
+        case RADIAL_TWO:
+                /*
+                   pdx = x - c1x, pdy = y - c1y;
+                   A = dx^2 + dy^2 - dr^2
+                   B = -2*(pdx*dx + pdy*dy + r1*dr);
+                   C = pdx^2 + pdy^2 - r1^2;
+                   det = B*B - 4*A*C;
+                   t = (-B + sqrt (det)) / (2 * A)
+                   */
+                /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
+                gen3_fs_add(FS_U0,
+                            gen3_fs_operand(in, X, Y, ZERO, ZERO),
+                            gen3_fs_operand(c, X, Y, Z, ZERO));
+                /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
+                gen3_fs_dp3(FS_U0, MASK_W,
+                            gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
+                            gen3_fs_operand(c+1, X, Y, Z, ZERO));
+                /* u1.x = pdx^2 + pdy^2 - r1^2; [C] */
+                gen3_fs_dp3(FS_U1, MASK_X,
+                            gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
+                            gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
+                /* u1.x = C, u1.y = B, u1.z=-4*A; */
+                gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
+                gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
+                /* u1.x = B^2 - 4*A*C */
+                gen3_fs_dp2add(FS_U1, MASK_X,
+                               gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
+                               gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
+                               gen3_fs_operand_zero());
+                /* out.x = -B + sqrt (B^2 - 4*A*C), */
+                gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
+                gen3_fs_mad(out, MASK_X,
+                            gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+                            gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
+                            gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
+                /* out.x = (-B + sqrt (B^2 - 4*A*C)) / (2 * A), */
+                gen3_fs_mul(out,
+                            gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
+                            gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
+                break;
+        }
+}
+static void /* Emit the pixel-shader program for a composite operation.
+             *
+             * The source/mask/destination configuration is packed into an
+             * id; if it equals the last emitted id nothing is written.
+             * Otherwise one batch dword is reserved for the command header,
+             * the shader instructions are appended through the gen3_fs_*
+             * macros, and the header is filled in at "done" with the final
+             * length.
+             *
+             * blend indexes gen3_blend_op[]; only its src_alpha flag is
+             * consulted here.
+             */
+gen3_composite_emit_shader(struct sna *sna,
+                           const struct sna_composite_op *op,
+                           uint8_t blend)
+{
+        bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0; /* destination format has no RGB bits */
+        const struct sna_composite_channel *src, *mask;
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        uint32_t shader_offset, id;
+        int src_reg, mask_reg; /* NOTE(review): left unset on some type combinations - confirm callers never reach those reads */
+        int t, length;
+        src = &op->src;
+        mask = &op->mask;
+        if (mask->u.gen3.type == SHADER_NONE)
+                mask = NULL; /* from here on, NULL means "no mask" */
+        id = (src->u.gen3.type |
+              src->is_affine << 4 |
+              src->alpha_fixup << 5 |
+              src->rb_reversed << 6); /* source half of the cache key */
+        if (mask) {
+                id |= (mask->u.gen3.type << 8 |
+                       mask->is_affine << 12 |
+                       gen3_blend_op[blend].src_alpha << 13 |
+                       op->has_component_alpha << 14 |
+                       mask->alpha_fixup << 15 |
+                       mask->rb_reversed << 16); /* mask half of the cache key */
+        }
+        id |= dst_is_alpha << 24;
+        id |= op->rb_reversed << 25;
+        if (id == state->last_shader)
+                return; /* identical program was emitted last time */
+        state->last_shader = id;
+        shader_offset = sna->kgem.nbatch++; /* reserve the header dword, patched at "done" */
+        t = 0; /* counts sampler/texcoord slots taken by the source */
+        switch (src->u.gen3.type) {
+        case SHADER_NONE:
+        case SHADER_OPACITY:
+                assert(0); /* not valid source types; falls through when asserts are compiled out */
+        case SHADER_ZERO:
+        case SHADER_BLACK:
+        case SHADER_WHITE:
+                break;
+        case SHADER_CONSTANT:
+                gen3_fs_dcl(FS_T8);
+                src_reg = FS_T8;
+                break;
+        case SHADER_TEXTURE:
+        case SHADER_RADIAL:
+        case SHADER_LINEAR:
+                gen3_fs_dcl(FS_S0);
+                gen3_fs_dcl(FS_T0);
+                t++;
+                break;
+        }
+        if (mask == NULL) {
+                switch (src->u.gen3.type) { /* solid sources need a single mov */
+                case SHADER_ZERO:
+                        gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
+                        goto done;
+                case SHADER_BLACK:
+                        if (dst_is_alpha)
+                                gen3_fs_mov(FS_OC, gen3_fs_operand_one());
+                        else
+                                gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
+                        goto done;
+                case SHADER_WHITE:
+                        gen3_fs_mov(FS_OC, gen3_fs_operand_one());
+                        goto done;
+                }
+                if (src->alpha_fixup && dst_is_alpha) {
+                        gen3_fs_mov(FS_OC, gen3_fs_operand_one()); /* forced alpha of one is all that an alpha-only target keeps */
+                        goto done;
+                }
+                /* No mask, so load directly to output color */
+                if (src->u.gen3.type != SHADER_CONSTANT) {
+                        if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
+                                src_reg = FS_R0; /* needs a swizzle afterwards, so stage in a temporary */
+                        else
+                                src_reg = FS_OC;
+                }
+                switch (src->u.gen3.type) {
+                case SHADER_LINEAR:
+                        gen3_linear_coord(sna, src, FS_T0, FS_R0);
+                        gen3_fs_texld(src_reg, FS_S0, FS_R0);
+                        break;
+                case SHADER_RADIAL:
+                        gen3_radial_coord(sna, src, FS_T0, FS_R0);
+                        gen3_fs_texld(src_reg, FS_S0, FS_R0);
+                        break;
+                case SHADER_TEXTURE:
+                        if (src->is_affine)
+                                gen3_fs_texld(src_reg, FS_S0, FS_T0);
+                        else
+                                gen3_fs_texldp(src_reg, FS_S0, FS_T0);
+                        break;
+                case SHADER_NONE:
+                case SHADER_WHITE:
+                case SHADER_BLACK:
+                case SHADER_ZERO:
+                        assert(0); /* solid types already left via "goto done" above */
+                case SHADER_CONSTANT:
+                        break;
+                }
+                if (src_reg != FS_OC) { /* move the staged value to the output with the required swizzle */
+                        if (src->alpha_fixup)
+                                gen3_fs_mov(FS_OC,
+                                            src->rb_reversed ^ op->rb_reversed ?
+                                            gen3_fs_operand(src_reg, Z, Y, X, ONE) :
+                                            gen3_fs_operand(src_reg, X, Y, Z, ONE));
+                        else if (dst_is_alpha)
+                                gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
+                        else if (src->rb_reversed ^ op->rb_reversed)
+                                gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
+                        else
+                                gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
+                } else if (src->alpha_fixup)
+                        gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
+        } else {
+                int out_reg = FS_OC;
+                if (op->rb_reversed)
+                        out_reg = FS_U0; /* compute into a temporary; swizzled into FS_OC at mask_done */
+                switch (mask->u.gen3.type) { /* declare the mask's input registers */
+                case SHADER_CONSTANT:
+                        gen3_fs_dcl(FS_T9);
+                        mask_reg = FS_T9;
+                        break;
+                case SHADER_TEXTURE:
+                case SHADER_LINEAR:
+                case SHADER_RADIAL:
+                        gen3_fs_dcl(FS_S0 + t);
+                        /* fall through */
+                case SHADER_OPACITY:
+                        gen3_fs_dcl(FS_T0 + t);
+                        break;
+                case SHADER_ZERO:
+                case SHADER_BLACK:
+                        assert(0); /* not expected as mask types; falls through when asserts are compiled out */
+                case SHADER_NONE:
+                case SHADER_WHITE:
+                        break;
+                }
+                t = 0; /* recount while loading the source so the mask uses the next slot */
+                switch (src->u.gen3.type) {
+                case SHADER_LINEAR:
+                        gen3_linear_coord(sna, src, FS_T0, FS_R0);
+                        gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+                        src_reg = FS_R0;
+                        t++;
+                        break;
+                case SHADER_RADIAL:
+                        gen3_radial_coord(sna, src, FS_T0, FS_R0);
+                        gen3_fs_texld(FS_R0, FS_S0, FS_R0);
+                        src_reg = FS_R0;
+                        t++;
+                        break;
+                case SHADER_TEXTURE:
+                        if (src->is_affine)
+                                gen3_fs_texld(FS_R0, FS_S0, FS_T0);
+                        else
+                                gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
+                        src_reg = FS_R0;
+                        t++;
+                        break;
+                case SHADER_CONSTANT:
+                case SHADER_NONE:
+                case SHADER_ZERO:
+                case SHADER_BLACK:
+                case SHADER_WHITE:
+                        break;
+                }
+                if (src->alpha_fixup)
+                        gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one()); /* force source alpha to one */
+                if (src->rb_reversed)
+                        gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W)); /* swap red and blue in place */
+                switch (mask->u.gen3.type) { /* load the mask value into FS_R1 */
+                case SHADER_LINEAR:
+                        gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
+                        gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+                        mask_reg = FS_R1;
+                        break;
+                case SHADER_RADIAL:
+                        gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
+                        gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
+                        mask_reg = FS_R1;
+                        break;
+                case SHADER_TEXTURE:
+                        if (mask->is_affine)
+                                gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
+                        else
+                                gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
+                        mask_reg = FS_R1;
+                        break;
+                case SHADER_OPACITY: /* scalar opacity comes straight from texcoord .x; no sampler */
+                        switch (src->u.gen3.type) {
+                        case SHADER_BLACK:
+                        case SHADER_WHITE:
+                                if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
+                                        gen3_fs_mov(out_reg,
+                                                    gen3_fs_operand(FS_T0 + t, X, X, X, X));
+                                } else {
+                                        gen3_fs_mov(out_reg,
+                                                    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
+                                }
+                                break;
+                        default:
+                                if (dst_is_alpha) {
+                                        gen3_fs_mul(out_reg,
+                                                    gen3_fs_operand(src_reg, W, W, W, W),
+                                                    gen3_fs_operand(FS_T0 + t, X, X, X, X));
+                                } else {
+                                        gen3_fs_mul(out_reg,
+                                                    gen3_fs_operand(src_reg, X, Y, Z, W),
+                                                    gen3_fs_operand(FS_T0 + t, X, X, X, X));
+                                }
+                        }
+                        goto mask_done;
+                case SHADER_CONSTANT:
+                case SHADER_ZERO:
+                case SHADER_BLACK:
+                case SHADER_WHITE:
+                case SHADER_NONE:
+                        break;
+                }
+                if (mask->alpha_fixup)
+                        gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one()); /* force mask alpha to one */
+                if (mask->rb_reversed)
+                        gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W)); /* swap red and blue in place */
+                if (dst_is_alpha) { /* only alpha matters: out = src.a * mask.a */
+                        switch (src->u.gen3.type) {
+                        case SHADER_BLACK:
+                        case SHADER_WHITE:
+                                gen3_fs_mov(out_reg,
+                                            gen3_fs_operand(mask_reg, W, W, W, W));
+                                break;
+                        default:
+                                gen3_fs_mul(out_reg,
+                                            gen3_fs_operand(src_reg, W, W, W, W),
+                                            gen3_fs_operand(mask_reg, W, W, W, W));
+                                break;
+                        }
+                } else {
+                        /* If component alpha is active in the mask and the blend
+                         * operation uses the source alpha, then we know we don't
+                         * need the source value (otherwise we would have hit a
+                         * fallback earlier), so we provide the source alpha (src.A *
+                         * mask.X) as output color.
+                         * Conversely, if CA is set and we don't need the source alpha,
+                         * then we produce the source value (src.X * mask.X) and the
+                         * source alpha is unused.  Otherwise, we provide the non-CA
+                         * source value (src.X * mask.A).
+                         */
+                        if (op->has_component_alpha) {
+                                switch (src->u.gen3.type) {
+                                case SHADER_BLACK:
+                                        if (gen3_blend_op[blend].src_alpha)
+                                                gen3_fs_mov(out_reg,
+                                                            gen3_fs_operand_reg(mask_reg));
+                                        else
+                                                gen3_fs_mov(out_reg,
+                                                            gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
+                                        break;
+                                case SHADER_WHITE:
+                                        gen3_fs_mov(out_reg,
+                                                    gen3_fs_operand_reg(mask_reg));
+                                        break;
+                                default:
+                                        if (gen3_blend_op[blend].src_alpha)
+                                                gen3_fs_mul(out_reg,
+                                                            gen3_fs_operand(src_reg, W, W, W, W),
+                                                            gen3_fs_operand_reg(mask_reg));
+                                        else
+                                                gen3_fs_mul(out_reg,
+                                                            gen3_fs_operand_reg(src_reg),
+                                                            gen3_fs_operand_reg(mask_reg));
+                                        break;
+                                }
+                        } else {
+                                switch (src->u.gen3.type) {
+                                case SHADER_WHITE:
+                                        gen3_fs_mov(out_reg,
+                                                    gen3_fs_operand(mask_reg, W, W, W, W));
+                                        break;
+                                case SHADER_BLACK:
+                                        gen3_fs_mov(out_reg,
+                                                    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
+                                        break;
+                                default:
+                                        gen3_fs_mul(out_reg,
+                                                    gen3_fs_operand_reg(src_reg),
+                                                    gen3_fs_operand(mask_reg, W, W, W, W));
+                                        break;
+                                }
+                        }
+                }
+mask_done:
+                if (op->rb_reversed)
+                        gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W)); /* final red/blue swap into the output */
+        }
+done:
+        length = sna->kgem.nbatch - shader_offset; /* dwords emitted, including the reserved header */
+        sna->kgem.batch[shader_offset] =
+                _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
+}
+/*
+ * Translate an I915_TILING_* mode into MS3_* map-state tiling bits.
+ * Y tiling sets both MS3_TILE_WALK and MS3_TILED_SURFACE, X tiling only
+ * MS3_TILED_SURFACE; any other value yields 0.
+ */
+static uint32_t gen3_ms_tiling(uint32_t tiling)
+{
+        if (tiling == I915_TILING_Y)
+                return MS3_TILE_WALK | MS3_TILED_SURFACE;
+        if (tiling == I915_TILING_X)
+                return MS3_TILED_SURFACE;
+        return 0;
+}
+static void gen3_emit_invariant(struct sna *sna) /* Emit the fixed block of 3D state
+                                                  * commands and clear the
+                                                  * need_invariant flag so that
+                                                  * gen3_get_batch() does not
+                                                  * emit it again.
+                                                  */
+{
+        /* Disable independent alpha blend */
+        OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
+                  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
+                  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
+                  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
+        OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
+                  CSB_TCB(0, 0) |
+                  CSB_TCB(1, 1) |
+                  CSB_TCB(2, 2) |
+                  CSB_TCB(3, 3) |
+                  CSB_TCB(4, 4) |
+                  CSB_TCB(5, 5) |
+                  CSB_TCB(6, 6) |
+                  CSB_TCB(7, 7)); /* bind each coordinate set n to the same-numbered slot */
+        OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3); /* the next four dwords are S3..S6 */
+        OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
+        OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+                  S4_LINE_WIDTH_ONE |
+                  S4_CULLMODE_NONE |
+                  S4_VFMT_XY);
+        OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
+        OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
+        OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+        OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+        OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
+        OUT_BATCH(0x00000000);
+        OUT_BATCH(_3DSTATE_STIPPLE);
+        OUT_BATCH(0x00000000);
+        sna->render_state.gen3.need_invariant = false;
+}
+#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
+static void /* Prepare the batch for emitting a render operation.
+             *
+             * Switches kgem to KGEM_RENDER mode for op->dst.bo, then checks
+             * in turn for batch space (200), relocation entries and exec
+             * entries (MAX_OBJECTS each).  Whenever a check fails the
+             * current batch is submitted and render mode is re-entered.
+             * Finally the invariant state is emitted if it is flagged as
+             * needed.
+             */
+gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
+{
+        kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
+        if (!kgem_check_batch(&sna->kgem, 200)) { /* not enough batch space left */
+                DBG(("%s: flushing batch: size %d > %d\n",
+                     __FUNCTION__, 200,
+                     sna->kgem.surface-sna->kgem.nbatch));
+                kgem_submit(&sna->kgem);
+                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+        }
+        if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) { /* not enough relocation entries left */
+                DBG(("%s: flushing batch: reloc %d >= %d\n",
+                     __FUNCTION__,
+                     sna->kgem.nreloc,
+                     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
+                kgem_submit(&sna->kgem);
+                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+        }
+        if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) { /* not enough exec entries left */
+                DBG(("%s: flushing batch: exec %d >= %d\n",
+                     __FUNCTION__,
+                     sna->kgem.nexec,
+                     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
+                kgem_submit(&sna->kgem);
+                _kgem_set_mode(&sna->kgem, KGEM_RENDER);
+        }
+        if (sna->render_state.gen3.need_invariant)
+                gen3_emit_invariant(sna);
+#undef MAX_OBJECTS
+}
+/*
+ * Select `bo` as the colour render target.
+ *
+ * When bo->unique_id differs from the cached state->current_dst this emits
+ * the buffer info (tiling, pitch and a relocation to the buffer), the
+ * destination format, and - only if the limits changed - a new drawing
+ * rectangle covering (0,0)..(width-1,height-1).  The buffer is marked dirty
+ * on every call, whether or not any state was emitted.
+ *
+ * width/height: size of the target in pixels; must pass !too_large().
+ * format:       picture format handed to gen3_get_dst_format().
+ */
+static void gen3_emit_target(struct sna *sna,
+                             struct kgem_bo *bo,
+                             int width,
+                             int height,
+                             int format)
+{
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        assert(!too_large(width, height));
+        /* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
+        assert(bo->unique_id != 0);
+        if (bo->unique_id != state->current_dst) {
+                uint32_t v;
+                DBG(("%s: setting new target id=%d, handle=%d\n",
+                     __FUNCTION__, bo->unique_id, bo->handle));
+                OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
+                OUT_BATCH(BUF_3D_ID_COLOR_BACK |
+                          gen3_buf_tiling(bo->tiling) |
+                          bo->pitch);
+                /* Render read+write relocation to the start of the buffer
+                 * (delta 0). */
+                OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+                                         bo,
+                                         I915_GEM_DOMAIN_RENDER << 16 |
+                                         I915_GEM_DOMAIN_RENDER,
+                                         0));
+                OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
+                OUT_BATCH(gen3_get_dst_format(format));
+                v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
+                if (v != state->last_drawrect_limit) {
+                        OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
+                        OUT_BATCH(0); /* XXX dither origin? */
+                        OUT_BATCH(0);
+                        OUT_BATCH(v);
+                        OUT_BATCH(0);
+                        state->last_drawrect_limit = v;
+                }
+                state->current_dst = bo->unique_id;
+        }
+        kgem_bo_mark_dirty(bo);
+}
+/*
+ * Emit all state needed to run composite operation `op`:
+ *
+ *  1. reserve batch space (gen3_get_batch) and flush caches if the source
+ *     or mask buffer is dirty;
+ *  2. select the render target;
+ *  3. gather texture map / sampler words and the S2 texcoord layout from
+ *     the source and mask channels;
+ *  4. emit S2 + S6 (blend), shader constants, map state and sampler state,
+ *     each one rewound or skipped when identical to what was last emitted
+ *     (tracked via the last_* / tex_* fields of the gen3 render state);
+ *  5. emit the pixel shader.
+ *
+ * At most two textures (source and mask) are described, which is what the
+ * local map[4] / sampler[4] / bo[2] arrays are sized for.
+ */
+static void gen3_emit_composite_state(struct sna *sna,
+                                      const struct sna_composite_op *op)
+{
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        uint32_t map[4];
+        uint32_t sampler[4];
+        struct kgem_bo *bo[2];
+        unsigned int tex_count, n;
+        uint32_t ss2;
+        gen3_get_batch(sna, op);
+        if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
+                /* Sampling from the render target itself takes the MI_FLUSH
+                 * path; otherwise a pipeline cache flush is emitted. */
+                if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
+                        OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
+                else
+                        OUT_BATCH(_3DSTATE_MODES_5_CMD |
+                                  PIPELINE_FLUSH_RENDER_CACHE |
+                                  PIPELINE_FLUSH_TEXTURE_CACHE);
+                kgem_clear_dirty(&sna->kgem);
+        }
+        gen3_emit_target(sna,
+                         op->dst.bo,
+                         op->dst.width,
+                         op->dst.height,
+                         op->dst.format);
+        /* Start from all-ones and overwrite the field of each coordinate
+         * set that is actually used. */
+        ss2 = ~0;
+        tex_count = 0;
+        switch (op->src.u.gen3.type) {
+        case SHADER_OPACITY:
+        case SHADER_NONE:
+                assert(0);
+                /* fall through (only reachable with asserts disabled) */
+        case SHADER_ZERO:
+        case SHADER_BLACK:
+        case SHADER_WHITE:
+                break;
+        case SHADER_CONSTANT:
+                if (op->src.u.gen3.mode != state->last_diffuse) {
+                        OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
+                        OUT_BATCH(op->src.u.gen3.mode);
+                        state->last_diffuse = op->src.u.gen3.mode;
+                }
+                break;
+        case SHADER_LINEAR:
+        case SHADER_RADIAL:
+        case SHADER_TEXTURE:
+                ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+                ss2 |= S2_TEXCOORD_FMT(tex_count,
+                                       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+                map[tex_count * 2 + 0] =
+                        op->src.card_format |
+                        gen3_ms_tiling(op->src.bo->tiling) |
+                        (op->src.height - 1) << MS3_HEIGHT_SHIFT |
+                        (op->src.width - 1) << MS3_WIDTH_SHIFT;
+                map[tex_count * 2 + 1] =
+                        (op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+                sampler[tex_count * 2 + 0] = op->src.filter;
+                sampler[tex_count * 2 + 1] =
+                        op->src.repeat |
+                        tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+                bo[tex_count] = op->src.bo;
+                tex_count++;
+                break;
+        }
+        switch (op->mask.u.gen3.type) {
+        case SHADER_NONE:
+        case SHADER_ZERO:
+        case SHADER_BLACK:
+        case SHADER_WHITE:
+                break;
+        case SHADER_CONSTANT:
+                if (op->mask.u.gen3.mode != state->last_specular) {
+                        OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
+                        OUT_BATCH(op->mask.u.gen3.mode);
+                        state->last_specular = op->mask.u.gen3.mode;
+                }
+                break;
+        case SHADER_LINEAR:
+        case SHADER_RADIAL:
+        case SHADER_TEXTURE:
+                ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+                ss2 |= S2_TEXCOORD_FMT(tex_count,
+                                       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
+                map[tex_count * 2 + 0] =
+                        op->mask.card_format |
+                        gen3_ms_tiling(op->mask.bo->tiling) |
+                        (op->mask.height - 1) << MS3_HEIGHT_SHIFT |
+                        (op->mask.width - 1) << MS3_WIDTH_SHIFT;
+                map[tex_count * 2 + 1] =
+                        (op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
+                sampler[tex_count * 2 + 0] = op->mask.filter;
+                sampler[tex_count * 2 + 1] =
+                        op->mask.repeat |
+                        tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
+                bo[tex_count] = op->mask.bo;
+                tex_count++;
+                break;
+        case SHADER_OPACITY:
+                /* Opacity uses a 1D coordinate but no texture map. */
+                ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
+                ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
+                break;
+        }
+        {
+                uint32_t blend_offset = sna->kgem.nbatch;
+                OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+                OUT_BATCH(ss2);
+                OUT_BATCH(gen3_get_blend_cntl(op->op,
+                                              op->has_component_alpha,
+                                              op->dst.format));
+                /* Compare the two payload dwords (S2, S6 = 2 * 4 bytes)
+                 * with the previously emitted pair; if they match, rewind
+                 * the batch so the duplicate command disappears. */
+                if (memcmp(sna->kgem.batch + state->last_blend + 1,
+                           sna->kgem.batch + blend_offset + 1,
+                           2 * 4) == 0)
+                        sna->kgem.nbatch = blend_offset;
+                else
+                        state->last_blend = blend_offset;
+        }
+        if (op->u.gen3.num_constants) {
+                int count = op->u.gen3.num_constants;
+                /* Skip the upload when the previous constants command in
+                 * this batch carries the same mask and the same values. */
+                if (state->last_constants) {
+                        int last = sna->kgem.batch[state->last_constants+1];
+                        if (last == (1 << (count >> 2)) - 1 &&
+                            memcmp(&sna->kgem.batch[state->last_constants+2],
+                                   op->u.gen3.constants,
+                                   count * sizeof(uint32_t)) == 0)
+                                count = 0;
+                }
+                if (count) {
+                        state->last_constants = sna->kgem.nbatch;
+                        OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
+                        OUT_BATCH((1 << (count >> 2)) - 1);
+                        memcpy(sna->kgem.batch + sna->kgem.nbatch,
+                               op->u.gen3.constants,
+                               count * sizeof(uint32_t));
+                        sna->kgem.nbatch += count;
+                }
+        }
+        if (tex_count != 0) {
+                uint32_t rewind;
+                /* n ends up == tex_count only if every map matches the
+                 * cached description, in which case MAP_STATE is skipped. */
+                n = 0;
+                if (tex_count == state->tex_count) {
+                        for (; n < tex_count; n++) {
+                                if (map[2*n+0] != state->tex_map[2*n+0] ||
+                                    map[2*n+1] != state->tex_map[2*n+1] ||
+                                    state->tex_handle[n] != bo[n]->handle ||
+                                    state->tex_delta[n] != bo[n]->delta)
+                                        break;
+                        }
+                }
+                if (n < tex_count) {
+                        OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
+                        OUT_BATCH((1 << tex_count) - 1);
+                        for (n = 0; n < tex_count; n++) {
+                                /* Sampler read-only relocation to the start
+                                 * of the texture buffer (delta 0). */
+                                OUT_BATCH(kgem_add_reloc(&sna->kgem,
+                                                         sna->kgem.nbatch,
+                                                         bo[n],
+                                                         I915_GEM_DOMAIN_SAMPLER<< 16,
+                                                         0));
+                                OUT_BATCH(map[2*n + 0]);
+                                OUT_BATCH(map[2*n + 1]);
+                                state->tex_map[2*n+0] = map[2*n+0];
+                                state->tex_map[2*n+1] = map[2*n+1];
+                                state->tex_handle[n] = bo[n]->handle;
+                                state->tex_delta[n] = bo[n]->delta;
+                        }
+                        state->tex_count = n;
+                }
+                /* Emit the sampler state, then rewind over it if it is a
+                 * copy of the last sampler state in this batch. */
+                rewind = sna->kgem.nbatch;
+                OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
+                OUT_BATCH((1 << tex_count) - 1);
+                for (n = 0; n < tex_count; n++) {
+                        OUT_BATCH(sampler[2*n + 0]);
+                        OUT_BATCH(sampler[2*n + 1]);
+                        OUT_BATCH(0);
+                }
+                if (state->last_sampler &&
+                    memcmp(&sna->kgem.batch[state->last_sampler+1],
+                           &sna->kgem.batch[rewind + 1],
+                           (3*tex_count + 1)*sizeof(uint32_t)) == 0)
+                        sna->kgem.nbatch = rewind;
+                else
+                        state->last_sampler = rewind;
+        }
+        gen3_composite_emit_shader(sna, op, op->op);
+}
+/*
+ * Second pass for component-alpha operations that request it.
+ *
+ * If op->need_magic_ca_pass is set, re-emit the vertices drawn since
+ * vertex_start using a PictOpAdd blend and shader, invalidate the cached
+ * blend state, and return true.  Otherwise emit nothing and return false.
+ */
+static bool gen3_magic_ca_pass(struct sna *sna,
+                               const struct sna_composite_op *op)
+{
+        if (op->need_magic_ca_pass) {
+                DBG(("%s(%d)\n", __FUNCTION__,
+                     sna->render.vertex_index - sna->render.vertex_start));
+
+                /* Switch S6 and the shader over to the additive pass. */
+                OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+                OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
+                gen3_composite_emit_shader(sna, op, PictOpAdd);
+
+                /* Replay the same range of vertices. */
+                OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+                          (sna->render.vertex_index - sna->render.vertex_start));
+                OUT_BATCH(sna->render.vertex_start);
+
+                /* The cached blend no longer matches what was emitted. */
+                sna->render_state.gen3.last_blend = 0;
+                return true;
+        }
+
+        return false;
+}
+/*
+ * Close the currently open primitive.
+ *
+ * sna->render.vertex_offset is the batch index of the two placeholder
+ * dwords reserved by gen3_rectangle_begin(); they are overwritten with the
+ * RECTLIST command (carrying the number of vertices emitted since
+ * vertex_start) and the index of the first vertex.  vertex_offset is then
+ * reset to 0 to mark that no primitive is open.
+ */
+static void gen3_vertex_flush(struct sna *sna)
+{
+        assert(sna->render.vertex_offset);
+        /* vertex_offset counts dwords; the debug line reports it in bytes. */
+        DBG(("%s[%x] = %d\n", __FUNCTION__,
+             4*sna->render.vertex_offset,
+             sna->render.vertex_index - sna->render.vertex_start));
+        sna->kgem.batch[sna->render.vertex_offset] =
+                PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
+                (sna->render.vertex_index - sna->render.vertex_start);
+        sna->kgem.batch[sna->render.vertex_offset + 1] =
+                sna->render.vertex_start;
+        sna->render.vertex_offset = 0;
+}
+/*
+ * Retire the current vertex buffer and switch to a freshly allocated one.
+ *
+ * If a vbo is in use, the pending vertex relocation (if any) is resolved
+ * against it, the vertex counters are reset and the vbo is released.  A new
+ * 256 KiB linear buffer is then created and mapped; vertices still
+ * accounted in vertex_used (i.e. when there was no previous vbo) are copied
+ * over from the embedded vertex_data array.
+ *
+ * Returns the number of floats still free in the new buffer, or 0 if the
+ * buffer could not be created or mapped (render.vbo is then NULL).
+ */
+static int gen3_vertex_finish(struct sna *sna)
+{
+        struct kgem_bo *bo;
+        DBG(("%s: used=%d/%d, vbo active? %d\n",
+             __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+             sna->render.vbo ? sna->render.vbo->handle : 0));
+        assert(sna->render.vertex_offset == 0);
+        assert(sna->render.vertex_used);
+        assert(sna->render.vertex_used <= sna->render.vertex_size);
+        sna_vertex_wait__locked(&sna->render);
+        bo = sna->render.vbo;
+        if (bo) {
+                DBG(("%s: reloc = %d\n", __FUNCTION__,
+                     sna->render.vertex_reloc[0]));
+                if (sna->render.vertex_reloc[0]) {
+                        sna->kgem.batch[sna->render.vertex_reloc[0]] =
+                                kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+                                               bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
+                        sna->render.vertex_reloc[0] = 0;
+                }
+                sna->render.vertex_used = 0;
+                sna->render.vertex_index = 0;
+                sna->render.vbo = NULL;
+                kgem_bo_destroy(&sna->kgem, bo);
+        }
+        sna->render.vertices = NULL;
+        /* 256*1024 bytes == 64*1024 floats, matching vertex_size below. */
+        sna->render.vbo = kgem_create_linear(&sna->kgem,
+                                             256*1024, CREATE_GTT_MAP);
+        if (sna->render.vbo)
+                sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+        if (sna->render.vertices == NULL) {
+                if (sna->render.vbo)
+                        kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+                sna->render.vbo = NULL;
+                return 0;
+        }
+        assert(sna->render.vbo->snoop == false);
+        if (sna->render.vertex_used) {
+                memcpy(sna->render.vertices,
+                       sna->render.vertex_data,
+                       sizeof(float)*sna->render.vertex_used);
+        }
+        sna->render.vertex_size = 64 * 1024 - 1;
+        return sna->render.vertex_size - sna->render.vertex_used;
+}
+/*
+ * Resolve the outstanding vertex-buffer relocation before the batch is
+ * flushed.  Does nothing when no relocation is pending.
+ *
+ * With an active vbo:
+ *   - fewer than 64 floats left: drop the vbo after this batch and fall
+ *     back to the embedded vertex_data array;
+ *   - CPU-mapped: try to remap through the GTT, dropping the vbo the same
+ *     way if that fails.
+ * Without a vbo the vertices live in vertex_data and are either appended to
+ * the batch itself (when they fit below kgem.surface) or uploaded into a
+ * temporary linear buffer.
+ *
+ * The relocation slot is then patched, the vertex counters are reset when
+ * no vbo remains, and any buffer no longer needed is released.
+ */
+static void gen3_vertex_close(struct sna *sna)
+{
+        struct kgem_bo *bo, *free_bo = NULL;
+        unsigned int delta = 0;
+        assert(sna->render.vertex_offset == 0);
+        if (sna->render.vertex_reloc[0] == 0)
+                return;
+        DBG(("%s: used=%d/%d, vbo active? %d\n",
+             __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+             sna->render.vbo ? sna->render.vbo->handle : 0));
+        bo = sna->render.vbo;
+        if (bo) {
+                if (sna->render.vertex_size - sna->render.vertex_used < 64) {
+                        DBG(("%s: discarding full vbo\n", __FUNCTION__));
+                        sna->render.vbo = NULL;
+                        sna->render.vertices = sna->render.vertex_data;
+                        sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+                        free_bo = bo;
+                } else if (IS_CPU_MAP(bo->map)) {
+                        DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
+                        sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
+                        if (sna->render.vertices == NULL) {
+                                DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
+                                sna->render.vbo = NULL;
+                                sna->render.vertices = sna->render.vertex_data;
+                                sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+                                free_bo = bo;
+                        }
+                }
+        } else {
+                if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+                        DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
+                             sna->render.vertex_used, sna->kgem.nbatch));
+                        memcpy(sna->kgem.batch + sna->kgem.nbatch,
+                               sna->render.vertex_data,
+                               sna->render.vertex_used * 4);
+                        /* bo == NULL with a byte offset: the relocation
+                         * refers to the vertices just appended to the batch. */
+                        delta = sna->kgem.nbatch * 4;
+                        bo = NULL;
+                        sna->kgem.nbatch += sna->render.vertex_used;
+                } else {
+                        DBG(("%s: new vbo: %d\n", __FUNCTION__,
+                             sna->render.vertex_used));
+                        /* vertex_used counts 4-byte floats; both the
+                         * allocation and the upload take a size in bytes. */
+                        bo = kgem_create_linear(&sna->kgem,
+                                                4*sna->render.vertex_used,
+                                                CREATE_NO_THROTTLE);
+                        if (bo) {
+                                assert(bo->snoop == false);
+                                kgem_bo_write(&sna->kgem, bo,
+                                              sna->render.vertex_data,
+                                              4*sna->render.vertex_used);
+                        }
+                        free_bo = bo;
+                }
+        }
+        DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
+        sna->kgem.batch[sna->render.vertex_reloc[0]] =
+                kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+                               bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
+        sna->render.vertex_reloc[0] = 0;
+        if (sna->render.vbo == NULL) {
+                DBG(("%s: resetting vbo\n", __FUNCTION__));
+                sna->render.vertex_used = 0;
+                sna->render.vertex_index = 0;
+                assert(sna->render.vertices == sna->render.vertex_data);
+                assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
+        }
+        if (free_bo)
+                kgem_bo_destroy(&sna->kgem, free_bo);
+}
+/*
+ * Open a rectangle-list primitive in the batch.
+ *
+ * Emits S0 (vertex buffer address placeholder, recorded in
+ * vertex_reloc[0]) and/or S1 (vertex width/pitch) when they are not yet
+ * current, then either re-opens the immediately preceding primitive or
+ * reserves two placeholder dwords that gen3_vertex_flush() fills in later.
+ *
+ * Returns false if the batch lacks room for the required dwords, true
+ * otherwise (including when a primitive turns out to be open already after
+ * waiting on the vertex lock).
+ */
+static bool gen3_rectangle_begin(struct sna *sna,
+                                 const struct sna_composite_op *op)
+{
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        int ndwords, i1_cmd = 0, i1_len = 0;
+
+        if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
+                return true;
+
+        /* Two dwords for the primitive, plus headroom for the CA pass. */
+        ndwords = op->need_magic_ca_pass ? 2 + 100 : 2;
+
+        if (sna->render.vertex_reloc[0] == 0) {
+                i1_len++;
+                i1_cmd |= I1_LOAD_S(0);
+                ndwords++;
+        }
+        if (state->floats_per_vertex != op->floats_per_vertex) {
+                i1_len++;
+                i1_cmd |= I1_LOAD_S(1);
+                ndwords++;
+        }
+
+        if (!kgem_check_batch(&sna->kgem, ndwords+1))
+                return false;
+
+        if (i1_cmd) {
+                OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
+                /* S0: skip one dword now, to be patched with the relocation. */
+                if (sna->render.vertex_reloc[0] == 0)
+                        sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
+                /* S1: vertex width and pitch, both in floats. */
+                if (state->floats_per_vertex != op->floats_per_vertex) {
+                        state->floats_per_vertex = op->floats_per_vertex;
+                        OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
+                                  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
+                }
+        }
+
+        if (!op->need_magic_ca_pass &&
+            sna->kgem.nbatch == 2 + state->last_vertex_offset) {
+                /* Nothing was emitted since the last primitive: extend it. */
+                sna->render.vertex_offset = state->last_vertex_offset;
+                return true;
+        }
+
+        sna->render.vertex_offset = sna->kgem.nbatch;
+        OUT_BATCH(MI_NOOP); /* to be filled later */
+        OUT_BATCH(MI_NOOP);
+        sna->render.vertex_start = sna->render.vertex_index;
+        state->last_vertex_offset = sna->render.vertex_offset;
+        return true;
+}
+/*
+ * Slow path of gen3_get_rectangles(): make room for more vertices.
+ *
+ * Returns the number of floats available afterwards, or 0 when the batch
+ * has no room for the bookkeeping dwords / relocation and must be
+ * submitted by the caller first.
+ */
+static int gen3_get_rectangles__flush(struct sna *sna,
+                                      const struct sna_composite_op *op)
+{
+        /* Preventing discarding new vbo after lock contention */
+        if (sna_vertex_wait__locked(&sna->render)) {
+                int space = vertex_space(sna);
+                if (space > op->floats_per_rect)
+                        return space;
+        }
+
+        /* Both checks must pass; the second is only tried if the first does. */
+        if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5) ||
+            !kgem_check_reloc_and_exec(&sna->kgem, 1))
+                return 0;
+
+        /* Close the open primitive; after a CA pass restore the op's own
+         * blend and shader. */
+        if (sna->render.vertex_offset) {
+                gen3_vertex_flush(sna);
+                if (gen3_magic_ca_pass(sna, op)) {
+                        OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+                        OUT_BATCH(gen3_get_blend_cntl(op->op,
+                                                      op->has_component_alpha,
+                                                      op->dst.format));
+                        gen3_composite_emit_shader(sna, op, op->op);
+                }
+        }
+
+        return gen3_vertex_finish(sna);
+}
+/*
+ * Reserve vertex space for up to `want` rectangles of operation `op`.
+ *
+ * Loops until there is room for at least one rectangle and a primitive is
+ * open, flushing the vertex buffer or submitting the whole batch (and
+ * re-emitting the composite state) as needed.  vertex_index is advanced by
+ * three vertices per granted rectangle; the caller writes the vertex data.
+ *
+ * Returns the number of rectangles granted: at least 1, at most `want`.
+ */
+inline static int gen3_get_rectangles(struct sna *sna,
+                                      const struct sna_composite_op *op,
+                                      int want)
+{
+        int rem;
+        DBG(("%s: want=%d, rem=%d\n",
+             __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
+        assert(want);
+        assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
+start:
+        rem = vertex_space(sna);
+        if (unlikely(op->floats_per_rect > rem)) {
+                DBG(("flushing vbo for %s: %d < %d\n",
+                     __FUNCTION__, rem, op->floats_per_rect));
+                rem = gen3_get_rectangles__flush(sna, op);
+                if (unlikely(rem == 0))
+                        goto flush;
+        }
+        if (unlikely(sna->render.vertex_offset == 0)) {
+                if (!gen3_rectangle_begin(sna, op))
+                        goto flush;
+                else
+                        goto start;
+        }
+        /* At this point at least one rectangle fits: the check above
+         * guarantees floats_per_rect <= rem, and rem never exceeds the
+         * space actually available. */
+        assert(op->floats_per_rect <= vertex_space(sna));
+        assert(rem <= vertex_space(sna));
+        /* Clamp multi-rectangle requests to what fits. */
+        if (want > 1 && want * op->floats_per_rect > rem)
+                want = rem / op->floats_per_rect;
+        sna->render.vertex_index += 3*want;
+        assert(want);
+        assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
+        return want;
+flush:
+        DBG(("%s: flushing batch\n", __FUNCTION__));
+        if (sna->render.vertex_offset) {
+                gen3_vertex_flush(sna);
+                gen3_magic_ca_pass(sna, op);
+        }
+        sna_vertex_wait__locked(&sna->render);
+        _kgem_submit(&sna->kgem);
+        gen3_emit_composite_state(sna, op);
+        assert(sna->render.vertex_offset == 0);
+        assert(sna->render.vertex_reloc[0] == 0);
+        goto start;
+}
+/*
+ * Composite a single rectangle `r`: reserve vertex space for one
+ * rectangle, then let the operation's prim_emit callback write its
+ * vertices.
+ */
+fastcall static void
+gen3_render_composite_blt(struct sna *sna,
+                          const struct sna_composite_op *op,
+                          const struct sna_composite_rectangles *r)
+{
+        DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
+             r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
+             r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
+             r->dst.x, r->dst.y, op->dst.x, op->dst.y,
+             r->width, r->height));
+
+        /* The granted count is not inspected here. */
+        (void)gen3_get_rectangles(sna, op, 1);
+        op->prim_emit(sna, op, r);
+}
+/*
+ * Finish a composite operation: if a primitive is still open, close it
+ * and run the component-alpha second pass when the op asks for one.
+ */
+static void
+gen3_render_composite_done(struct sna *sna,
+                           const struct sna_composite_op *op)
+{
+        DBG(("%s()\n", __FUNCTION__));
+
+        if (!sna->render.vertex_offset)
+                return;
+
+        gen3_vertex_flush(sna);
+        gen3_magic_ca_pass(sna, op);
+}
+/*
+ * Release the current vertex buffer object and point the vertex state
+ * back at the embedded vertex_data array, with both counters reset.
+ */
+static void
+discard_vbo(struct sna *sna)
+{
+        struct sna_render *render = &sna->render;
+
+        kgem_bo_destroy(&sna->kgem, render->vbo);
+        render->vbo = NULL;
+
+        render->vertices = render->vertex_data;
+        render->vertex_size = ARRAY_SIZE(render->vertex_data);
+
+        render->vertex_used = 0;
+        render->vertex_index = 0;
+}
+/*
+ * Forget all cached hardware state so that the next operation re-emits
+ * everything, drop a vbo that is no longer mappable, and clear the pending
+ * vertex relocation and open-primitive markers.
+ */
+static void
+gen3_render_reset(struct sna *sna)
+{
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        struct sna_render *render = &sna->render;
+
+        state->need_invariant = true;
+
+        /* Render target. */
+        state->current_dst = 0;
+        state->last_drawrect_limit = ~0U;
+        state->last_target = 0;
+
+        /* Textures, blend, shader and constants. */
+        state->tex_count = 0;
+        state->last_blend = 0;
+        state->last_constants = 0;
+        state->last_sampler = 0;
+        state->last_shader = 0x7fffffff;
+        state->last_diffuse = 0xcc00ffee;
+        state->last_specular = 0xcc00ffee;
+
+        /* Vertex layout. */
+        state->floats_per_vertex = 0;
+        state->last_floats_per_vertex = 0;
+        state->last_vertex_offset = 0;
+
+        if (render->vbo != NULL &&
+            !kgem_bo_is_mappable(&sna->kgem, render->vbo)) {
+                DBG(("%s: discarding vbo as next access will stall: %d\n",
+                     __FUNCTION__, sna->render.vbo->presumed_offset));
+                discard_vbo(sna);
+        }
+
+        render->vertex_reloc[0] = 0;
+        render->vertex_offset = 0;
+}
+/*
+ * kgem retire hook: when no vertex relocation is pending and the vbo
+ * exists but is no longer busy, rewind the vertex counters so the buffer
+ * is reused from the start.
+ */
+static void
+gen3_render_retire(struct kgem *kgem)
+{
+        struct sna *sna = container_of(kgem, struct sna, kgem);
+
+        if (sna->render.vertex_reloc[0] != 0)
+                return;
+        if (!sna->render.vbo || kgem_bo_is_busy(sna->render.vbo))
+                return;
+
+        DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+        sna->render.vertex_used = 0;
+        sna->render.vertex_index = 0;
+}
+/*
+ * kgem expire hook: discard the vbo if one exists and holds no vertices.
+ */
+static void
+gen3_render_expire(struct kgem *kgem)
+{
+        struct sna *sna = container_of(kgem, struct sna, kgem);
+
+        if (!sna->render.vbo || sna->render.vertex_used)
+                return;
+
+        DBG(("%s: discarding vbo\n", __FUNCTION__));
+        discard_vbo(sna);
+}
+/*
+ * Look `format` up in gen3_tex_formats[] and, on the first match, copy the
+ * entry's card format and red/blue-reversed flag into `channel`.
+ *
+ * Returns true if a matching entry was found, false otherwise (in which
+ * case `channel` is left untouched).
+ */
+static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
+                                              CARD32 format)
+{
+        unsigned int idx;
+
+        for (idx = 0; idx < ARRAY_SIZE(gen3_tex_formats); idx++) {
+                if (gen3_tex_formats[idx].fmt != format)
+                        continue;
+
+                channel->card_format = gen3_tex_formats[idx].card_fmt;
+                channel->rb_reversed = gen3_tex_formats[idx].rb_reversed;
+                return true;
+        }
+
+        return false;
+}
+/*
+ * Re-align the vertex buffer after a change of vertex size.
+ *
+ * When op->floats_per_vertex differs from the size last used, the vertex
+ * index is rounded up to the next whole vertex of the new size (switching
+ * to a fresh buffer first if fewer than two rectangles would fit) and the
+ * new size is remembered.  Nothing happens if the size is unchanged.
+ */
+static void
+gen3_align_vertex(struct sna *sna,
+                  const struct sna_composite_op *op)
+{
+        struct gen3_render_state *state = &sna->render_state.gen3;
+        struct sna_render *render = &sna->render;
+
+        if (op->floats_per_vertex == state->last_floats_per_vertex)
+                return;
+
+        if (render->vertex_size - render->vertex_used < 2*op->floats_per_rect)
+                gen3_vertex_finish(sna);
+
+        DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
+             sna->render_state.gen3.last_floats_per_vertex,
+             op->floats_per_vertex,
+             sna->render.vertex_index,
+             (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
+
+        /* Round the float count up to a whole number of new-size vertices. */
+        render->vertex_index = (render->vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
+        render->vertex_used = render->vertex_index * op->floats_per_vertex;
+        assert(render->vertex_used < render->vertex_size - op->floats_per_rect);
+
+        state->last_floats_per_vertex = op->floats_per_vertex;
+}
+/*
+ * Return true for the shader types treated as constant by this file
+ * (none, zero, black, white, constant) and false for every other type.
+ * Callers use this to decide whether a channel adds texture coordinates
+ * to the vertex layout.
+ */
+static inline bool is_constant_ps(uint32_t type)
+{
+        return type == SHADER_NONE || /* be warned! */
+               type == SHADER_ZERO ||
+               type == SHADER_BLACK ||
+               type == SHADER_WHITE ||
+               type == SHADER_CONSTANT;
+}
+/*
+ * Prepare `tmp` for a masked texture blit and emit its initial state.
+ *
+ * The operation is always set up as PictOpSrc with an x8r8g8b8 destination,
+ * an x8r8g8b8 source texture and an a8 mask texture, both affine, unrepeated
+ * and nearest-filtered, using the identity source+mask vertex emitter.
+ * The `op`, src_x/src_y, msk_x/msk_y and dst_x/dst_y arguments are not
+ * read, and the channel offsets are not written here.
+ *
+ * The source scale is derived from the requested blit width/height, the
+ * mask scale from the mask pixmap's own dimensions.
+ *
+ * Always returns true.
+ */
+static bool
+gen3_blit_tex(struct sna *sna,
+              uint8_t op,
+              PixmapPtr src, struct kgem_bo *src_bo,
+              PixmapPtr mask, struct kgem_bo *mask_bo,
+              PixmapPtr dst, struct kgem_bo *dst_bo,
+              int32_t src_x, int32_t src_y,
+              int32_t msk_x, int32_t msk_y,
+              int32_t dst_x, int32_t dst_y,
+              int32_t width, int32_t height,
+              struct sna_composite_op *tmp)
+{
+        DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
+         width, height, sna->kgem.ring));
+
+        /* Operation-wide settings. */
+        tmp->op = PictOpSrc;
+        tmp->need_magic_ca_pass = false;
+        tmp->has_component_alpha = false;
+        tmp->u.gen3.num_constants = 0;
+
+        /* Destination. */
+        tmp->dst.pixmap = dst;
+        tmp->dst.bo = dst_bo;
+        tmp->dst.width = dst->drawable.width;
+        tmp->dst.height = dst->drawable.height;
+        tmp->dst.format = PICT_x8r8g8b8;
+        tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
+
+        /* Source texture. */
+        tmp->src.u.gen3.type = SHADER_TEXTURE;
+        tmp->src.is_affine = true;
+        tmp->src.repeat = RepeatNone;
+        tmp->src.filter = PictFilterNearest;
+        tmp->src.bo = src_bo;
+        tmp->src.pict_format = PICT_x8r8g8b8;
+        gen3_composite_channel_set_format(&tmp->src, tmp->src.pict_format);
+        tmp->src.width = src->drawable.width;
+        tmp->src.height = src->drawable.height;
+        tmp->src.scale[0] = 1.f/width;
+        tmp->src.scale[1] = 1.f/height;
+
+        /* Mask texture. */
+        tmp->mask.u.gen3.type = SHADER_TEXTURE;
+        tmp->mask.is_affine = true;
+        tmp->mask.repeat = RepeatNone;
+        tmp->mask.filter = PictFilterNearest;
+        tmp->mask.bo = mask_bo;
+        tmp->mask.pict_format = PIXMAN_a8;
+        gen3_composite_channel_set_format(&tmp->mask, tmp->mask.pict_format);
+        tmp->mask.width = mask->drawable.width;
+        tmp->mask.height = mask->drawable.height;
+        tmp->mask.scale[0] = 1.f/mask->drawable.width;
+        tmp->mask.scale[1] = 1.f/mask->drawable.height;
+
+        /* Vertex layout: x,y plus coordinates for each textured channel. */
+        tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
+        tmp->floats_per_vertex = 2;
+        if (!is_constant_ps(tmp->src.u.gen3.type))
+                tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
+        if (!is_constant_ps(tmp->mask.u.gen3.type))
+                tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
+        DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
+             !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
+             !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
+             tmp->floats_per_vertex,
+             tmp->prim_emit != gen3_emit_composite_primitive));
+        tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
+
+        tmp->blt = gen3_render_composite_blt;
+        tmp->done = gen3_render_composite_done;
+
+        /* Submit the current batch first if the three buffers do not fit. */
+        if (!kgem_check_bo(&sna->kgem,
+                           tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
+                           NULL))
+                kgem_submit(&sna->kgem);
+
+        gen3_emit_composite_state(sna, tmp);
+        gen3_align_vertex(sna, tmp);
+        return true;
+}
+/*
+ * Flush callback of the gen3 backend (installed as render->flush by
+ * gen3_render_init).  Calls gen3_vertex_close() and then asserts that
+ * vertex_reloc[0] and vertex_offset in sna->render are both zero.
+ */
+static void gen3_render_flush(struct sna *sna)
+{
+        gen3_vertex_close(sna);
+        assert(sna->render.vertex_reloc[0] == 0);
+        assert(sna->render.vertex_offset == 0);
+}
+/*
+ * Teardown callback of the gen3 backend (installed as render->fini by
+ * gen3_render_init).  Currently a no-op.
+ */
+static void
+gen3_render_fini(struct sna *sna)
+{
+}
+/*
+ * Wire the gen3 backend into @sna: set the 3D size/pitch limits, install the
+ * render callbacks (blit_tex, reset, flush, fini) and the kgem retire/expire
+ * callbacks.
+ *
+ * Returns true unconditionally.
+ */
+bool gen3_render_init(struct sna *sna)
+{
+        struct sna_render *r = &sna->render;
+
+        /* render->video is not assigned here (gen3_render_video is disabled). */
+        r->max_3d_size = MAX_3D_SIZE;
+        r->max_3d_pitch = MAX_3D_PITCH;
+
+        r->blit_tex = gen3_blit_tex;
+        r->reset = gen3_render_reset;
+        r->flush = gen3_render_flush;
+        r->fini = gen3_render_fini;
+
+        sna->kgem.expire = gen3_render_expire;
+        sna->kgem.retire = gen3_render_retire;
+
+        return true;
+}

 /drivers/video/Intel-2D/gen3_render.h
 ,0 → 1,1479
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef _I915_REG_H_
+#define _I915_REG_H_
+#define CMD_3D (3 << 29)
+/*
+ * Replace the bits of 'var' selected by 'mask' with 'value': the masked bits
+ * are cleared first, then 'value' is OR-ed in.  'var' must be an lvalue and is
+ * evaluated twice; 'value' is not masked by the macro.
+ */
+#define I915_SET_FIELD( var, mask, value ) ((var) &= ~(mask), (var) |= (value))
+#define PRIM3D                  (CMD_3D | (0x1f<<24))
+#define PRIM3D_INDIRECT_SEQUENTIAL      ((1<<23) | (0<<17))
+#define PRIM3D_TRILIST          (PRIM3D | (0x0<<18))
+#define PRIM3D_TRISTRIP         (PRIM3D | (0x1<<18))
+#define PRIM3D_TRISTRIP_RVRSE   (PRIM3D | (0x2<<18))
+#define PRIM3D_TRIFAN           (PRIM3D | (0x3<<18))
+#define PRIM3D_POLY             (PRIM3D | (0x4<<18))
+#define PRIM3D_LINELIST         (PRIM3D | (0x5<<18))
+#define PRIM3D_LINESTRIP        (PRIM3D | (0x6<<18))
+#define PRIM3D_RECTLIST         (PRIM3D | (0x7<<18))
+#define PRIM3D_POINTLIST        (PRIM3D | (0x8<<18))
+#define PRIM3D_DIB              (PRIM3D | (0x9<<18))
+#define PRIM3D_CLEAR_RECT       (PRIM3D | (0xa<<18))
+#define PRIM3D_ZONE_INIT        (PRIM3D | (0xd<<18))
+#define PRIM3D_MASK             (0x1f<<18)
+/* p137 */
+#define _3DSTATE_AA_CMD                 (CMD_3D | (0x06<<24))
+#define AA_LINE_ECAAR_WIDTH_ENABLE      (1<<16)
+#define AA_LINE_ECAAR_WIDTH_0_5         0
+#define AA_LINE_ECAAR_WIDTH_1_0         (1<<14)
+#define AA_LINE_ECAAR_WIDTH_2_0         (2<<14)
+#define AA_LINE_ECAAR_WIDTH_4_0         (3<<14)
+#define AA_LINE_REGION_WIDTH_ENABLE     (1<<8)
+#define AA_LINE_REGION_WIDTH_0_5        0
+#define AA_LINE_REGION_WIDTH_1_0        (1<<6)
+#define AA_LINE_REGION_WIDTH_2_0        (2<<6)
+#define AA_LINE_REGION_WIDTH_4_0        (3<<6)
+/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
+#define _3DSTATE_BACKFACE_STENCIL_OPS    (CMD_3D | (0x8<<24))
+#define BFO_ENABLE_STENCIL_REF          (1<<23)
+#define BFO_STENCIL_REF_SHIFT           15
+#define BFO_STENCIL_REF_MASK            (0xff<<15)
+#define BFO_ENABLE_STENCIL_FUNCS        (1<<14)
+#define BFO_STENCIL_TEST_SHIFT          11
+#define BFO_STENCIL_TEST_MASK           (0x7<<11)
+#define BFO_STENCIL_FAIL_SHIFT          8
+#define BFO_STENCIL_FAIL_MASK           (0x7<<8)
+#define BFO_STENCIL_PASS_Z_FAIL_SHIFT   5
+#define BFO_STENCIL_PASS_Z_FAIL_MASK    (0x7<<5)
+#define BFO_STENCIL_PASS_Z_PASS_SHIFT   2
+#define BFO_STENCIL_PASS_Z_PASS_MASK    (0x7<<2)
+#define BFO_ENABLE_STENCIL_TWO_SIDE     (1<<1)
+#define BFO_STENCIL_TWO_SIDE            (1<<0)
+/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
+#define _3DSTATE_BACKFACE_STENCIL_MASKS    (CMD_3D | (0x9<<24))
+#define BFM_ENABLE_STENCIL_TEST_MASK      (1<<17)
+#define BFM_ENABLE_STENCIL_WRITE_MASK     (1<<16)
+#define BFM_STENCIL_TEST_MASK_SHIFT       8
+#define BFM_STENCIL_TEST_MASK_MASK        (0xff<<8)
+#define BFM_STENCIL_WRITE_MASK_SHIFT      0
+#define BFM_STENCIL_WRITE_MASK_MASK       (0xff<<0)
+/* 3DSTATE_BIN_CONTROL p141 */
+/* p143 */
+#define _3DSTATE_BUF_INFO_CMD   (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
+/* Dword 1 */
+#define BUF_3D_ID_COLOR_BACK    (0x3<<24)
+#define BUF_3D_ID_DEPTH         (0x7<<24)
+#define BUF_3D_USE_FENCE        (1<<23)
+#define BUF_3D_TILED_SURFACE    (1<<22)
+#define BUF_3D_TILE_WALK_X      0
+#define BUF_3D_TILE_WALK_Y      (1<<21)
+/* Dword 2 */
+#define BUF_3D_ADDR(x)          ((x) & ~0x3)
+/* 3DSTATE_CHROMA_KEY */
+/* 3DSTATE_CLEAR_PARAMETERS, p150 */
+#define _3DSTATE_CLEAR_PARAMETERS   (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
+/* Dword 1 */
+#define CLEARPARAM_CLEAR_RECT       (1 << 16)
+#define CLEARPARAM_ZONE_INIT        (0 << 16)
+#define CLEARPARAM_WRITE_COLOR      (1 << 2)
+#define CLEARPARAM_WRITE_DEPTH      (1 << 1)
+#define CLEARPARAM_WRITE_STENCIL    (1 << 0)
+/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
+#define _3DSTATE_CONST_BLEND_COLOR_CMD  (CMD_3D | (0x1d<<24) | (0x88<<16))
+/* 3DSTATE_COORD_SET_BINDINGS, p154 */
+#define _3DSTATE_COORD_SET_BINDINGS      (CMD_3D | (0x16<<24))
+/* Shifts 'eunit' left by 3 bits per 'iunit'.  Both arguments are
+ * parenthesised so that expressions such as CSB_TCB(i + 1, e) expand correctly. */
+#define CSB_TCB(iunit, eunit)           ((eunit)<<((iunit)*3))
+/* p156 */
+#define _3DSTATE_DFLT_DIFFUSE_CMD       (CMD_3D | (0x1d<<24) | (0x99<<16))
+/* p157 */
+#define _3DSTATE_DFLT_SPEC_CMD          (CMD_3D | (0x1d<<24) | (0x9a<<16))
+/* p158 */
+#define _3DSTATE_DFLT_Z_CMD             (CMD_3D | (0x1d<<24) | (0x98<<16))
+/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
+#define _3DSTATE_DEPTH_OFFSET_SCALE       (CMD_3D | (0x1d<<24) | (0x97<<16))
+/* scale in dword 1 */
+/* The depth subrectangle is not supported, but must be disabled. */
+/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
+#define _3DSTATE_DEPTH_SUBRECT_DISABLE  (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0))
+/* p161 */
+#define _3DSTATE_DST_BUF_VARS_CMD       (CMD_3D | (0x1d<<24) | (0x85<<16))
+/* Dword 1 */
+#define TEX_DEFAULT_COLOR_OGL           (0<<30)
+#define TEX_DEFAULT_COLOR_D3D           (1<<30)
+#define ZR_EARLY_DEPTH                  (1<<29)
+#define LOD_PRECLAMP_OGL                (1<<28)
+#define LOD_PRECLAMP_D3D                (0<<28)
+#define DITHER_FULL_ALWAYS              (0<<26)
+#define DITHER_FULL_ON_FB_BLEND         (1<<26)
+#define DITHER_CLAMPED_ALWAYS           (2<<26)
+#define LINEAR_GAMMA_BLEND_32BPP        (1<<25)
+#define DEBUG_DISABLE_ENH_DITHER        (1<<24)
+#define DSTORG_HORT_BIAS(x)             ((x)<<20)
+#define DSTORG_VERT_BIAS(x)             ((x)<<16)
+#define COLOR_4_2_2_CHNL_WRT_ALL        0
+#define COLOR_4_2_2_CHNL_WRT_Y          (1<<12)
+#define COLOR_4_2_2_CHNL_WRT_CR         (2<<12)
+#define COLOR_4_2_2_CHNL_WRT_CB         (3<<12)
+#define COLOR_4_2_2_CHNL_WRT_CRCB       (4<<12)
+#define COLR_BUF_8BIT                   0
+#define COLR_BUF_RGB555                 (1<<8)
+#define COLR_BUF_RGB565                 (2<<8)
+#define COLR_BUF_ARGB8888               (3<<8)
+#define COLR_BUF_ARGB4444               (8<<8)
+#define COLR_BUF_ARGB1555               (9<<8)
+#define COLR_BUF_ARGB2AAA               (0xa<<8)
+#define DEPTH_IS_Z                      0
+#define DEPTH_IS_W                      (1<<6)
+#define DEPTH_FRMT_16_FIXED             0
+#define DEPTH_FRMT_16_FLOAT             (1<<2)
+#define DEPTH_FRMT_24_FIXED_8_OTHER     (2<<2)
+#define DEPTH_FRMT_24_FLOAT_8_OTHER     (3<<2)
+#define VERT_LINE_STRIDE_1              (1<<1)
+#define VERT_LINE_STRIDE_0              0
+#define VERT_LINE_STRIDE_OFS_1          1
+#define VERT_LINE_STRIDE_OFS_0          0
+/* p166 */
+#define _3DSTATE_DRAW_RECT_CMD          (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
+/* Dword 1 */
+#define DRAW_RECT_DIS_DEPTH_OFS         (1<<30)
+#define DRAW_DITHER_OFS_X(x)            ((x)<<26)
+#define DRAW_DITHER_OFS_Y(x)            ((x)<<24)
+/* Dword 2 */
+/* The Y helpers widen to uint32_t before shifting: a uint16_t operand is
+ * promoted to int, and shifting a value with bit 15 set by 16 would overflow
+ * into the sign bit. */
+#define DRAW_YMIN(x)                    ((uint32_t)(uint16_t)(x)<<16)
+#define DRAW_XMIN(x)                    ((uint16_t)(x))
+/* Dword 3 */
+#define DRAW_YMAX(x)                    ((uint32_t)(uint16_t)(x)<<16)
+#define DRAW_XMAX(x)                    ((uint16_t)(x))
+/* Dword 4 */
+#define DRAW_YORG(x)                    ((uint32_t)(uint16_t)(x)<<16)
+#define DRAW_XORG(x)                    ((uint16_t)(x))
+/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
+/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
+/* _3DSTATE_FOG_COLOR, p173 */
+#define _3DSTATE_FOG_COLOR_CMD          (CMD_3D|(0x15<<24))
+#define FOG_COLOR_RED(x)                ((x)<<16)
+#define FOG_COLOR_GREEN(x)              ((x)<<8)
+#define FOG_COLOR_BLUE(x)               (x)
+/* _3DSTATE_FOG_MODE, p174 */
+#define _3DSTATE_FOG_MODE_CMD           (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
+/* Dword 1 */
+#define FMC1_FOGFUNC_MODIFY_ENABLE      (1<<31)
+#define FMC1_FOGFUNC_VERTEX             (0<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP          (1<<28)
+#define FMC1_FOGFUNC_PIXEL_EXP2         (2<<28)
+#define FMC1_FOGFUNC_PIXEL_LINEAR       (3<<28)
+#define FMC1_FOGFUNC_MASK               (3<<28)
+#define FMC1_FOGINDEX_MODIFY_ENABLE     (1<<27)
+#define FMC1_FOGINDEX_Z                 (0<<25)
+#define FMC1_FOGINDEX_W                 (1<<25)
+#define FMC1_C1_C2_MODIFY_ENABLE        (1<<24)
+#define FMC1_DENSITY_MODIFY_ENABLE      (1<<23)
+#define FMC1_C1_ONE                     (1<<13)
+#define FMC1_C1_MASK                    (0xffff<<4)
+/* Dword 2 */
+#define FMC2_C2_ONE                     (1<<16)
+/* Dword 3 */
+#define FMC3_D_ONE                      (1<<16)
+/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
+#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD    (CMD_3D|(0x0b<<24))
+#define IAB_MODIFY_ENABLE               (1<<23)
+#define IAB_ENABLE                      (1<<22)
+#define IAB_MODIFY_FUNC                 (1<<21)
+#define IAB_FUNC_SHIFT                  16
+#define IAB_MODIFY_SRC_FACTOR           (1<<11)
+#define IAB_SRC_FACTOR_SHIFT            6
+#define IAB_SRC_FACTOR_MASK             (BLENDFACT_MASK<<6)
+#define IAB_MODIFY_DST_FACTOR           (1<<5)
+#define IAB_DST_FACTOR_SHIFT            0
+#define IAB_DST_FACTOR_MASK             (BLENDFACT_MASK<<0)
+#define BLENDFACT_ZERO                  0x01
+#define BLENDFACT_ONE                   0x02
+#define BLENDFACT_SRC_COLR              0x03
+#define BLENDFACT_INV_SRC_COLR          0x04
+#define BLENDFACT_SRC_ALPHA             0x05
+#define BLENDFACT_INV_SRC_ALPHA         0x06
+#define BLENDFACT_DST_ALPHA             0x07
+#define BLENDFACT_INV_DST_ALPHA         0x08
+#define BLENDFACT_DST_COLR              0x09
+#define BLENDFACT_INV_DST_COLR          0x0a
+#define BLENDFACT_SRC_ALPHA_SATURATE    0x0b
+#define BLENDFACT_CONST_COLOR           0x0c
+#define BLENDFACT_INV_CONST_COLOR       0x0d
+#define BLENDFACT_CONST_ALPHA           0x0e
+#define BLENDFACT_INV_CONST_ALPHA       0x0f
+#define BLENDFACT_MASK                  0x0f
+#define BLENDFUNC_ADD                   0x0
+#define BLENDFUNC_SUBTRACT              0x1
+#define BLENDFUNC_REVERSE_SUBTRACT      0x2
+#define BLENDFUNC_MIN                   0x3
+#define BLENDFUNC_MAX                   0x4
+#define BLENDFUNC_MASK                  0x7
+/* 3DSTATE_LOAD_INDIRECT, p180 */
+#define _3DSTATE_LOAD_INDIRECT          (CMD_3D|(0x1d<<24)|(0x7<<16))
+#define LI0_STATE_STATIC_INDIRECT       (0x01<<8)
+#define LI0_STATE_DYNAMIC_INDIRECT      (0x02<<8)
+#define LI0_STATE_SAMPLER               (0x04<<8)
+#define LI0_STATE_MAP                   (0x08<<8)
+#define LI0_STATE_PROGRAM               (0x10<<8)
+#define LI0_STATE_CONSTANTS             (0x20<<8)
+#define SIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SIS0_FORCE_LOAD                 (1<<1)
+#define SIS0_BUFFER_VALID               (1<<0)
+#define SIS1_BUFFER_LENGTH(x)           ((x)&0xff)
+#define DIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define DIS0_BUFFER_RESET               (1<<1)
+#define DIS0_BUFFER_VALID               (1<<0)
+#define SSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define SSB0_FORCE_LOAD                 (1<<1)
+#define SSB0_BUFFER_VALID               (1<<0)
+#define SSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+#define MSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define MSB0_FORCE_LOAD                 (1<<1)
+#define MSB0_BUFFER_VALID               (1<<0)
+#define MSB1_BUFFER_LENGTH(x)           ((x)&0xff)
+#define PSP0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSP0_FORCE_LOAD                 (1<<1)
+#define PSP0_BUFFER_VALID               (1<<0)
+#define PSP1_BUFFER_LENGTH(x)           ((x)&0xff)
+#define PSC0_BUFFER_ADDRESS(x)          ((x)&~0x3)
+#define PSC0_FORCE_LOAD                 (1<<1)
+#define PSC0_BUFFER_VALID               (1<<0)
+#define PSC1_BUFFER_LENGTH(x)           ((x)&0xff)
+/* _3DSTATE_RASTERIZATION_RULES */
+#define _3DSTATE_RASTER_RULES_CMD       (CMD_3D|(0x07<<24))
+#define ENABLE_POINT_RASTER_RULE        (1<<15)
+#define OGL_POINT_RASTER_RULE           (1<<13)
+#define ENABLE_TEXKILL_3D_4D            (1<<10)
+#define TEXKILL_3D                      (0<<9)
+#define TEXKILL_4D                      (1<<9)
+#define ENABLE_LINE_STRIP_PROVOKE_VRTX  (1<<8)
+#define ENABLE_TRI_FAN_PROVOKE_VRTX     (1<<5)
+#define LINE_STRIP_PROVOKE_VRTX(x)      ((x)<<6)
+#define TRI_FAN_PROVOKE_VRTX(x)         ((x)<<3)
+/* _3DSTATE_SCISSOR_ENABLE, p256 */
+#define _3DSTATE_SCISSOR_ENABLE_CMD     (CMD_3D|(0x1c<<24)|(0x10<<19))
+#define ENABLE_SCISSOR_RECT             ((1<<1) | 1)
+#define DISABLE_SCISSOR_RECT            (1<<1)
+/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
+#define _3DSTATE_SCISSOR_RECT_0_CMD     (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
+/* Dword 1 */
+#define SCISSOR_RECT_0_YMIN(x)          ((x)<<16)
+#define SCISSOR_RECT_0_XMIN(x)          (x)
+/* Dword 2 */
+#define SCISSOR_RECT_0_YMAX(x)          ((x)<<16)
+#define SCISSOR_RECT_0_XMAX(x)          (x)
+/* p189 */
+#define _3DSTATE_LOAD_STATE_IMMEDIATE_1   ((0x3<<29)|(0x1d<<24)|(0x04<<16))
+/* Bit (4 + n).  'n' is parenthesised so that operators binding looser than
+ * '+' inside the argument are not split by the expansion. */
+#define I1_LOAD_S(n)                      (1<<(4+(n)))
+#define S0_VB_OFFSET_MASK              0xffffffc
+#define S0_AUTO_CACHE_INV_DISABLE      (1<<0)
+#define S1_VERTEX_WIDTH_SHIFT          24
+#define S1_VERTEX_WIDTH_MASK           (0x3f<<24)
+#define S1_VERTEX_PITCH_SHIFT          16
+#define S1_VERTEX_PITCH_MASK           (0x3f<<16)
+#define TEXCOORDFMT_2D                 0x0
+#define TEXCOORDFMT_3D                 0x1
+#define TEXCOORDFMT_4D                 0x2
+#define TEXCOORDFMT_1D                 0x3
+#define TEXCOORDFMT_2D_16              0x4
+#define TEXCOORDFMT_4D_16              0x5
+#define TEXCOORDFMT_NOT_PRESENT        0xf
+#define S2_TEXCOORD_FMT0_MASK            0xf
+#define S2_TEXCOORD_FMT1_SHIFT           4
+/* Shifts 'type' left by 4 bits per 'unit'.  'unit' is parenthesised so that
+ * S2_TEXCOORD_FMT(u + 1, t) shifts by (u + 1) * 4 rather than u + 4. */
+#define S2_TEXCOORD_FMT(unit, type)    ((type)<<((unit)*4))
+#define S2_TEXCOORD_NONE               (~0)
+#define TEXCOORD_WRAP_SHORTEST_TCX      8
+#define TEXCOORD_WRAP_SHORTEST_TCY      4
+#define TEXCOORD_WRAP_SHORTEST_TCZ      2
+#define TEXCOORD_PERSPECTIVE_DISABLE    1
+#define S3_WRAP_SHORTEST_TCX(unit)      (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCY(unit)      (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4))
+#define S3_WRAP_SHORTEST_TCZ(unit)      (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4))
+#define S3_PERSPECTIVE_DISABLE(unit)    (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4))
+/* S3 not interesting */
+#define S4_POINT_WIDTH_SHIFT           23
+/* Unsigned constant: 0x1ff<<23 reaches bit 31 and would overflow a signed int. */
+#define S4_POINT_WIDTH_MASK            (0x1ffu<<23)
+#define S4_LINE_WIDTH_SHIFT            19
+#define S4_LINE_WIDTH_ONE              (0x2<<19)
+#define S4_LINE_WIDTH_MASK             (0xf<<19)
+#define S4_FLATSHADE_ALPHA             (1<<18)
+#define S4_FLATSHADE_FOG               (1<<17)
+#define S4_FLATSHADE_SPECULAR          (1<<16)
+#define S4_FLATSHADE_COLOR             (1<<15)
+#define S4_CULLMODE_BOTH               (0<<13)
+#define S4_CULLMODE_NONE               (1<<13)
+#define S4_CULLMODE_CW                 (2<<13)
+#define S4_CULLMODE_CCW                (3<<13)
+#define S4_CULLMODE_MASK               (3<<13)
+#define S4_VFMT_POINT_WIDTH            (1<<12)
+#define S4_VFMT_SPEC_FOG               (1<<11)
+#define S4_VFMT_COLOR                  (1<<10)
+#define S4_VFMT_DEPTH_OFFSET           (1<<9)
+#define S4_VFMT_XYZ                    (1<<6)
+#define S4_VFMT_XYZW                   (2<<6)
+#define S4_VFMT_XY                     (3<<6)
+#define S4_VFMT_XYW                    (4<<6)
+#define S4_VFMT_XYZW_MASK              (7<<6)
+#define S4_FORCE_DEFAULT_DIFFUSE       (1<<5)
+#define S4_FORCE_DEFAULT_SPECULAR      (1<<4)
+#define S4_LOCAL_DEPTH_OFFSET_ENABLE   (1<<3)
+#define S4_VFMT_FOG_PARAM              (1<<2)
+#define S4_SPRITE_POINT_ENABLE         (1<<1)
+#define S4_LINE_ANTIALIAS_ENABLE       (1<<0)
+#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   |   \
+                      S4_VFMT_SPEC_FOG      |   \
+                      S4_VFMT_COLOR         |   \
+                      S4_VFMT_DEPTH_OFFSET  |   \
+                      S4_VFMT_XYZW_MASK     |   \
+                      S4_VFMT_FOG_PARAM)
+/* Bits 31:28.  Unsigned constants keep the shifts that reach bit 31 well
+ * defined (shifting a signed value into the sign bit is undefined in C). */
+#define S5_WRITEDISABLE_ALPHA          (1u<<31)
+#define S5_WRITEDISABLE_RED            (1u<<30)
+#define S5_WRITEDISABLE_GREEN          (1u<<29)
+#define S5_WRITEDISABLE_BLUE           (1u<<28)
+#define S5_WRITEDISABLE_MASK           (0xfu<<28)
+#define S5_FORCE_DEFAULT_POINT_SIZE    (1<<27)
+#define S5_LAST_PIXEL_ENABLE           (1<<26)
+#define S5_GLOBAL_DEPTH_OFFSET_ENABLE  (1<<25)
+#define S5_FOG_ENABLE                  (1<<24)
+#define S5_STENCIL_REF_SHIFT           16
+#define S5_STENCIL_REF_MASK            (0xff<<16)
+#define S5_STENCIL_TEST_FUNC_SHIFT     13
+#define S5_STENCIL_TEST_FUNC_MASK      (0x7<<13)
+#define S5_STENCIL_FAIL_SHIFT          10
+#define S5_STENCIL_FAIL_MASK           (0x7<<10)
+#define S5_STENCIL_PASS_Z_FAIL_SHIFT   7
+#define S5_STENCIL_PASS_Z_FAIL_MASK    (0x7<<7)
+#define S5_STENCIL_PASS_Z_PASS_SHIFT   4
+#define S5_STENCIL_PASS_Z_PASS_MASK    (0x7<<4)
+#define S5_STENCIL_WRITE_ENABLE        (1<<3)
+#define S5_STENCIL_TEST_ENABLE         (1<<2)
+#define S5_COLOR_DITHER_ENABLE         (1<<1)
+#define S5_LOGICOP_ENABLE              (1<<0)
+#define S6_ALPHA_TEST_ENABLE           (1<<31)
+#define S6_ALPHA_TEST_FUNC_SHIFT       28
+#define S6_ALPHA_TEST_FUNC_MASK        (0x7<<28)
+#define S6_ALPHA_REF_SHIFT             20
+#define S6_ALPHA_REF_MASK              (0xff<<20)
+#define S6_DEPTH_TEST_ENABLE           (1<<19)
+#define S6_DEPTH_TEST_FUNC_SHIFT       16
+#define S6_DEPTH_TEST_FUNC_MASK        (0x7<<16)
+#define S6_CBUF_BLEND_ENABLE           (1<<15)
+#define S6_CBUF_BLEND_FUNC_SHIFT       12
+#define S6_CBUF_BLEND_FUNC_MASK        (0x7<<12)
+#define S6_CBUF_SRC_BLEND_FACT_SHIFT   8
+#define S6_CBUF_SRC_BLEND_FACT_MASK    (0xf<<8)
+#define S6_CBUF_DST_BLEND_FACT_SHIFT   4
+#define S6_CBUF_DST_BLEND_FACT_MASK    (0xf<<4)
+#define S6_DEPTH_WRITE_ENABLE          (1<<3)
+#define S6_COLOR_WRITE_ENABLE          (1<<2)
+#define S6_TRISTRIP_PV_SHIFT           0
+#define S6_TRISTRIP_PV_MASK            (0x3<<0)
+#define S7_DEPTH_OFFSET_CONST_MASK     ~0
+/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
+/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
+/* _3DSTATE_MODES_4, p218 */
+#define _3DSTATE_MODES_4_CMD            (CMD_3D|(0x0d<<24))
+#define ENABLE_LOGIC_OP_FUNC            (1<<23)
+#define LOGIC_OP_FUNC(x)                ((x)<<18)
+#define LOGICOP_MASK                    (0xf<<18)
+#define LOGICOP_COPY                    0xc
+#define MODE4_ENABLE_STENCIL_TEST_MASK  ((1<<17)|(0xff00))
+#define ENABLE_STENCIL_TEST_MASK        (1<<17)
+#define STENCIL_TEST_MASK(x)            ((x)<<8)
+#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
+#define ENABLE_STENCIL_WRITE_MASK       (1<<16)
+#define STENCIL_WRITE_MASK(x)           ((x)&0xff)
+/* _3DSTATE_MODES_5, p220 */
+#define _3DSTATE_MODES_5_CMD            (CMD_3D|(0x0c<<24))
+#define PIPELINE_FLUSH_RENDER_CACHE     (1<<18)
+#define PIPELINE_FLUSH_TEXTURE_CACHE    (1<<16)
+/* p221 */
+#define _3DSTATE_PIXEL_SHADER_CONSTANTS  (CMD_3D|(0x1d<<24)|(0x6<<16))
+#define PS1_REG(n)                      (1<<(n))
+#define PS2_CONST_X(n)                  (n)
+#define PS3_CONST_Y(n)                  (n)
+#define PS4_CONST_Z(n)                  (n)
+#define PS5_CONST_W(n)                  (n)
+/* p222 */
+#define I915_MAX_TEX_INDIRECT 4
+#define I915_MAX_TEX_INSN     32
+#define I915_MAX_ALU_INSN     64
+#define I915_MAX_DECL_INSN    27
+#define I915_MAX_TEMPORARY    16
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space.  Maximum of 123 instructions.  Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16))
+#define REG_TYPE_R                 0    /* temporary regs, no need to
+                                         * dcl, must be written before
+                                         * read -- Preserved between
+                                         * phases.
+                                         */
+#define REG_TYPE_T                 1    /* Interpolated values, must be
+                                         * dcl'ed before use.
+                                         *
+                                         * 0..7: texture coord,
+                                         * 8: diffuse spec,
+                                         * 9: specular color,
+                                         * 10: fog parameter in w.
+                                         */
+#define REG_TYPE_CONST             2    /* Restriction: only one const
+                                         * can be referenced per
+                                         * instruction, though it may be
+                                         * selected for multiple inputs.
+                                         * Constants not initialized
+                                         * default to zero.
+                                         */
+#define REG_TYPE_S                 3    /* sampler */
+#define REG_TYPE_OC                4    /* output color (rgba) */
+#define REG_TYPE_OD                5    /* output depth (w), xyz are
+                                         * temporaries.  If not written,
+                                         * interpolated depth is used?
+                                         */
+#define REG_TYPE_U                 6    /* unpreserved temporaries */
+#define REG_TYPE_MASK              0x7
+#define REG_NR_MASK                0xf
+/* REG_TYPE_T:
+ */
+#define T_TEX0     0
+#define T_TEX1     1
+#define T_TEX2     2
+#define T_TEX3     3
+#define T_TEX4     4
+#define T_TEX5     5
+#define T_TEX6     6
+#define T_TEX7     7
+#define T_DIFFUSE  8
+#define T_SPECULAR 9
+#define T_FOG_W    10           /* interpolated fog is in W coord */
+/* Arithmetic instructions */
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP    (0x0<<24)     /* no operation */
+#define A0_ADD    (0x1<<24)     /* dst = src0 + src1 */
+#define A0_MOV    (0x2<<24)     /* dst = src0 */
+#define A0_MUL    (0x3<<24)     /* dst = src0 * src1 */
+#define A0_MAD    (0x4<<24)     /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24)     /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3    (0x6<<24)     /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4    (0x7<<24)     /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC    (0x8<<24)     /* dst = src0 - floor(src0) */
+#define A0_RCP    (0x9<<24)     /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ    (0xa<<24)     /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP    (0xb<<24)     /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG    (0xc<<24)     /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP    (0xd<<24)     /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN    (0xe<<24)     /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX    (0xf<<24)     /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR    (0x10<<24)    /* dst = floor(src0) */
+#define A0_MOD    (0x11<<24)    /* dst = src0 fmod 1.0 */
+#define A0_TRC    (0x12<<24)    /* dst = int(src0) */
+#define A0_SGE    (0x13<<24)    /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT    (0x14<<24)    /* dst = src0 < src1 ? 1.0 : 0.0 */
+#define A0_DEST_SATURATE                 (1<<22)
+#define A0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X                (1<<10)
+#define A0_DEST_CHANNEL_Y                (2<<10)
+#define A0_DEST_CHANNEL_Z                (4<<10)
+#define A0_DEST_CHANNEL_W                (8<<10)
+#define A0_DEST_CHANNEL_ALL              (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT            10
+#define A0_SRC0_TYPE_SHIFT               7
+#define A0_SRC0_NR_SHIFT                 2
+#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+#define SRC_X        0
+#define SRC_Y        1
+#define SRC_Z        2
+#define SRC_W        3
+#define SRC_ZERO     4
+#define SRC_ONE      5
+#define A1_SRC0_CHANNEL_X_NEGATE         (1<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT          28
+#define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT          24
+#define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT          20
+#define A1_SRC0_CHANNEL_W_NEGATE         (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT          16
+#define A1_SRC1_TYPE_SHIFT               13
+#define A1_SRC1_NR_SHIFT                 8
+#define A1_SRC1_CHANNEL_X_NEGATE         (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT          4
+#define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT          0
+#define A2_SRC1_CHANNEL_Z_NEGATE         (1<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT          28
+#define A2_SRC1_CHANNEL_W_NEGATE         (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT          24
+#define A2_SRC2_TYPE_SHIFT               21
+#define A2_SRC2_NR_SHIFT                 16
+#define A2_SRC2_CHANNEL_X_NEGATE         (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT          12
+#define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT          8
+#define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT          4
+#define A2_SRC2_CHANNEL_W_NEGATE         (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT          0
+/* Texture instructions */
+#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared
+                                 * sampler and address, and output
+                                 * filtered texel data to destination
+                                 * register */
+#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a
+                                 * perspective divide of the texture
+                                 * coordinate .xyz values by .w before
+                                 * sampling. */
+#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the
+                                 * computed LOD by w.  Only S4.6 two's
+                                 * comp is used.  This implies that a
+                                 * float to fixed conversion is
+                                 * done. */
+#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling
+                                 * operation.  Simply kills the pixel
+                                 * if any channel of the address
+                                 * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT              0      /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK               (0xf<<0)
+#define T1_ADDRESS_REG_TYPE_SHIFT        24     /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT          17
+#define T2_MBZ                           0
+/* Declaration instructions */
+#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib)
+                                 * register or an s (sampler)
+                                 * register. */
+#define D0_SAMPLE_TYPE_SHIFT              22
+#define D0_SAMPLE_TYPE_2D                 (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME             (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK               (0x3<<22)
+#define D0_TYPE_SHIFT                19
+/* Allow: T, S */
+#define D0_NR_SHIFT                  14
+/* Allow T: 0..10, S: 0..15 */
+#define D0_CHANNEL_X                (1<<10)
+#define D0_CHANNEL_Y                (2<<10)
+#define D0_CHANNEL_Z                (4<<10)
+#define D0_CHANNEL_W                (8<<10)
+#define D0_CHANNEL_ALL              (0xf<<10)
+#define D0_CHANNEL_NONE             (0<<10)
+#define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z)
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+#define D1_MBZ                          0
+#define D2_MBZ                          0
+/* p207.
+ * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
+ */
+#define _3DSTATE_MAP_STATE               (CMD_3D|(0x1d<<24)|(0x0<<16))
+#define MS1_MAPMASK_SHIFT               0
+/* NOTE(review): 0x8fff selects bits 0-11 and bit 15; confirm against the
+ * hardware documentation that this value (rather than 0xffff) is intended. */
+#define MS1_MAPMASK_MASK                (0x8fff<<0)
+/* Unsigned constant: shifting a signed 1 into bit 31 is undefined behaviour in C. */
+#define MS2_UNTRUSTED_SURFACE           (1u<<31)
+#define MS2_ADDRESS_MASK                0xfffffffc
+/* NOTE(review): MS2_VERTICAL_LINE_STRIDE and MS2_VERTICAL_OFFSET expand to the
+ * same bit (1<<1); confirm whether the offset is meant to be a different bit. */
+#define MS2_VERTICAL_LINE_STRIDE        (1<<1)
+#define MS2_VERTICAL_OFFSET             (1<<1)
+/* MS3 fields.  The MAPSURF_* values select the surface class and are
+ * already shifted into place (bits 7..9); the MT_* values select the
+ * texel format within that class and are already shifted to bit 3.
+ * The same MT_* number means different formats under different
+ * MAPSURF_* classes, as the per-group trailing comments indicate.
+ */
+#define MS3_HEIGHT_SHIFT              21
+#define MS3_WIDTH_SHIFT               10
+#define MS3_PALETTE_SELECT            (1<<9)
+#define MS3_MAPSURF_FORMAT_SHIFT      7
+#define MS3_MAPSURF_FORMAT_MASK       (0x7<<7)
+#define    MAPSURF_8BIT                    (1<<7)
+#define    MAPSURF_16BIT                   (2<<7)
+#define    MAPSURF_32BIT                   (3<<7)
+#define    MAPSURF_422                     (5<<7)
+#define    MAPSURF_COMPRESSED              (6<<7)
+#define    MAPSURF_4BIT_INDEXED            (7<<7)
+/* NOTE(review): the mask below covers three bits (3..5), yet MT_* values
+ * from (8<<3) up to (0xF<<3) are defined further down and need bit 6 as
+ * well; confirm the real field width before masking with it.
+ */
+#define MS3_MT_FORMAT_MASK         (0x7 << 3)
+#define MS3_MT_FORMAT_SHIFT        3
+#define    MT_4BIT_IDX_ARGB8888            (7<<3)       /* SURFACE_4BIT_INDEXED */
+#define    MT_8BIT_I8                      (0<<3)       /* SURFACE_8BIT */
+#define    MT_8BIT_L8                      (1<<3)
+#define    MT_8BIT_A8                      (4<<3)
+#define    MT_8BIT_MONO8                   (5<<3)
+#define    MT_16BIT_RGB565                 (0<<3)       /* SURFACE_16BIT */
+#define    MT_16BIT_ARGB1555               (1<<3)
+#define    MT_16BIT_ARGB4444               (2<<3)
+#define    MT_16BIT_AY88                   (3<<3)
+#define    MT_16BIT_88DVDU                 (5<<3)
+#define    MT_16BIT_BUMP_655LDVDU          (6<<3)
+#define    MT_16BIT_I16                    (7<<3)
+#define    MT_16BIT_L16                    (8<<3)
+#define    MT_16BIT_A16                    (9<<3)
+#define    MT_32BIT_ARGB8888               (0<<3)       /* SURFACE_32BIT */
+#define    MT_32BIT_ABGR8888               (1<<3)
+#define    MT_32BIT_XRGB8888               (2<<3)
+#define    MT_32BIT_XBGR8888               (3<<3)
+#define    MT_32BIT_QWVU8888               (4<<3)
+#define    MT_32BIT_AXVU8888               (5<<3)
+#define    MT_32BIT_LXVU8888               (6<<3)
+#define    MT_32BIT_XLVU8888               (7<<3)
+#define    MT_32BIT_ARGB2101010            (8<<3)
+#define    MT_32BIT_ABGR2101010            (9<<3)
+#define    MT_32BIT_AWVU2101010            (0xA<<3)
+#define    MT_32BIT_GR1616                 (0xB<<3)
+#define    MT_32BIT_VU1616                 (0xC<<3)
+#define    MT_32BIT_xI824                  (0xD<<3)
+#define    MT_32BIT_xA824                  (0xE<<3)
+#define    MT_32BIT_xL824                  (0xF<<3)
+#define    MT_422_YCRCB_SWAPY              (0<<3)       /* SURFACE_422 */
+#define    MT_422_YCRCB_NORMAL             (1<<3)
+#define    MT_422_YCRCB_SWAPUV             (2<<3)
+#define    MT_422_YCRCB_SWAPUVY            (3<<3)
+#define    MT_COMPRESS_DXT1                (0<<3)       /* SURFACE_COMPRESSED */
+#define    MT_COMPRESS_DXT2_3              (1<<3)
+#define    MT_COMPRESS_DXT4_5              (2<<3)
+#define    MT_COMPRESS_FXT1                (3<<3)
+#define    MT_COMPRESS_DXT1_RGB            (4<<3)
+/* Single-bit flags in the low bits of MS3. */
+#define MS3_USE_FENCE_REGS              (1<<2)
+#define MS3_TILED_SURFACE             (1<<1)
+#define MS3_TILE_WALK                 (1<<0)
+/* MS4 fields. */
+/* The pitch is the pitch measured in DWORDS, minus 1 */
+#define MS4_PITCH_SHIFT                 21
+/* One enable bit per cube face (bits 15..20); MS4_CUBE_FACE_ENA_MASK is
+ * the OR of all six.
+ */
+#define MS4_CUBE_FACE_ENA_NEGX          (1<<20)
+#define MS4_CUBE_FACE_ENA_POSX          (1<<19)
+#define MS4_CUBE_FACE_ENA_NEGY          (1<<18)
+#define MS4_CUBE_FACE_ENA_POSY          (1<<17)
+#define MS4_CUBE_FACE_ENA_NEGZ          (1<<16)
+#define MS4_CUBE_FACE_ENA_POSZ          (1<<15)
+#define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15)
+#define MS4_MAX_LOD_SHIFT               9
+#define MS4_MAX_LOD_MASK                (0x3f<<9)
+/* Mip layout selector in bit 8.  LEGACY and BELOW_LPT are two names for
+ * the same value (0).
+ */
+#define MS4_MIP_LAYOUT_LEGACY           (0<<8)
+#define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
+#define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
+#define MS4_VOLUME_DEPTH_SHIFT          0
+#define MS4_VOLUME_DEPTH_MASK           (0xff<<0)
+/* p244.
+ * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK.
+ */
+#define _3DSTATE_SAMPLER_STATE         (CMD_3D|(0x1d<<24)|(0x1<<16))
+#define SS1_MAPMASK_SHIFT               0
+/* NOTE(review): 0x8fff leaves bits 12..14 clear; confirm whether a
+ * contiguous 16-bit mask (0xffff) was intended.
+ */
+#define SS1_MAPMASK_MASK                (0x8fff<<0)
+/* Bit 31 is built from an unsigned 1: shifting a signed 1 into the sign
+ * bit of an int is undefined behaviour in C.
+ */
+#define SS2_REVERSE_GAMMA_ENABLE        (1u<<31)
+#define SS2_PACKED_TO_PLANAR_ENABLE     (1<<30)
+#define SS2_COLORSPACE_CONVERSION       (1<<29)
+#define SS2_CHROMAKEY_SHIFT             27
+#define SS2_BASE_MIP_LEVEL_SHIFT        22
+#define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22)
+#define SS2_MIP_FILTER_SHIFT            20
+#define SS2_MIP_FILTER_MASK             (0x3<<20)
+/* MIPFILTER_* are unshifted field values; shift by SS2_MIP_FILTER_SHIFT. */
+#define   MIPFILTER_NONE        0
+#define   MIPFILTER_NEAREST     1
+#define   MIPFILTER_LINEAR      3
+#define SS2_MAG_FILTER_SHIFT          17
+#define SS2_MAG_FILTER_MASK           (0x7<<17)
+/* FILTER_* are unshifted field values, listed between the MAG and MIN
+ * filter fields; shift by SS2_MAG_FILTER_SHIFT or SS2_MIN_FILTER_SHIFT.
+ */
+#define   FILTER_NEAREST        0
+#define   FILTER_LINEAR         1
+#define   FILTER_ANISOTROPIC    2
+#define   FILTER_4X4_1          3
+#define   FILTER_4X4_2          4
+#define   FILTER_4X4_FLAT       5
+#define   FILTER_6X5_MONO       6       /* XXX - check */
+#define SS2_MIN_FILTER_SHIFT          14
+#define SS2_MIN_FILTER_MASK           (0x7<<14)
+#define SS2_LOD_BIAS_SHIFT            5
+#define SS2_LOD_BIAS_ONE              (0x10<<5)
+#define SS2_LOD_BIAS_MASK             (0x1ff<<5)
+/* Shadow requires:
+ *  MT_32BIT_x{I,L,A}824 or MT_16BIT_{I,L,A}16 texture format
+ *  FILTER_4X4_x  MIN and MAG filters
+ */
+#define SS2_SHADOW_ENABLE             (1<<4)
+#define SS2_MAX_ANISO_MASK            (1<<3)
+#define SS2_MAX_ANISO_2               (0<<3)
+#define SS2_MAX_ANISO_4               (1<<3)
+#define SS2_SHADOW_FUNC_SHIFT         0
+#define SS2_SHADOW_FUNC_MASK          (0x7<<0)
+/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
+/* SS3 fields. */
+#define SS3_MIN_LOD_SHIFT            24
+#define SS3_MIN_LOD_ONE              (0x10<<24)
+/* 0xff<<24 does not fit in a signed int, so the shift is done on an
+ * unsigned operand to keep it well defined.
+ */
+#define SS3_MIN_LOD_MASK             (0xffu<<24)
+#define SS3_KILL_PIXEL_ENABLE        (1<<17)
+#define SS3_TCX_ADDR_MODE_SHIFT      12
+#define SS3_TCX_ADDR_MODE_MASK       (0x7<<12)
+/* TEXCOORDMODE_* are unshifted field values; shift them by the
+ * SS3_TC{X,Y,Z}_ADDR_MODE_SHIFT of the coordinate they apply to.
+ */
+#define   TEXCOORDMODE_WRAP             0
+#define   TEXCOORDMODE_MIRROR           1
+#define   TEXCOORDMODE_CLAMP_EDGE       2
+#define   TEXCOORDMODE_CUBE             3
+#define   TEXCOORDMODE_CLAMP_BORDER     4
+#define   TEXCOORDMODE_MIRROR_ONCE      5
+#define SS3_TCY_ADDR_MODE_SHIFT      9
+#define SS3_TCY_ADDR_MODE_MASK       (0x7<<9)
+#define SS3_TCZ_ADDR_MODE_SHIFT      6
+#define SS3_TCZ_ADDR_MODE_MASK       (0x7<<6)
+#define SS3_NORMALIZED_COORDS        (1<<5)
+#define SS3_TEXTUREMAP_INDEX_SHIFT   1
+#define SS3_TEXTUREMAP_INDEX_MASK    (0xf<<1)
+#define SS3_DEINTERLACER_ENABLE      (1<<0)
+/* SS4: every bit of the dword belongs to the border colour. */
+#define SS4_BORDER_COLOR_MASK        (~0)
+/* 3DSTATE_SPAN_STIPPLE, p258
+ *
+ * The opcode spells the top bits out as a literal (0x3<<29) where the
+ * neighbouring commands use CMD_3D; presumably the same value -- confirm.
+ */
+#define _3DSTATE_STIPPLE           ((0x3<<29)|(0x1d<<24)|(0x83<<16))
+/* ST1: enable flag in bit 16, mask covering the low 16 bits. */
+#define ST1_ENABLE               (1<<16)
+#define ST1_MASK                 (0xffff)
+/* Cache selection flags; which command consumes them is not visible
+ * here -- TODO confirm against the callers.
+ */
+#define FLUSH_MAP_CACHE    (1<<0)
+#define FLUSH_RENDER_CACHE (1<<1)
+#endif
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2006,2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+/* Each instruction is 3 dwords long, though most don't require all
+ * this space.  Maximum of 123 instructions.  Smaller maxes per insn
+ * type.
+ */
+#define _3DSTATE_PIXEL_SHADER_PROGRAM    (CMD_3D|(0x1d<<24)|(0x5<<16))
+/* Register file selectors (REG_TYPE_*).  These are the raw type codes
+ * that the instruction macros shift into the *_TYPE_SHIFT fields.
+ */
+#define REG_TYPE_R                 0 /* temporary regs, no need to
+                                      * dcl, must be written before
+                                      * read -- Preserved between
+                                      * phases.
+                                      */
+#define REG_TYPE_T                 1 /* Interpolated values, must be
+                                      * dcl'ed before use.
+                                      *
+                                      * 0..7: texture coord,
+                                      * 8: diffuse color,
+                                      * 9: specular color,
+                                      * 10: fog parameter in w.
+                                      */
+#define REG_TYPE_CONST             2 /* Restriction: only one const
+                                      * can be referenced per
+                                      * instruction, though it may be
+                                      * selected for multiple inputs.
+                                      * Constants not initialized
+                                      * default to zero.
+                                      */
+#define REG_TYPE_S                 3 /* sampler */
+#define REG_TYPE_OC                4 /* output color (rgba) */
+#define REG_TYPE_OD                5 /* output depth (w), xyz are
+                                      * temporaries.  If not written,
+                                      * interpolated depth is used?
+                                      */
+#define REG_TYPE_U                 6 /* unpreserved temporaries */
+/* Layout of a software register handle (see FS_* and REG_TYPE()/REG_NR()):
+ * register number in bits 0..3, register type in bits 4..6.
+ */
+#define REG_TYPE_MASK              0x7
+#define REG_TYPE_SHIFT             4
+#define REG_NR_MASK                0xf
+/* Register numbers within the REG_TYPE_T (interpolated) file:
+ * eight texture coordinate sets, then diffuse, specular and fog.
+ */
+#define T_TEX0     0
+#define T_TEX1     1
+#define T_TEX2     2
+#define T_TEX3     3
+#define T_TEX4     4
+#define T_TEX5     5
+#define T_TEX6     6
+#define T_TEX7     7
+#define T_DIFFUSE  8
+#define T_SPECULAR 9
+#define T_FOG_W    10           /* interpolated fog is in W coord */
+/* Arithmetic instructions */
+/* Opcodes are pre-shifted to bit 24; _gen3_fs_arith_masked() and
+ * _gen3_fs_arith() OR them into the first dword they emit.
+ */
+/* .replicate_swizzle == selection and replication of a particular
+ * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
+ */
+#define A0_NOP    (0x0<<24)             /* no operation */
+#define A0_ADD    (0x1<<24)             /* dst = src0 + src1 */
+#define A0_MOV    (0x2<<24)             /* dst = src0 */
+#define A0_MUL    (0x3<<24)             /* dst = src0 * src1 */
+#define A0_MAD    (0x4<<24)             /* dst = src0 * src1 + src2 */
+#define A0_DP2ADD (0x5<<24)             /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
+#define A0_DP3    (0x6<<24)             /* dst.xyzw = src0.xyz dot src1.xyz */
+#define A0_DP4    (0x7<<24)             /* dst.xyzw = src0.xyzw dot src1.xyzw */
+#define A0_FRC    (0x8<<24)             /* dst = src0 - floor(src0) */
+#define A0_RCP    (0x9<<24)             /* dst.xyzw = 1/(src0.replicate_swizzle) */
+#define A0_RSQ    (0xa<<24)             /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
+#define A0_EXP    (0xb<<24)             /* dst.xyzw = exp2(src0.replicate_swizzle) */
+#define A0_LOG    (0xc<<24)             /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
+#define A0_CMP    (0xd<<24)             /* dst = (src0 >= 0.0) ? src1 : src2 */
+#define A0_MIN    (0xe<<24)             /* dst = (src0 < src1) ? src0 : src1 */
+#define A0_MAX    (0xf<<24)             /* dst = (src0 >= src1) ? src0 : src1 */
+#define A0_FLR    (0x10<<24)            /* dst = floor(src0) */
+#define A0_MOD    (0x11<<24)            /* dst = src0 fmod 1.0 */
+#define A0_TRC    (0x12<<24)            /* dst = int(src0) */
+#define A0_SGE    (0x13<<24)            /* dst = src0 >= src1 ? 1.0 : 0.0 */
+#define A0_SLT    (0x14<<24)            /* dst = src0 < src1 ? 1.0 : 0.0 */
+/* Destination and source-0 register fields of the first arithmetic
+ * dword.  A0_DEST_CHANNEL_* are the four write-mask bits already
+ * shifted to A0_DEST_CHANNEL_SHIFT (bits 10..13).
+ */
+#define A0_DEST_SATURATE                 (1<<22)
+#define A0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+#define A0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define A0_DEST_CHANNEL_X                (1<<10)
+#define A0_DEST_CHANNEL_Y                (2<<10)
+#define A0_DEST_CHANNEL_Z                (4<<10)
+#define A0_DEST_CHANNEL_W                (8<<10)
+#define A0_DEST_CHANNEL_ALL              (0xf<<10)
+#define A0_DEST_CHANNEL_SHIFT            10
+#define A0_SRC0_TYPE_SHIFT               7
+#define A0_SRC0_NR_SHIFT                 2
+#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
+#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
+/* Source channel selectors: one of the four register channels, or the
+ * constants zero / one.  The numbering matches X_CHANNEL_VAL ..
+ * ONE_CHANNEL_VAL in enum gen3_fs_channel.
+ */
+#define SRC_X        0
+#define SRC_Y        1
+#define SRC_Z        2
+#define SRC_W        3
+#define SRC_ZERO     4
+#define SRC_ONE      5
+/* Source swizzle fields of the second (A1) and third (A2) arithmetic
+ * dwords.  Each channel has a selector field at *_SHIFT plus a
+ * single-bit *_NEGATE flag one bit above the three bits that
+ * gen3_get_hardware_channel_val() places at *_SHIFT.
+ *
+ * The two flags that live in bit 31 are built from an unsigned 1:
+ * shifting a signed 1 into the sign bit of an int is undefined
+ * behaviour in C.
+ */
+#define A1_SRC0_CHANNEL_X_NEGATE         (1u<<31)
+#define A1_SRC0_CHANNEL_X_SHIFT          28
+#define A1_SRC0_CHANNEL_Y_NEGATE         (1<<27)
+#define A1_SRC0_CHANNEL_Y_SHIFT          24
+#define A1_SRC0_CHANNEL_Z_NEGATE         (1<<23)
+#define A1_SRC0_CHANNEL_Z_SHIFT          20
+#define A1_SRC0_CHANNEL_W_NEGATE         (1<<19)
+#define A1_SRC0_CHANNEL_W_SHIFT          16
+#define A1_SRC1_TYPE_SHIFT               13
+#define A1_SRC1_NR_SHIFT                 8
+#define A1_SRC1_CHANNEL_X_NEGATE         (1<<7)
+#define A1_SRC1_CHANNEL_X_SHIFT          4
+#define A1_SRC1_CHANNEL_Y_NEGATE         (1<<3)
+#define A1_SRC1_CHANNEL_Y_SHIFT          0
+/* Source 1 continues in A2 with its Z and W channels. */
+#define A2_SRC1_CHANNEL_Z_NEGATE         (1u<<31)
+#define A2_SRC1_CHANNEL_Z_SHIFT          28
+#define A2_SRC1_CHANNEL_W_NEGATE         (1<<27)
+#define A2_SRC1_CHANNEL_W_SHIFT          24
+#define A2_SRC2_TYPE_SHIFT               21
+#define A2_SRC2_NR_SHIFT                 16
+#define A2_SRC2_CHANNEL_X_NEGATE         (1<<15)
+#define A2_SRC2_CHANNEL_X_SHIFT          12
+#define A2_SRC2_CHANNEL_Y_NEGATE         (1<<11)
+#define A2_SRC2_CHANNEL_Y_SHIFT          8
+#define A2_SRC2_CHANNEL_Z_NEGATE         (1<<7)
+#define A2_SRC2_CHANNEL_Z_SHIFT          4
+#define A2_SRC2_CHANNEL_W_NEGATE         (1<<3)
+#define A2_SRC2_CHANNEL_W_SHIFT          0
+/* Texture instructions */
+/* Opcodes are pre-shifted to bit 24, like the arithmetic A0_* opcodes. */
+#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared
+                                 * sampler and address, and output
+                                 * filtered texel data to destination
+                                 * register */
+#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a
+                                 * perspective divide of the texture
+                                 * coordinate .xyz values by .w before
+                                 * sampling. */
+#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the
+                                 * computed LOD by w.  Only S4.6 two's
+                                 * comp is used.  This implies that a
+                                 * float to fixed conversion is
+                                 * done. */
+#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling
+                                 * operation.  Simply kills the pixel
+                                 * if any channel of the address
+                                 * register is < 0.0. */
+#define T0_DEST_TYPE_SHIFT                19
+/* Allow: R, OC, OD, U */
+/* Note: U (unpreserved) regs do not retain their values between
+ * phases (cannot be used for feedback)
+ *
+ * Note: oC and OD registers can only be used as the destination of a
+ * texture instruction once per phase (this is an implementation
+ * restriction).
+ */
+#define T0_DEST_NR_SHIFT                 14
+/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
+#define T0_SAMPLER_NR_SHIFT              0 /* This field ignored for TEXKILL */
+#define T0_SAMPLER_NR_MASK               (0xf<<0)
+#define T1_ADDRESS_REG_TYPE_SHIFT        24 /* Reg to use as texture coord */
+/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
+#define T1_ADDRESS_REG_NR_SHIFT          17
+/* Third dword of a texture instruction carries no fields. */
+#define T2_MBZ                           0
+/* Declaration instructions */
+#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib)
+                                 * register or an s (sampler)
+                                 * register. */
+/* Sample type field (bits 22..23); values are already shifted. */
+#define D0_SAMPLE_TYPE_SHIFT              22
+#define D0_SAMPLE_TYPE_2D                 (0x0<<22)
+#define D0_SAMPLE_TYPE_CUBE               (0x1<<22)
+#define D0_SAMPLE_TYPE_VOLUME             (0x2<<22)
+#define D0_SAMPLE_TYPE_MASK               (0x3<<22)
+#define D0_TYPE_SHIFT                19
+/* Allow: T, S */
+#define D0_NR_SHIFT                  14
+/* Allow T: 0..10, S: 0..15 */
+/* Channel mask (bits 10..13); values are already shifted. */
+#define D0_CHANNEL_X                (1<<10)
+#define D0_CHANNEL_Y                (2<<10)
+#define D0_CHANNEL_Z                (4<<10)
+#define D0_CHANNEL_W                (8<<10)
+#define D0_CHANNEL_ALL              (0xf<<10)
+#define D0_CHANNEL_NONE             (0<<10)
+#define D0_CHANNEL_XY               (D0_CHANNEL_X|D0_CHANNEL_Y)
+#define D0_CHANNEL_XYZ              (D0_CHANNEL_XY|D0_CHANNEL_Z)
+/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
+ * or specular declarations.
+ *
+ * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
+ *
+ * Must be zero for S (sampler) dcls
+ */
+/* The remaining two dwords of a declaration; gen3_fs_dcl() emits zero
+ * for both.
+ */
+#define D1_MBZ                          0
+#define D2_MBZ                          0
+/* Unshifted destination write-mask bits for arithmetic operations, one
+ * bit per channel.  _gen3_fs_arith_masked() shifts them into place.
+ */
+#define MASK_X                  (1 << 0)
+#define MASK_Y                  (1 << 1)
+#define MASK_Z                  (1 << 2)
+#define MASK_W                  (1 << 3)
+#define MASK_XYZ                (MASK_X | MASK_Y | MASK_Z)
+#define MASK_XYZW               (MASK_XYZ | MASK_W)
+/* Extra flag carried alongside the channel bits; it requests
+ * A0_DEST_SATURATE and is stripped before the mask is shifted.
+ */
+#define MASK_SATURATE           (1 << 4)
+/* FS_* register handles: the REG_TYPE_* code shifted to REG_TYPE_SHIFT,
+ * OR-ed with the register number in the low bits.  REG_TYPE() and
+ * REG_NR() take them apart again.
+ */
+/* Temporary, undeclared regs. Preserved between phases */
+#define FS_R0                   ((REG_TYPE_R << REG_TYPE_SHIFT) | 0)
+#define FS_R1                   ((REG_TYPE_R << REG_TYPE_SHIFT) | 1)
+#define FS_R2                   ((REG_TYPE_R << REG_TYPE_SHIFT) | 2)
+#define FS_R3                   ((REG_TYPE_R << REG_TYPE_SHIFT) | 3)
+/* Texture coordinate regs.  Must be declared. */
+#define FS_T0                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 0)
+#define FS_T1                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 1)
+#define FS_T2                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 2)
+#define FS_T3                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 3)
+#define FS_T4                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 4)
+#define FS_T5                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 5)
+#define FS_T6                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 6)
+#define FS_T7                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 7)
+#define FS_T8                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 8)
+#define FS_T9                   ((REG_TYPE_T << REG_TYPE_SHIFT) | 9)
+#define FS_T10                  ((REG_TYPE_T << REG_TYPE_SHIFT) | 10)
+/* Constant values */
+#define FS_C0                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0)
+#define FS_C1                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1)
+#define FS_C2                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2)
+#define FS_C3                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3)
+#define FS_C4                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4)
+#define FS_C5                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5)
+#define FS_C6                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6)
+#define FS_C7                   ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7)
+/* Sampler regs */
+#define FS_S0                   ((REG_TYPE_S << REG_TYPE_SHIFT) | 0)
+#define FS_S1                   ((REG_TYPE_S << REG_TYPE_SHIFT) | 1)
+#define FS_S2                   ((REG_TYPE_S << REG_TYPE_SHIFT) | 2)
+#define FS_S3                   ((REG_TYPE_S << REG_TYPE_SHIFT) | 3)
+/* Output color */
+#define FS_OC                   ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0)
+/* Output depth */
+#define FS_OD                   ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0)
+/* Unpreserved temporary regs */
+/* NOTE(review): the field comments elsewhere in this file say
+ * "U: 0..2", yet FS_U3 is defined here -- confirm it is usable.
+ */
+#define FS_U0                   ((REG_TYPE_U << REG_TYPE_SHIFT) | 0)
+#define FS_U1                   ((REG_TYPE_U << REG_TYPE_SHIFT) | 1)
+#define FS_U2                   ((REG_TYPE_U << REG_TYPE_SHIFT) | 2)
+#define FS_U3                   ((REG_TYPE_U << REG_TYPE_SHIFT) | 3)
+/* Bit layout of a software operand word:
+ *   bits  0..3   register number   (REG_NR)
+ *   bits  4..6   register type     (REG_TYPE)
+ *   bits  7..10  X channel selector
+ *   bits 11..14  Y channel selector
+ *   bits 15..18  Z channel selector
+ *   bits 19..22  W channel selector
+ * Each channel selector is a 4-bit enum gen3_fs_channel value.
+ */
+#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3)
+#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4)
+#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4)
+#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4)
+#define REG_CHANNEL_MASK 0xf
+/* Field extractors for an operand word. */
+#define REG_NR(reg)             ((reg) & REG_NR_MASK)
+#define REG_TYPE(reg)           (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK)
+#define REG_X(reg)              (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Y(reg)              (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_Z(reg)              (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+#define REG_W(reg)              (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
+/**
+ * Per-channel swizzle selector stored in an operand word.
+ *
+ * The low three bits pick a source (a register channel, or the constant
+ * zero / one); bit 3 (0x8) marks the negated form of that source.
+ */
+enum gen3_fs_channel {
+        /* plain selectors */
+        X_CHANNEL_VAL = 0x0,
+        Y_CHANNEL_VAL = 0x1,
+        Z_CHANNEL_VAL = 0x2,
+        W_CHANNEL_VAL = 0x3,
+        ZERO_CHANNEL_VAL = 0x4,
+        ONE_CHANNEL_VAL = 0x5,
+        /* negated selectors: same source with the 0x8 flag set */
+        NEG_X_CHANNEL_VAL = 0x8 | X_CHANNEL_VAL,
+        NEG_Y_CHANNEL_VAL = 0x8 | Y_CHANNEL_VAL,
+        NEG_Z_CHANNEL_VAL = 0x8 | Z_CHANNEL_VAL,
+        NEG_W_CHANNEL_VAL = 0x8 | W_CHANNEL_VAL,
+        NEG_ONE_CHANNEL_VAL = 0x8 | ONE_CHANNEL_VAL
+};
+/**
+ * Build an operand word from a register handle and four swizzle names.
+ *
+ * reg        - an FS_* register handle (type and number)
+ * x, y, z, w - selector name prefixes (X, Y, Z, W, ZERO, ONE, NEG_X, ...);
+ *              each is pasted onto _CHANNEL_VAL to pick an
+ *              enum gen3_fs_channel value for that destination channel.
+ *
+ * The whole expansion is parenthesised so the result can be combined
+ * with tighter-binding operators (&, <<, ==, ...) without the trailing
+ * term being captured on its own.
+ */
+#define gen3_fs_operand(reg, x, y, z, w) \
+        ((reg) | \
+         (x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \
+         (y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \
+         (z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \
+         (w##_CHANNEL_VAL << W_CHANNEL_SHIFT))
+/**
+ * Construct an operand description for using a register with no swizzling
+ * (each channel reads the same-named channel: X, Y, Z, W).
+ */
+#define gen3_fs_operand_reg(reg)                                        \
+        gen3_fs_operand(reg, X, Y, Z, W)
+/**
+ * Same as gen3_fs_operand_reg(), but every channel uses the negated
+ * selector (NEG_X, NEG_Y, NEG_Z, NEG_W).
+ */
+#define gen3_fs_operand_reg_negate(reg)                                 \
+        gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W)
+/**
+ * Returns an operand containing (0.0, 0.0, 0.0, 0.0).
+ * FS_R0 is only used to fill the register fields; every channel selects
+ * the ZERO constant.
+ */
+#define gen3_fs_operand_zero() gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO)
+/**
+ * Returns an unused operand (an alias for gen3_fs_operand_zero()).
+ */
+#define gen3_fs_operand_none() gen3_fs_operand_zero()
+/**
+ * Returns an operand containing (1.0, 1.0, 1.0, 1.0).
+ * FS_R0 is only used to fill the register fields; every channel selects
+ * the ONE constant.
+ */
+#define gen3_fs_operand_one() gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE)
+/**
+ * Translate one software channel selector into hardware field bits.
+ *
+ * val    - an enum gen3_fs_channel value (evaluated twice)
+ * shift  - bit position of the channel's selector field
+ * negate - the channel's *_NEGATE flag
+ *
+ * The low three bits of val are placed at shift; if bit 3 (0x8) of val
+ * is set the negate flag is OR-ed in.  Every argument is parenthesised
+ * so that expression arguments are not split by operator precedence.
+ */
+#define gen3_get_hardware_channel_val(val, shift, negate) \
+        ((((val) & 0x7) << (shift)) | (((val) & 0x8) ? (negate) : 0))
+/**
+ * Emit the three-dword declaration instruction for a sampler (S) or
+ * interpolated (T) register handle.
+ *
+ * Sampler declarations carry an empty channel mask; every other register
+ * type is declared with all four channels.  The second and third dwords
+ * are the must-be-zero words.  `reg` is expanded several times.
+ */
+#define gen3_fs_dcl(reg)                                                \
+        do {                                                            \
+                OUT_BATCH(D0_DCL |                                      \
+                          (REG_TYPE(reg) << D0_TYPE_SHIFT) |            \
+                          (REG_NR(reg) << D0_NR_SHIFT) |                \
+                          ((REG_TYPE(reg) == REG_TYPE_S) ?              \
+                           D0_CHANNEL_NONE : D0_CHANNEL_ALL));          \
+                OUT_BATCH(D1_MBZ);                                      \
+                OUT_BATCH(D2_MBZ);                                      \
+        } while (0)
+/**
+ * Shared encoder for the three-dword texture instructions.
+ *
+ * cmd         - a pre-shifted T0_* opcode
+ * dest_reg    - register handle receiving the texel (type and number used)
+ * sampler_reg - sampler register handle (only its number is encoded)
+ * address_reg - register handle supplying the texture coordinate
+ *
+ * Dword 0 holds opcode, destination and sampler; dword 1 holds the
+ * address register; dword 2 is the must-be-zero word.
+ */
+#define _gen3_fs_tex(cmd, dest_reg, sampler_reg, address_reg)           \
+        do {                                                            \
+                OUT_BATCH((cmd) | \
+                          (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
+                          (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
+                          (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
+                OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
+                          (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
+                OUT_BATCH(T2_MBZ); \
+        } while (0)
+/** Emit a T0_TEXLD (plain texture load) instruction. */
+#define gen3_fs_texld(dest_reg, sampler_reg, address_reg)               \
+        _gen3_fs_tex(T0_TEXLD, dest_reg, sampler_reg, address_reg)
+/** Emit a T0_TEXLDP (texture load with perspective divide) instruction. */
+#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg)              \
+        _gen3_fs_tex(T0_TEXLDP, dest_reg, sampler_reg, address_reg)
+/* Front ends for the arithmetic encoders: `op` is the bare opcode name
+ * (MOV, ADD, RCP, ...) and is pasted onto A0_ to form the opcode macro.
+ * The _masked variant additionally takes a MASK_* destination write mask.
+ */
+#define gen3_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2)     \
+        _gen3_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2)
+#define gen3_fs_arith(op, dest_reg, operand0, operand1, operand2)       \
+        _gen3_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2)
+/**
+ * Emit one three-dword arithmetic instruction.
+ *
+ * cmd       - a pre-shifted A0_* opcode
+ * dest_reg  - destination register handle (type and number are encoded)
+ * dest_mask - OR of MASK_X/Y/Z/W selecting the channels written,
+ *             optionally OR-ed with MASK_SATURATE to set A0_DEST_SATURATE
+ * operand0..operand2 - operand words; register type, number and the four
+ *             per-channel swizzle selectors of each are encoded.  All
+ *             three are always encoded, whatever the opcode.
+ *
+ * Dword 0: opcode, destination, write mask, operand 0 register.
+ * Dword 1: operand 0 swizzle, operand 1 register and X/Y swizzle.
+ * Dword 2: operand 1 Z/W swizzle, operand 2 register and swizzle.
+ *
+ * Every argument is expanded more than once, so arguments must be free
+ * of side effects.
+ */
+#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \
+        do { \
+                /* Set up destination register and write mask */ \
+                OUT_BATCH((cmd) | \
+                          (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
+                          (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
+                          (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \
+                          (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \
+                          /* Set up operand 0 */ \
+                          (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
+                          (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
+                OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
+                                                        A1_SRC0_CHANNEL_X_SHIFT, \
+                                                        A1_SRC0_CHANNEL_X_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_Y(operand0), \
+                                                        A1_SRC0_CHANNEL_Y_SHIFT, \
+                                                        A1_SRC0_CHANNEL_Y_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_Z(operand0), \
+                                                        A1_SRC0_CHANNEL_Z_SHIFT, \
+                                                        A1_SRC0_CHANNEL_Z_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_W(operand0), \
+                                                        A1_SRC0_CHANNEL_W_SHIFT, \
+                                                        A1_SRC0_CHANNEL_W_NEGATE) | \
+                          /* Set up operand 1 */ \
+                          (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
+                          (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
+                          gen3_get_hardware_channel_val(REG_X(operand1), \
+                                                        A1_SRC1_CHANNEL_X_SHIFT, \
+                                                        A1_SRC1_CHANNEL_X_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_Y(operand1), \
+                                                        A1_SRC1_CHANNEL_Y_SHIFT, \
+                                                        A1_SRC1_CHANNEL_Y_NEGATE)); \
+                OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
+                                                        A2_SRC1_CHANNEL_Z_SHIFT, \
+                                                        A2_SRC1_CHANNEL_Z_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_W(operand1), \
+                                                        A2_SRC1_CHANNEL_W_SHIFT, \
+                                                        A2_SRC1_CHANNEL_W_NEGATE) | \
+                          /* Set up operand 2 */ \
+                          (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
+                          (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
+                          gen3_get_hardware_channel_val(REG_X(operand2), \
+                                                        A2_SRC2_CHANNEL_X_SHIFT, \
+                                                        A2_SRC2_CHANNEL_X_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_Y(operand2), \
+                                                        A2_SRC2_CHANNEL_Y_SHIFT, \
+                                                        A2_SRC2_CHANNEL_Y_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_Z(operand2), \
+                                                        A2_SRC2_CHANNEL_Z_SHIFT, \
+                                                        A2_SRC2_CHANNEL_Z_NEGATE) | \
+                          gen3_get_hardware_channel_val(REG_W(operand2), \
+                                                        A2_SRC2_CHANNEL_W_SHIFT, \
+                                                        A2_SRC2_CHANNEL_W_NEGATE)); \
+        } while (0)
+/**
+ * Emit an arithmetic instruction that writes all four destination
+ * channels without saturation.
+ *
+ * This is the masked encoder with dest_mask fixed to MASK_XYZW, which
+ * shifts to exactly A0_DEST_CHANNEL_ALL and leaves A0_DEST_SATURATE
+ * clear, so the emitted dwords match a hand-written unmasked encoding.
+ */
+#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) \
+        _gen3_fs_arith_masked(cmd, dest_reg, MASK_XYZW, \
+                              operand0, operand1, operand2)
+/** Copy operand0 into every channel of dest_reg (A0_MOV). */
+#define gen3_fs_mov(dest_reg, operand0) \
+        _gen3_fs_arith(A0_MOV, dest_reg, operand0, \
+                       gen3_fs_operand_none(), gen3_fs_operand_none())
+/** Copy operand0 into the dest_mask channels of dest_reg (A0_MOV). */
+#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \
+        _gen3_fs_arith_masked(A0_MOV, dest_reg, dest_mask, operand0, \
+                              gen3_fs_operand_none(), gen3_fs_operand_none())
+/** Put the fractional part of operand0 in dest_reg (A0_FRC). */
+#define gen3_fs_frc(dest_reg, operand0) \
+        _gen3_fs_arith(A0_FRC, dest_reg, operand0, \
+                       gen3_fs_operand_none(), gen3_fs_operand_none())
+/**
+ * Add operand0 and operand1 and put the result in dest_reg.
+ *
+ * Expands to gen3_fs_arith for the ADD op; the unused third source slot is
+ * filled with gen3_fs_operand_none().
+ */
+#define gen3_fs_add(dest_reg, operand0, operand1)                       \
+        gen3_fs_arith (ADD, dest_reg, \
+                       operand0, operand1,      \
+                       gen3_fs_operand_none())
+/**
+ * Multiply operand0 and operand1 and put the result in dest_reg.
+ *
+ * Expands to gen3_fs_arith for the MUL op; the unused third source slot is
+ * filled with gen3_fs_operand_none().
+ */
+#define gen3_fs_mul(dest_reg, operand0, operand1)                       \
+        gen3_fs_arith (MUL, dest_reg, \
+                       operand0, operand1,      \
+                       gen3_fs_operand_none())
+/**
+ * Compute 1/(operand0.replicate_swizzle) and put the result in dest_reg.
+ *
+ * A non-zero dest_mask selects the gen3_fs_arith_masked form, forwarding
+ * dest_mask; a zero dest_mask selects the plain gen3_fs_arith form.  In
+ * both cases the two unused source slots are gen3_fs_operand_none().
+ *
+ * dest_mask appears twice in the expansion (the test and the masked call),
+ * so pass an expression without side effects.
+ */
+#define gen3_fs_rcp(dest_reg, dest_mask, operand0)              \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (RCP, dest_reg, dest_mask, \
+                                              operand0,                 \
+                                              gen3_fs_operand_none (),                  \
+                                              gen3_fs_operand_none ());                 \
+                } else { \
+                        gen3_fs_arith (RCP, dest_reg, \
+                                       operand0, \
+                                       gen3_fs_operand_none (), \
+                                       gen3_fs_operand_none ()); \
+                } \
+        } while (0)
+/**
+ * Compute 1/sqrt(operand0.replicate_swizzle) and put the result in dest_reg.
+ *
+ * A non-zero dest_mask selects the gen3_fs_arith_masked form, forwarding
+ * dest_mask; a zero dest_mask selects the plain gen3_fs_arith form.  In
+ * both cases the two unused source slots are gen3_fs_operand_none().
+ *
+ * dest_mask appears twice in the expansion (the test and the masked call),
+ * so pass an expression without side effects.
+ */
+#define gen3_fs_rsq(dest_reg, dest_mask, operand0)              \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (RSQ, dest_reg, dest_mask, \
+                                              operand0,                 \
+                                              gen3_fs_operand_none (),                  \
+                                              gen3_fs_operand_none ());                 \
+                } else { \
+                        gen3_fs_arith (RSQ, dest_reg, \
+                                       operand0, \
+                                       gen3_fs_operand_none (), \
+                                       gen3_fs_operand_none ()); \
+                } \
+        } while (0)
+/**
+ * Put the minimum of operand0 and operand1 in dest_reg.
+ *
+ * Expands to gen3_fs_arith for the MIN op; the unused third source slot is
+ * filled with gen3_fs_operand_none().
+ */
+#define gen3_fs_min(dest_reg, operand0, operand1)                       \
+        gen3_fs_arith (MIN, dest_reg, \
+                       operand0, operand1, \
+                       gen3_fs_operand_none())
+/**
+ * Put the maximum of operand0 and operand1 in dest_reg.
+ *
+ * Expands to gen3_fs_arith for the MAX op; the unused third source slot is
+ * filled with gen3_fs_operand_none().
+ */
+#define gen3_fs_max(dest_reg, operand0, operand1)                       \
+        gen3_fs_arith (MAX, dest_reg, \
+                       operand0, operand1, \
+                       gen3_fs_operand_none())
+#define gen3_fs_cmp(dest_reg, operand0, operand1, operand2) /* CMP via gen3_fs_arith, forwarding all three source operands */ \
+        gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2)
+/**
+ * Perform op0 * op1 + op2 and put the result in dest_reg.
+ *
+ * A non-zero dest_mask selects the gen3_fs_arith_masked form, forwarding
+ * dest_mask; a zero dest_mask selects the plain gen3_fs_arith form.
+ *
+ * dest_mask appears twice in the expansion (the test and the masked call),
+ * so pass an expression without side effects.
+ */
+#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \
+                } else { \
+                        gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \
+                } \
+        } while (0)
+#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) /* DP2ADD with sources op0, op1, op2; masked form when dest_mask is non-zero, else plain gen3_fs_arith */ \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \
+                } else { \
+                        gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \
+                } \
+        } while (0)
+/**
+ * Perform a 3-component dot-product of op0 and op1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ *
+ * A zero dest_mask falls back to the unmasked gen3_fs_arith form.  The
+ * unused third source slot is filled with gen3_fs_operand_none().
+ *
+ * dest_mask appears twice in the expansion (the test and the masked call),
+ * so pass an expression without side effects.
+ */
+#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1)      \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \
+                                              op0, op1,\
+                                              gen3_fs_operand_none());                  \
+                } else { \
+                        gen3_fs_arith (DP3, dest_reg, op0, op1,\
+                                       gen3_fs_operand_none());                 \
+                } \
+        } while (0)
+/**
+ * Perform a 4-component dot-product of op0 and op1 and put the
+ * resulting scalar in the channels of dest_reg specified by the dest_mask.
+ *
+ * A zero dest_mask falls back to the unmasked gen3_fs_arith form.  The
+ * unused third source slot is filled with gen3_fs_operand_none().
+ *
+ * dest_mask appears twice in the expansion (the test and the masked call),
+ * so pass an expression without side effects.
+ */
+#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1)      \
+        do {                                                                    \
+                if (dest_mask) {                                                        \
+                        gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \
+                                              op0, op1,\
+                                              gen3_fs_operand_none());                  \
+                } else { \
+                        gen3_fs_arith (DP4, dest_reg, op0, op1,\
+                                       gen3_fs_operand_none());                 \
+                } \
+        } while (0)
+#define SHADER_TRAPEZOIDS (1 << 24) /* single-bit flag (bit 24); NOTE(review): the code that tests this flag is outside this excerpt - confirm its meaning there */

 /drivers/video/Intel-2D/kgem-sna.c
 ,7 → 44,6
 }
 unsigned int cpu_cache_size();
 static struct kgem_bo *
 ,8 → 793,8
 static bool kgem_init_pinned_batches(struct kgem *kgem)
 {
-        int count[2] = { 4, 2 };
-        int size[2] = { 1, 4 };
+        int count[2] = { 2, 1 };
+        int size[2] = { 1, 2 };
         int n, i;
         if (kgem->wedged)
 ,7 → 1338,6
 static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 {
         DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
-        printf("%s: handle=%d\n", __FUNCTION__, bo->handle);
         assert(bo->refcnt == 0);
         assert(bo->exec == NULL);
 ,8 → 1599,6
 {
         DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
-        printf("%s: handle=%d\n", __FUNCTION__, bo->handle);
         assert(list_is_empty(&bo->list));
         assert(bo->refcnt == 0);
         assert(!bo->purged);
 ,8 → 4204,8
     bo->scanout   = 1;
         fb->fb_bo     = bo;
-    printf("fb width %d height %d pitch %d bo %p\n",
-            fb->width, fb->height, fb->pitch, fb->fb_bo);
+//    printf("fb width %d height %d pitch %d bo %p\n",
+//            fb->width, fb->height, fb->pitch, fb->fb_bo);
     return 1;
 };

 /drivers/video/Intel-2D/kgem.h
 ,7 → 28,7
 #ifndef KGEM_H
 #define KGEM_H
-#define HAS_DEBUG_FULL 1
+#define HAS_DEBUG_FULL 0
 #include <stdint.h>
 #include <stdbool.h>
 ,6 → 40,13
 #include "compiler.h"
 #include "intel_list.h"
+/**
+ * Invoke the KolibriOS system-call gate (software interrupt 0x40) with
+ * EAX = 5 and EBX = @time.
+ *
+ * NOTE(review): KolibriOS documents system function 5 as "delay", with the
+ * argument given in hundredths of a second - confirm against the kernel's
+ * system function reference.
+ *
+ * The "memory" clobber tells the compiler not to cache or reorder memory
+ * accesses across the call.
+ */
+static inline void delay(uint32_t time)
+{
+    __asm__ __volatile__(
+    "int $0x40"
+    ::"a"(5), "b"(time)
+    :"memory");
+}
 #if HAS_DEBUG_FULL

/drivers/video/Intel-2D/render/exa_sf.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
{ 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
{ 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
{ 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
{ 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },

/drivers/video/Intel-2D/render/exa_sf_mask.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
{ 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },

/drivers/video/Intel-2D/render/exa_wm_ca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },

/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },

/drivers/video/Intel-2D/render/exa_wm_mask_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x000000a0, 0x008d0100 },
{ 0x00802048, 0x210077be, 0x000000a4, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x000000b0, 0x008d0100 },
{ 0x00802048, 0x214077be, 0x000000b4, 0x008d0140 },

/drivers/video/Intel-2D/render/exa_wm_mask_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 },
{ 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 },

 /drivers/video/Intel-2D/render/exa_wm_mask_projective.g5b
 ,0 → 1,16
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
+   { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
+   { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
+   { 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
+   { 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },

 /drivers/video/Intel-2D/render/exa_wm_mask_projective.g7b
 ,0 → 1,12
+   { 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
+   { 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
+   { 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 },

/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x23801c09, 0x20000000, 0x0a2a0102 },

/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 },

/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x22c01c09, 0x20000000, 0x0a8a0102 },

/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 },

/drivers/video/Intel-2D/render/exa_wm_noca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },

/drivers/video/Intel-2D/render/exa_wm_src_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },

/drivers/video/Intel-2D/render/exa_wm_src_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x284077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x286077bd, 0x000000c0, 0x008d0080 },
{ 0x0060005a, 0x288077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x28a077bd, 0x000000d0, 0x008d0080 },

 /drivers/video/Intel-2D/render/exa_wm_src_projective.g5b
 ,0 → 1,16
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
+   { 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
+   { 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
+   { 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
+   { 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
+   { 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
+   { 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
+   { 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },

 /drivers/video/Intel-2D/render/exa_wm_src_projective.g7b
 ,0 → 1,12
+   { 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
+   { 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
+   { 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
+   { 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
+   { 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 },
+   { 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
+   { 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
+   { 0x00600041, 0x288077bd, 0x008d03c0, 0x008d0180 },
+   { 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22801c09, 0x20000000, 0x0a2a0001 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g5b
0,0 → 1,2
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g5b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 },
{ 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 },
{ 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 },

/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g7b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0001 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0003 },
{ 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0005 },

/drivers/video/Intel-2D/render/exa_wm_write.g5b
0,0 → 1,6
{ 0x00802001, 0x304003be, 0x008d01c0, 0x00000000 },
{ 0x00802001, 0x306003be, 0x008d0200, 0x00000000 },
{ 0x00802001, 0x308003be, 0x008d0240, 0x00000000 },
{ 0x00802001, 0x30a003be, 0x008d0280, 0x00000000 },
{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x548d0000, 0x94084800 },

 /drivers/video/Intel-2D/render/exa_wm_write.g7b
 ,0 → 1,17
+   { 0x00600001, 0x284003bd, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x286003bd, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x288003bd, 0x008d0200, 0x00000000 },
+   { 0x00600001, 0x28a003bd, 0x008d0220, 0x00000000 },
+   { 0x00600001, 0x28c003bd, 0x008d0240, 0x00000000 },
+   { 0x00600001, 0x28e003bd, 0x008d0260, 0x00000000 },
+   { 0x00600001, 0x290003bd, 0x008d0280, 0x00000000 },
+   { 0x00600001, 0x292003bd, 0x008d02a0, 0x00000000 },
+   { 0x05800031, 0x24001ca8, 0x00000840, 0x90031000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },

/drivers/video/Intel-2D/render/exa_wm_xy.g5b
0,0 → 1,4
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },

 /drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g5b
 ,0 → 1,12
+   { 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
+   { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
+   { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
+   { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+   { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+   { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
+   { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },

 /drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g7b
 ,0 → 1,12
+   { 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
+   { 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
+   { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
+   { 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
+   { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
+   { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
+   { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+   { 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
+   { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
+   { 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },

 /drivers/video/Intel-2D/sna.c
 ,14 → 12,6
 static int mask_width, mask_height;
-static inline void delay(uint32_t time)
-{
-    __asm__ __volatile__(
-    "int $0x40"
-    ::"a"(5), "b"(time)
-    :"memory");
-};
 typedef struct __attribute__((packed))
 {
   unsigned      handle;
 ,9 → 127,9
         } else if (sna->info->gen >= 040) {
                 if (gen4_render_init(sna))
                         backend = "Broadwater/Crestline";
-/*      } else if (sna->info->gen >= 030) {
+        } else if (sna->info->gen >= 030) {
                 if (gen3_render_init(sna))
-                        backend = "gen3"; */
+                        backend = "gen3";
         }
         DBG(("%s(backend=%s, prefer_gpu=%x)\n",
 ,7 → 180,6
     kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
-    delay(10);
 /*
     if (!xf86ReturnOptValBool(sna->Options,
                   OPTION_RELAXED_FENCING,
 ,7 → 457,7
     int width, height;
     int i;
-    printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height);
+//    printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height);
     bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height,
 ,I915_TILING_NONE, CREATE_CPU_MAP);

/drivers/video/Intel-2D/sna.h
135,6 → 135,7
} PictFormatShort;

#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)

#define PICT_FORMAT_A(f) PIXMAN_FORMAT_A(f)

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 3298 → Rev 3299