WebSVN – Kolibri OS – Path Comparison – / – /drivers/video/Intel-2D/brw/ Rev 3253 and /drivers/video/Intel-2D/brw/ Rev 3254

Regard whitespace Rev 3253 → Rev 3254

 /drivers/video/Intel-2D/brw/brw.h
 ,0 → 1,17
+#include "brw_eu.h" /* pulls in <stdbool.h> for bool; struct brw_compile is expected to come from here as well */
+bool brw_sf_kernel__nomask(struct brw_compile *p); /* SF kernel variants (nomask / mask): take only the compile context p */
+bool brw_sf_kernel__mask(struct brw_compile *p);
+bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width); /* WM kernel variants: all share the (p, dispatch_width) signature */
+bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
+bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width); /* NOTE(review): meaning of the bool result is not visible in this header - check the definitions */

 /drivers/video/Intel-2D/brw/brw_eu.c
 ,0 → 1,150
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+#include "brw_eu.h"
+#include <string.h>
+#include <stdlib.h>
+/* Returns the conditional mod that gives the same result once src0 and
+ * src1 have been exchanged, e.g. when CMP(a, b) is rewritten as CMP(b, a).
+ *
+ * Exchanging the operands mirrors an ordering comparison rather than
+ * negating it: a > b is b < a, and a >= b is b <= a.  Z and NZ are
+ * symmetric in their operands and are returned unchanged.
+ *
+ * Any other modifier has no swapped form here and yields ~0.
+ */
+uint32_t
+brw_swap_cmod(uint32_t cmod)
+{
+        switch (cmod) {
+        case BRW_CONDITIONAL_Z:
+        case BRW_CONDITIONAL_NZ:
+                return cmod;
+        case BRW_CONDITIONAL_G:
+                return BRW_CONDITIONAL_L;
+        case BRW_CONDITIONAL_GE:
+                return BRW_CONDITIONAL_LE;
+        case BRW_CONDITIONAL_L:
+                return BRW_CONDITIONAL_G;
+        case BRW_CONDITIONAL_LE:
+                return BRW_CONDITIONAL_GE;
+        default:
+                /* No swapped equivalent; callers see all bits set. */
+                return ~0;
+        }
+}
+/* Set the predicate control of the current instruction state (p->current).
+ *
+ * value == 0xff leaves predication off.  Any other value is moved into
+ * brw_flag_reg() as an immediate, unless p->flag_value already records
+ * that value, and predication is then set to BRW_PREDICATE_NORMAL.
+ *
+ * Open question kept from the original author: how does predicate
+ * control work when execution_size != 8?  Is a test/set for 0xffff
+ * needed when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
+{
+        /* Cleared up front, i.e. before the brw_MOV below is issued. */
+        p->current->header.predicate_control = BRW_PREDICATE_NONE;
+        if (value == 0xff)
+                return;
+        /* Only reload the flag register when the cached value differs. */
+        if (p->flag_value != value) {
+                brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+                p->flag_value = value;
+        }
+        p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+/* Record the requested compression mode in p and in the header of the
+ * current instruction state.
+ *
+ * p->compressed becomes true only for BRW_COMPRESSION_COMPRESSED.
+ * For p->gen below 060 the enum value is written to the header as is.
+ * From 060 on it is translated to the GEN6 encoding, because the
+ * 32-wide support in gen6 is not used here.
+ */
+void brw_set_compression_control(struct brw_compile *p,
+                                 enum brw_compression compression_control)
+{
+        unsigned hw_control;
+        p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
+        if (p->gen < 060) {
+                /* Pre-gen6: the header takes the enum value directly. */
+                p->current->header.compression_control = compression_control;
+                return;
+        }
+        switch (compression_control) {
+        case BRW_COMPRESSION_NONE:
+                /* "Use the first set of bits of dmask/vmask/arf
+                 * according to execsize".
+                 */
+                hw_control = GEN6_COMPRESSION_1Q;
+                break;
+        case BRW_COMPRESSION_2NDHALF:
+                /* For 8-wide, this is "use the second set of 8 bits." */
+                hw_control = GEN6_COMPRESSION_2Q;
+                break;
+        case BRW_COMPRESSION_COMPRESSED:
+                /* 16-wide instruction compression: first set of 16 bits,
+                 * since 32-wide dispatch is not done.
+                 */
+                hw_control = GEN6_COMPRESSION_1H;
+                break;
+        default:
+                assert(!"not reached");
+                hw_control = GEN6_COMPRESSION_1H;
+                break;
+        }
+        p->current->header.compression_control = hw_control;
+}
+/* Push the instruction state: the entry at p->current is copied into the
+ * next slot of p->stack, which then becomes current.  p->compressed is
+ * saved in p->compressed_stack at the index of the level being left.
+ * Asserts that the current level is not the last slot of the stack.
+ */
+void brw_push_insn_state( struct brw_compile *p )
+{
+        assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+        /* Remember the flag for the level we are leaving ... */
+        p->compressed_stack[p->current - p->stack] = p->compressed;
+        /* ... duplicate that level into the slot above and step onto it. */
+        memcpy(&p->current[1], &p->current[0], sizeof(struct brw_instruction));
+        p->current += 1;
+}
+/* Pop the instruction state: step back one level on p->stack and restore
+ * p->compressed from the value saved for that level.  Asserts that the
+ * current level is not the bottom of the stack.
+ */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+        assert(p->current != p->stack);
+        /* Index of the level below the current one. */
+        p->compressed = p->compressed_stack[(p->current - 1) - p->stack];
+        --p->current;
+}
+/* Initialise a compile context.
+ *
+ * gen must be non-zero (asserted) and is stored in p->gen; store is kept
+ * in p->store.  The instruction count is reset, the bottom entry of
+ * p->stack becomes the current instruction state and is zeroed, and the
+ * default state is then applied through the brw_set_* helpers.  The IF
+ * stack starts out empty with no storage attached.
+ */
+void brw_compile_init(struct brw_compile *p, int gen, void *store)
+{
+        assert(gen);
+        p->store = store;
+        p->gen = gen;
+        p->nr_insn = 0;
+        p->compressed = false;
+        /* Bottom of the state stack, cleared to all zeroes. */
+        p->current = &p->stack[0];
+        memset(p->current, 0, sizeof(*p->current));
+        /* Defaults for the initial state.  The original author was
+         * unsure what the mask control setting does.
+         */
+        brw_set_mask_control(p, BRW_MASK_ENABLE);
+        brw_set_saturate(p, 0);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        /* 0xff leaves predication switched off. */
+        brw_set_predicate_control_flag_value(p, 0xff);
+        p->if_stack = NULL;
+        p->if_stack_array_size = 0;
+        p->if_stack_depth = 0;
+}

 /drivers/video/Intel-2D/brw/brw_eu.h
 ,0 → 1,2266
+/*
+   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+   develop this 3D driver.
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+   The above copyright notice and this permission notice (including the
+   next paragraph) shall be included in all copies or substantial
+   portions of the Software.
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **********************************************************************/
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#ifndef BRW_EU_H
+#define BRW_EU_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) /* pack four 2-bit selectors: a in bits 0-1, b in 2-3, c in 4-5, d in 6-7 */
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) /* extract 2-bit selector number idx (0..3) from a packed swizzle */
+#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3) /* same encoding as BRW_SWIZZLE_XYZW */
+#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0) /* the replicating swizzles repeat one selector in all four slots */
+#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+#define WRITEMASK_X 0x1 /* one bit per channel: X=bit 0, Y=bit 1, Z=bit 2, W=bit 3 */
+#define WRITEMASK_Y 0x2
+#define WRITEMASK_Z 0x4
+#define WRITEMASK_W 0x8
+#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y) /* combined masks OR the single-channel bits together */
+#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
+#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+enum brw_compression { /* compression modes accepted by brw_set_compression_control() */
+        BRW_COMPRESSION_NONE, /* 0; translated to GEN6_COMPRESSION_1Q when p->gen >= 060 */
+        BRW_COMPRESSION_2NDHALF, /* 1; translated to GEN6_COMPRESSION_2Q when p->gen >= 060 */
+        BRW_COMPRESSION_COMPRESSED, /* 2; translated to GEN6_COMPRESSION_1H, and the only mode that sets p->compressed */
+};
+#define GEN6_COMPRESSION_1Q             0 /* 1Q..4Q take the values 0..3 */
+#define GEN6_COMPRESSION_2Q             1
+#define GEN6_COMPRESSION_3Q             2
+#define GEN6_COMPRESSION_4Q             3
+#define GEN6_COMPRESSION_1H             0 /* 1H has the same value as 1Q */
+#define GEN6_COMPRESSION_2H             2 /* 2H has the same value as 3Q */
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1 /* Z */
+#define BRW_CONDITIONAL_NEQ   2 /* NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_R     7
+#define BRW_CONDITIONAL_O     8
+#define BRW_CONDITIONAL_U     9
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+/** @{
+ *
+ * Gen6 has replaced "mask enable/disable" with WECtrl, which is
+ * effectively the same but much simpler to think about.  Now, there
+ * are two contributors ANDed together to whether channels are
+ * executed: The predication on the instruction, and the channel write
+ * enable.
+ */
+/**
+ * This is the default value.  It means that a channel's write enable is set
+ * if the per-channel IP is pointing at this instruction.
+ */
+#define BRW_WE_NORMAL           0
+/**
+ * This is used like BRW_MASK_DISABLE, and causes all channels to have
+ * their write enable set.  Note that predication still contributes to
+ * whether the channel actually gets written.
+ */
+#define BRW_WE_ALL              1
+/** @} */
+enum opcode {
+        /* These are the actual hardware opcodes.  Values are assigned
+         * explicitly, are not contiguous, and all stay below 128, so they
+         * fit the 7-bit header.opcode field of struct brw_instruction.
+         */
+        BRW_OPCODE_MOV =        1,
+        BRW_OPCODE_SEL =        2,
+        BRW_OPCODE_NOT =        4,
+        BRW_OPCODE_AND =        5,
+        BRW_OPCODE_OR = 6,
+        BRW_OPCODE_XOR =        7,
+        BRW_OPCODE_SHR =        8,
+        BRW_OPCODE_SHL =        9,
+        BRW_OPCODE_RSR =        10,
+        BRW_OPCODE_RSL =        11,
+        BRW_OPCODE_ASR =        12,
+        BRW_OPCODE_CMP =        16,
+        BRW_OPCODE_CMPN =       17,
+        BRW_OPCODE_JMPI =       32,
+        BRW_OPCODE_IF = 34,
+        BRW_OPCODE_IFF =        35,
+        BRW_OPCODE_ELSE =       36,
+        BRW_OPCODE_ENDIF =      37,
+        BRW_OPCODE_DO = 38,
+        BRW_OPCODE_WHILE =      39,
+        BRW_OPCODE_BREAK =      40,
+        BRW_OPCODE_CONTINUE = 41,
+        BRW_OPCODE_HALT =       42,
+        BRW_OPCODE_MSAVE =      44,
+        BRW_OPCODE_MRESTORE = 45,
+        BRW_OPCODE_PUSH =       46,
+        BRW_OPCODE_POP =        47,
+        BRW_OPCODE_WAIT =       48,
+        BRW_OPCODE_SEND =       49,
+        BRW_OPCODE_SENDC =      50,
+        BRW_OPCODE_MATH =       56,
+        BRW_OPCODE_ADD =        64,
+        BRW_OPCODE_MUL =        65,
+        BRW_OPCODE_AVG =        66,
+        BRW_OPCODE_FRC =        67,
+        BRW_OPCODE_RNDU =       68,
+        BRW_OPCODE_RNDD =       69,
+        BRW_OPCODE_RNDE =       70,
+        BRW_OPCODE_RNDZ =       71,
+        BRW_OPCODE_MAC =        72,
+        BRW_OPCODE_MACH =       73,
+        BRW_OPCODE_LZD =        74,
+        BRW_OPCODE_SAD2 =       80,
+        BRW_OPCODE_SADA2 =      81,
+        BRW_OPCODE_DP4 =        84,
+        BRW_OPCODE_DPH =        85,
+        BRW_OPCODE_DP3 =        86,
+        BRW_OPCODE_DP2 =        87,
+        BRW_OPCODE_DPA2 =       88,
+        BRW_OPCODE_LINE =       89,
+        BRW_OPCODE_PLN =        90,
+        BRW_OPCODE_NOP =        126,
+        /* These are compiler backend opcodes that get translated into other
+         * instructions.  They are numbered implicitly from 128 upwards,
+         * which is outside the range of the 7-bit header.opcode field.
+         */
+        FS_OPCODE_FB_WRITE = 128,
+        SHADER_OPCODE_RCP,
+        SHADER_OPCODE_RSQ,
+        SHADER_OPCODE_SQRT,
+        SHADER_OPCODE_EXP2,
+        SHADER_OPCODE_LOG2,
+        SHADER_OPCODE_POW,
+        SHADER_OPCODE_SIN,
+        SHADER_OPCODE_COS,
+        FS_OPCODE_DDX,
+        FS_OPCODE_DDY,
+        FS_OPCODE_PIXEL_X,
+        FS_OPCODE_PIXEL_Y,
+        FS_OPCODE_CINTERP,
+        FS_OPCODE_LINTERP,
+        FS_OPCODE_TEX,
+        FS_OPCODE_TXB,
+        FS_OPCODE_TXD,
+        FS_OPCODE_TXF,
+        FS_OPCODE_TXL,
+        FS_OPCODE_TXS,
+        FS_OPCODE_DISCARD,
+        FS_OPCODE_SPILL,
+        FS_OPCODE_UNSPILL,
+        FS_OPCODE_PULL_CONSTANT_LOAD,
+        VS_OPCODE_URB_WRITE,
+        VS_OPCODE_SCRATCH_READ,
+        VS_OPCODE_SCRATCH_WRITE,
+        VS_OPCODE_PULL_CONSTANT_LOAD,
+};
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6
+#define BRW_PREDICATE_ALIGN16_ALL4H           7
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5
+#define BRW_REGISTER_TYPE_VF  5 /* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF  6
+#define BRW_REGISTER_TYPE_V   6 /* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F   7
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+#define BRW_MRF_COMPR4                  (1 << 7)
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+#define BRW_MESSAGE_TARGET_NULL               0
+#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
+#define BRW_MESSAGE_TARGET_SAMPLER            2
+#define BRW_MESSAGE_TARGET_GATEWAY            3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
+#define BRW_MESSAGE_TARGET_URB                6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7
+#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
+#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
+#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+#define BRW_SAMPLER_MESSAGE_SAMPLE                    0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
+#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10
+/* for GEN5 only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+/* This one stays the same across generations. */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+/* GEN4 */
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+/* G45, GEN5 */
+#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ         1
+#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
+#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ      3
+#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
+#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
+/* GEN6 */
+#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ        1
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
+#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
+#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
+#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+/**
+ * Message target: Shared Function ID for where to SEND a message.
+ *
+ * These are enumerated in the ISA reference under "send - Send Message".
+ * In particular, see the following tables:
+ * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
+ * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
+ * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
+ *   Overview / GPE Function IDs
+ *
+ * Some enumerators share a value: GEN6_SFID_DATAPORT_SAMPLER_CACHE and
+ * GEN6_SFID_DATAPORT_RENDER_CACHE reuse 4 and 5, the values of
+ * BRW_SFID_DATAPORT_READ and BRW_SFID_DATAPORT_WRITE.  The values 0..7
+ * also match the BRW_MESSAGE_TARGET_* defines earlier in this header.
+ */
+enum brw_message_target {
+   BRW_SFID_NULL                     = 0,
+   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
+   BRW_SFID_SAMPLER                  = 2,
+   BRW_SFID_MESSAGE_GATEWAY          = 3,
+   BRW_SFID_DATAPORT_READ            = 4,
+   BRW_SFID_DATAPORT_WRITE           = 5,
+   BRW_SFID_URB                      = 6,
+   BRW_SFID_THREAD_SPAWNER           = 7,
+   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
+   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
+   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
+   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
+};
+#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10 /* same value as GEN7_SFID_DATAPORT_DATA_CACHE */
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+/* GEN6 */
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
+#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
+#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
+#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
+#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
+#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
+#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+#define BRW_URB_OPCODE_WRITE  0
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+#define REG_SIZE (8*4)
+struct brw_instruction {
+        struct {
+                unsigned opcode:7;
+                unsigned pad:1;
+                unsigned access_mode:1;
+                unsigned mask_control:1;
+                unsigned dependency_control:2;
+                unsigned compression_control:2; /* gen6: quater control */
+                unsigned thread_control:2;
+                unsigned predicate_control:4;
+                unsigned predicate_inverse:1;
+                unsigned execution_size:3;
+                /**
+                 * Conditional Modifier for most instructions.  On Gen6+, this is also
+                 * used for the SEND instruction's Message Target/SFID.
+                 */
+                unsigned destreg__conditionalmod:4;
+                unsigned acc_wr_control:1;
+                unsigned cmpt_control:1;
+                unsigned debug_control:1;
+                unsigned saturate:1;
+        } header;
+        union {
+                struct {
+                        unsigned dest_reg_file:2;
+                        unsigned dest_reg_type:3;
+                        unsigned src0_reg_file:2;
+                        unsigned src0_reg_type:3;
+                        unsigned src1_reg_file:2;
+                        unsigned src1_reg_type:3;
+                        unsigned pad:1;
+                        unsigned dest_subreg_nr:5;
+                        unsigned dest_reg_nr:8;
+                        unsigned dest_horiz_stride:2;
+                        unsigned dest_address_mode:1;
+                } da1;
+                struct {
+                        unsigned dest_reg_file:2;
+                        unsigned dest_reg_type:3;
+                        unsigned src0_reg_file:2;
+                        unsigned src0_reg_type:3;
+                        unsigned src1_reg_file:2;        /* 0x00000c00 */
+                        unsigned src1_reg_type:3;        /* 0x00007000 */
+                        unsigned pad:1;
+                        int dest_indirect_offset:10;    /* offset against the deref'd address reg */
+                        unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
+                        unsigned dest_horiz_stride:2;
+                        unsigned dest_address_mode:1;
+                } ia1;
+                struct {
+                        unsigned dest_reg_file:2;
+                        unsigned dest_reg_type:3;
+                        unsigned src0_reg_file:2;
+                        unsigned src0_reg_type:3;
+                        unsigned src1_reg_file:2;
+                        unsigned src1_reg_type:3;
+                        unsigned pad:1;
+                        unsigned dest_writemask:4;
+                        unsigned dest_subreg_nr:1;
+                        unsigned dest_reg_nr:8;
+                        unsigned dest_horiz_stride:2;
+                        unsigned dest_address_mode:1;
+                } da16;
+                struct {
+                        unsigned dest_reg_file:2;
+                        unsigned dest_reg_type:3;
+                        unsigned src0_reg_file:2;
+                        unsigned src0_reg_type:3;
+                        unsigned pad0:6;
+                        unsigned dest_writemask:4;
+                        int dest_indirect_offset:6;
+                        unsigned dest_subreg_nr:3;
+                        unsigned dest_horiz_stride:2;
+                        unsigned dest_address_mode:1;
+                } ia16;
+                struct {
+                        unsigned dest_reg_file:2;
+                        unsigned dest_reg_type:3;
+                        unsigned src0_reg_file:2;
+                        unsigned src0_reg_type:3;
+                        unsigned src1_reg_file:2;
+                        unsigned src1_reg_type:3;
+                        unsigned pad:1;
+                        int jump_count:16;
+                } branch_gen6;
+                struct {
+                        unsigned dest_reg_file:1;
+                        unsigned flag_subreg_num:1;
+                        unsigned pad0:2;
+                        unsigned src0_abs:1;
+                        unsigned src0_negate:1;
+                        unsigned src1_abs:1;
+                        unsigned src1_negate:1;
+                        unsigned src2_abs:1;
+                        unsigned src2_negate:1;
+                        unsigned pad1:7;
+                        unsigned dest_writemask:4;
+                        unsigned dest_subreg_nr:3;
+                        unsigned dest_reg_nr:8;
+                } da3src;
+        } bits1;
+        union {
+                struct {
+                        unsigned src0_subreg_nr:5;
+                        unsigned src0_reg_nr:8;
+                        unsigned src0_abs:1;
+                        unsigned src0_negate:1;
+                        unsigned src0_address_mode:1;
+                        unsigned src0_horiz_stride:2;
+                        unsigned src0_width:3;
+                        unsigned src0_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad:5;
+                } da1;
+                struct {
+                        int src0_indirect_offset:10;
+                        unsigned src0_subreg_nr:3;
+                        unsigned src0_abs:1;
+                        unsigned src0_negate:1;
+                        unsigned src0_address_mode:1;
+                        unsigned src0_horiz_stride:2;
+                        unsigned src0_width:3;
+                        unsigned src0_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad:5;
+                } ia1;
+                struct {
+                        unsigned src0_swz_x:2;
+                        unsigned src0_swz_y:2;
+                        unsigned src0_subreg_nr:1;
+                        unsigned src0_reg_nr:8;
+                        unsigned src0_abs:1;
+                        unsigned src0_negate:1;
+                        unsigned src0_address_mode:1;
+                        unsigned src0_swz_z:2;
+                        unsigned src0_swz_w:2;
+                        unsigned pad0:1;
+                        unsigned src0_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad1:5;
+                } da16;
+                struct {
+                        unsigned src0_swz_x:2;
+                        unsigned src0_swz_y:2;
+                        int src0_indirect_offset:6;
+                        unsigned src0_subreg_nr:3;
+                        unsigned src0_abs:1;
+                        unsigned src0_negate:1;
+                        unsigned src0_address_mode:1;
+                        unsigned src0_swz_z:2;
+                        unsigned src0_swz_w:2;
+                        unsigned pad0:1;
+                        unsigned src0_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad1:5;
+                } ia16;
+                /* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
+                 *
+                 * Does not apply to Gen6+.  The SFID/message target moved to bits
+                 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
+                 */
+                struct {
+                        unsigned pad:26;
+                        unsigned end_of_thread:1;
+                        unsigned pad1:1;
+                        unsigned sfid:4;
+                } send_gen5;  /* for Ironlake only */
+                struct {
+                        unsigned src0_rep_ctrl:1;
+                        unsigned src0_swizzle:8;
+                        unsigned src0_subreg_nr:3;
+                        unsigned src0_reg_nr:8;
+                        unsigned pad0:1;
+                        unsigned src1_rep_ctrl:1;
+                        unsigned src1_swizzle:8;
+                        unsigned src1_subreg_nr_low:2;
+                } da3src;
+        } bits2;
+        union {
+                struct {
+                        unsigned src1_subreg_nr:5;
+                        unsigned src1_reg_nr:8;
+                        unsigned src1_abs:1;
+                        unsigned src1_negate:1;
+                        unsigned src1_address_mode:1;
+                        unsigned src1_horiz_stride:2;
+                        unsigned src1_width:3;
+                        unsigned src1_vert_stride:4;
+                        unsigned pad0:7;
+                } da1;
+                struct {
+                        unsigned src1_swz_x:2;
+                        unsigned src1_swz_y:2;
+                        unsigned src1_subreg_nr:1;
+                        unsigned src1_reg_nr:8;
+                        unsigned src1_abs:1;
+                        unsigned src1_negate:1;
+                        unsigned src1_address_mode:1;
+                        unsigned src1_swz_z:2;
+                        unsigned src1_swz_w:2;
+                        unsigned pad1:1;
+                        unsigned src1_vert_stride:4;
+                        unsigned pad2:7;
+                } da16;
+                struct {
+                        int src1_indirect_offset:10;
+                        unsigned src1_subreg_nr:3;
+                        unsigned src1_abs:1;
+                        unsigned src1_negate:1;
+                        unsigned src1_address_mode:1;
+                        unsigned src1_horiz_stride:2;
+                        unsigned src1_width:3;
+                        unsigned src1_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad1:5;
+                } ia1;
+                struct {
+                        unsigned src1_swz_x:2;
+                        unsigned src1_swz_y:2;
+                        int  src1_indirect_offset:6;
+                        unsigned src1_subreg_nr:3;
+                        unsigned src1_abs:1;
+                        unsigned src1_negate:1;
+                        unsigned pad0:1;
+                        unsigned src1_swz_z:2;
+                        unsigned src1_swz_w:2;
+                        unsigned pad1:1;
+                        unsigned src1_vert_stride:4;
+                        unsigned flag_subreg_nr:1;
+                        unsigned flag_reg_nr:1;
+                        unsigned pad2:5;
+                } ia16;
+                struct {
+                        int jump_count:16;      /* note: signed */
+                        unsigned pop_count:4;
+                        unsigned pad0:12;
+                } if_else;
+                /* This is also used for gen7 IF/ELSE instructions */
+                struct {
+                        /* Signed jump distance to the ip to jump to if all channels
+                         * are disabled after the break or continue.  It should point
+                         * to the end of the innermost control flow block, as that's
+                         * where some channel could get re-enabled.
+                         */
+                        int jip:16;
+                        /* Signed jump distance to the location to resume execution
+                         * of this channel if it's enabled for the break or continue.
+                         */
+                        int uip:16;
+                } break_cont;
+                /**
+                 * \defgroup SEND instructions / Message Descriptors
+                 *
+                 * @{
+                 */
+                /**
+                 * Generic Message Descriptor for Gen4 SEND instructions.  The structs
+                 * below expand function_control to something specific for their
+                 * message.  Due to struct packing issues, they duplicate these bits.
+                 *
+                 * See the G45 PRM, Volume 4, Table 14-15.
+                 */
+                struct {
+                        unsigned function_control:16;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } generic;
+                /**
+                 * Generic Message Descriptor for Gen5-7 SEND instructions.
+                 *
+                 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15.  (Sadly, most
+                 * of the information on the SEND instruction is missing from the public
+                 * Ironlake PRM.)
+                 *
+                 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
+                 * According to the SEND instruction description:
+                 * "The MSb of the message description, the EOT field, always comes from
+                 *  bit 127 of the instruction word"...which is bit 31 of this field.
+                 */
+                struct {
+                        unsigned function_control:19;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } generic_gen5;
+                /** G45 PRM, Volume 4, Section 6.1.1.1 */
+                struct {
+                        unsigned function:4;
+                        unsigned int_type:1;
+                        unsigned precision:1;
+                        unsigned saturate:1;
+                        unsigned data_type:1;
+                        unsigned pad0:8;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } math;
+                /** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
+                struct {
+                        unsigned function:4;
+                        unsigned int_type:1;
+                        unsigned precision:1;
+                        unsigned saturate:1;
+                        unsigned data_type:1;
+                        unsigned snapshot:1;
+                        unsigned pad0:10;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } math_gen5;
+                /** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned sampler:4;
+                        unsigned return_format:2;
+                        unsigned msg_type:2;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } sampler;
+                /** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned sampler:4;
+                        unsigned msg_type:4;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } sampler_g4x;
+                /** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned sampler:4;
+                        unsigned msg_type:4;
+                        unsigned simd_mode:2;
+                        unsigned pad0:1;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } sampler_gen5;
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned sampler:4;
+                        unsigned msg_type:5;
+                        unsigned simd_mode:2;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } sampler_gen7;
+                struct brw_urb_immediate {
+                        unsigned opcode:4;
+                        unsigned offset:6;
+                        unsigned swizzle_control:2;
+                        unsigned pad:1;
+                        unsigned allocate:1;
+                        unsigned used:1;
+                        unsigned complete:1;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } urb;
+                struct {
+                        unsigned opcode:4;
+                        unsigned offset:6;
+                        unsigned swizzle_control:2;
+                        unsigned pad:1;
+                        unsigned allocate:1;
+                        unsigned used:1;
+                        unsigned complete:1;
+                        unsigned pad0:3;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } urb_gen5;
+                struct {
+                        unsigned opcode:3;
+                        unsigned offset:11;
+                        unsigned swizzle_control:1;
+                        unsigned complete:1;
+                        unsigned per_slot_offset:1;
+                        unsigned pad0:2;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } urb_gen7;
+                /** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:4;
+                        unsigned msg_type:2;
+                        unsigned target_cache:2;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } dp_read;
+                /** G45 PRM, Volume 4, Section 5.10.1.1.2 */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned msg_type:3;
+                        unsigned target_cache:2;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } dp_read_g4x;
+                /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned msg_type:3;
+                        unsigned target_cache:2;
+                        unsigned pad0:3;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } dp_read_gen5;
+                /** G45 PRM, Volume 4, Section 5.10.1.1.2.  For both Gen4 and G45. */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned last_render_target:1;
+                        unsigned msg_type:3;
+                        unsigned send_commit_msg:1;
+                        unsigned response_length:4;
+                        unsigned msg_length:4;
+                        unsigned msg_target:4;
+                        unsigned pad1:3;
+                        unsigned end_of_thread:1;
+                } dp_write;
+                /** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned last_render_target:1;
+                        unsigned msg_type:3;
+                        unsigned send_commit_msg:1;
+                        unsigned pad0:3;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } dp_write_gen5;
+                /**
+                 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
+                 *
+                 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
+                 **/
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:5;
+                        unsigned msg_type:3;
+                        unsigned pad0:3;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } gen6_dp_sampler_const_cache;
+                /**
+                 * Message for the Sandybridge Render Cache Data Port.
+                 *
+                 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
+                 * Section 3.9.2.1.1: Message Descriptor.
+                 *
+                 * "Slot Group Select" and "Last Render Target" are part of the
+                 * 5-bit message control for Render Target Write messages.  See
+                 * Section 3.9.9.2.1 of the same volume.
+                 */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned slot_group_select:1;
+                        unsigned last_render_target:1;
+                        unsigned msg_type:4;
+                        unsigned send_commit_msg:1;
+                        unsigned pad0:1;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad1:2;
+                        unsigned end_of_thread:1;
+                } gen6_dp;
+                /**
+                 * Message for any of the Gen7 Data Port caches.
+                 *
+                 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
+                 * Data Port Messages / Message Descriptor.  Once again, "Slot Group
+                 * Select" and "Last Render Target" are part of the 6-bit message
+                 * control for Render Target Writes.
+                 */
+                struct {
+                        unsigned binding_table_index:8;
+                        unsigned msg_control:3;
+                        unsigned slot_group_select:1;
+                        unsigned last_render_target:1;
+                        unsigned msg_control_pad:1;
+                        unsigned msg_type:4;
+                        unsigned pad1:1;
+                        unsigned header_present:1;
+                        unsigned response_length:5;
+                        unsigned msg_length:4;
+                        unsigned pad2:2;
+                        unsigned end_of_thread:1;
+                } gen7_dp;
+                /** @} */
+                struct {
+                        unsigned src1_subreg_nr_high:1;
+                        unsigned src1_reg_nr:8;
+                        unsigned pad0:1;
+                        unsigned src2_rep_ctrl:1;
+                        unsigned src2_swizzle:8;
+                        unsigned src2_subreg_nr:3;
+                        unsigned src2_reg_nr:8;
+                        unsigned pad1:2;
+                } da3src;
+                int d;
+                unsigned ud;
+                float f;
+        } bits3;
+};
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges.  Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ *
+ * struct brw_reg describes one register operand.  The leading bit-fields
+ * add up to 32 bits and dw1 is a second 32-bit word, which holds either
+ * the align16/indirect "bits" or a raw immediate value (f, d or ud) as
+ * stored by the brw_imm_*() constructors.
+ */
+struct brw_reg {
+        unsigned type:4;                /* one of BRW_REGISTER_TYPE_x */
+        unsigned file:2;                /* one of the BRW_x_REGISTER_FILE values */
+        unsigned nr:8;                  /* register number/index */
+        unsigned subnr:5;               /* byte offset within the register; :1 in align16 */
+        unsigned negate:1;              /* source only */
+        unsigned abs:1;         /* source only */
+        unsigned vstride:4;             /* source only */
+        unsigned width:3;               /* src only, align1 only */
+        unsigned hstride:2;             /* align1 only */
+        unsigned address_mode:1;        /* relative addressing, hopefully! */
+        unsigned pad0:1;                /* unused; brw_reg() clears it */
+        union {
+                struct {
+                        unsigned swizzle:8;             /* src only, align16 only */
+                        unsigned writemask:4;           /* dest only, align16 only */
+                        int  indirect_offset:10;        /* relative addressing offset */
+                        unsigned pad1:10;               /* two dwords total */
+                } bits;
+                float f;                /* immediate payload, see brw_imm_f() */
+                int   d;                /* immediate payload, see brw_imm_d() */
+                unsigned ud;            /* immediate payload, see brw_imm_ud() */
+        } dw1;
+};
+struct brw_indirect {                   /* 32 bits in total (4 + 10 + 18) */
+        unsigned addr_subnr:4;          /* presumably the address-register sub-register number -- confirm against users */
+        int addr_offset:10;             /* signed; same width as brw_reg's indirect_offset */
+        unsigned pad:18;                /* unused filler up to 32 bits */
+};
+#define BRW_EU_MAX_INSN_STACK 5 /* capacity of brw_compile.stack / compressed_stack */
+#define BRW_EU_MAX_INSN 10000 /* presumably the upper bound on emitted instructions -- not used in this part of the file */
+struct brw_compile {
+        struct brw_instruction *store; /* instruction buffer; allocated and owned outside this struct definition */
+        unsigned nr_insn; /* presumably the number of instructions currently in store -- confirm */
+        int gen; /* hardware generation selector; encoding is defined by the users of this field */
+        /* Allow clients to push/pop instruction state:
+         * stack[] and compressed_stack[] are parallel arrays holding at most
+         * BRW_EU_MAX_INSN_STACK saved entries; current presumably points at
+         * the active entry -- confirm against the push/pop helpers.
+        */
+        struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+        bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+        struct brw_instruction *current;
+        unsigned flag_value;
+        bool single_program_flow;
+        bool compressed;
+        /* Control flow stacks:
+         * - if_stack contains IF and ELSE instructions which must be patched
+         *   (and popped) once the matching ENDIF instruction is encountered.
+         *   if_stack_depth and if_stack_array_size presumably track the used
+         *   and allocated entry counts of that array -- confirm.
+         */
+        struct brw_instruction **if_stack;
+        int if_stack_depth;
+        int if_stack_array_size;
+};
+/**
+ * Size in bytes of a single element of the given register type.
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \return 4 for UD/D/F, 2 for HF/UW/W, 1 for UB/B and 0 for anything else
+ */
+static inline int type_sz(unsigned type)
+{
+        if (type == BRW_REGISTER_TYPE_UD ||
+            type == BRW_REGISTER_TYPE_D ||
+            type == BRW_REGISTER_TYPE_F)
+                return 4;
+        if (type == BRW_REGISTER_TYPE_HF ||
+            type == BRW_REGISTER_TYPE_UW ||
+            type == BRW_REGISTER_TYPE_W)
+                return 2;
+        if (type == BRW_REGISTER_TYPE_UB ||
+            type == BRW_REGISTER_TYPE_B)
+                return 1;
+        /* Unrecognised types have no element size. */
+        return 0;
+}
+/**
+ * Build a directly-addressed brw_reg from its individual components.
+ * \param file  one of the BRW_x_REGISTER_FILE values
+ * \param nr  register number/index
+ * \param subnr  register sub number, in elements of \p type (stored in bytes)
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \param vstride  one of BRW_VERTICAL_STRIDE_x
+ * \param width  one of BRW_WIDTH_x
+ * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle  one of BRW_SWIZZLE_x
+ * \param writemask  WRITEMASK_X/Y/Z/W bitfield
+ * \return the populated register description; negate, abs, the indirect
+ *         offset and all padding bits are cleared
+ */
+static inline struct brw_reg brw_reg(unsigned file,
+                                     unsigned nr,
+                                     unsigned subnr,
+                                     unsigned type,
+                                     unsigned vstride,
+                                     unsigned width,
+                                     unsigned hstride,
+                                     unsigned swizzle,
+                                     unsigned writemask)
+{
+        /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+         * set swizzle and writemask to W, as the lower bits of subnr will
+         * be lost when converted to align16.  This is probably too much to
+         * keep track of as you'd want it adjusted by suboffset(), etc.
+         * Perhaps fix up when converting to align16?
+         */
+        struct brw_reg reg = {
+                .type = type,
+                .file = file,
+                .nr = nr,
+                /* the sub-register is kept as a byte offset */
+                .subnr = subnr * type_sz(type),
+                .negate = 0,
+                .abs = 0,
+                .vstride = vstride,
+                .width = width,
+                .hstride = hstride,
+                .address_mode = BRW_ADDRESS_DIRECT,
+                .pad0 = 0,
+                .dw1 = {
+                        .bits = {
+                                .swizzle = swizzle,
+                                .writemask = writemask,
+                                .indirect_offset = 0,
+                                .pad1 = 0,
+                        },
+                },
+        };
+        /* Range-check the register number against the limit of its file. */
+        if (file == BRW_GENERAL_REGISTER_FILE) {
+                assert(nr < BRW_MAX_GRF);
+        } else if (file == BRW_MESSAGE_REGISTER_FILE) {
+                /* bit 7 of nr is masked off before the comparison */
+                assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+        } else if (file == BRW_ARCHITECTURE_REGISTER_FILE) {
+                assert(nr <= BRW_ARF_IP);
+        }
+        return reg;
+}
+/**
+ * Construct a float[16] register: type F, vertical stride 16, width 16,
+ * horizontal stride 1, XYZW swizzle and a full writemask.
+ */
+static inline struct brw_reg brw_vec16_reg(unsigned file,
+                                           unsigned nr,
+                                           unsigned subnr)
+{
+        struct brw_reg reg = brw_reg(file, nr, subnr,
+                                     BRW_REGISTER_TYPE_F,
+                                     BRW_VERTICAL_STRIDE_16,
+                                     BRW_WIDTH_16,
+                                     BRW_HORIZONTAL_STRIDE_1,
+                                     BRW_SWIZZLE_XYZW,
+                                     WRITEMASK_XYZW);
+        return reg;
+}
+/**
+ * Construct a float[8] register: type F, vertical stride 8, width 8,
+ * horizontal stride 1, XYZW swizzle and a full writemask.
+ */
+static inline struct brw_reg brw_vec8_reg(unsigned file,
+                                          unsigned nr,
+                                          unsigned subnr)
+{
+        struct brw_reg reg = brw_reg(file, nr, subnr,
+                                     BRW_REGISTER_TYPE_F,
+                                     BRW_VERTICAL_STRIDE_8,
+                                     BRW_WIDTH_8,
+                                     BRW_HORIZONTAL_STRIDE_1,
+                                     BRW_SWIZZLE_XYZW,
+                                     WRITEMASK_XYZW);
+        return reg;
+}
+/**
+ * Construct a float[4] register: type F, vertical stride 4, width 4,
+ * horizontal stride 1, XYZW swizzle and a full writemask.
+ */
+static inline struct brw_reg brw_vec4_reg(unsigned file,
+                                          unsigned nr,
+                                          unsigned subnr)
+{
+        struct brw_reg reg = brw_reg(file, nr, subnr,
+                                     BRW_REGISTER_TYPE_F,
+                                     BRW_VERTICAL_STRIDE_4,
+                                     BRW_WIDTH_4,
+                                     BRW_HORIZONTAL_STRIDE_1,
+                                     BRW_SWIZZLE_XYZW,
+                                     WRITEMASK_XYZW);
+        return reg;
+}
+/**
+ * Construct a float[2] register: type F, vertical stride 2, width 2,
+ * horizontal stride 1, XYXY swizzle and an XY writemask.
+ */
+static inline struct brw_reg brw_vec2_reg(unsigned file,
+                                          unsigned nr,
+                                          unsigned subnr)
+{
+        struct brw_reg reg = brw_reg(file, nr, subnr,
+                                     BRW_REGISTER_TYPE_F,
+                                     BRW_VERTICAL_STRIDE_2,
+                                     BRW_WIDTH_2,
+                                     BRW_HORIZONTAL_STRIDE_1,
+                                     BRW_SWIZZLE_XYXY,
+                                     WRITEMASK_XY);
+        return reg;
+}
+/**
+ * Construct a float[1] register: type F, vertical stride 0, width 1,
+ * horizontal stride 0, XXXX swizzle and an X-only writemask.
+ */
+static inline struct brw_reg brw_vec1_reg(unsigned file,
+                                          unsigned nr,
+                                          unsigned subnr)
+{
+        struct brw_reg reg = brw_reg(file, nr, subnr,
+                                     BRW_REGISTER_TYPE_F,
+                                     BRW_VERTICAL_STRIDE_0,
+                                     BRW_WIDTH_1,
+                                     BRW_HORIZONTAL_STRIDE_0,
+                                     BRW_SWIZZLE_XXXX,
+                                     WRITEMASK_X);
+        return reg;
+}
+/**
+ * Return a copy of \p reg whose type field is replaced by \p type
+ * (one of BRW_REGISTER_TYPE_x); every other field is left untouched.
+ */
+static inline struct brw_reg __retype(struct brw_reg reg,
+                                      unsigned type)
+{
+        struct brw_reg retyped = reg;
+        retyped.type = type;
+        return retyped;
+}
+/** Return a copy of \p reg retyped to BRW_REGISTER_TYPE_D. */
+static inline struct brw_reg __retype_d(struct brw_reg reg)
+{
+        reg.type = BRW_REGISTER_TYPE_D;
+        return reg;
+}
+/** Return a copy of \p reg retyped to BRW_REGISTER_TYPE_UD. */
+static inline struct brw_reg __retype_ud(struct brw_reg reg)
+{
+        reg.type = BRW_REGISTER_TYPE_UD;
+        return reg;
+}
+/** Return a copy of \p reg retyped to BRW_REGISTER_TYPE_UW. */
+static inline struct brw_reg __retype_uw(struct brw_reg reg)
+{
+        reg.type = BRW_REGISTER_TYPE_UW;
+        return reg;
+}
+/**
+ * Return \p reg advanced by one register number when its vertical stride
+ * is non-zero; a register with vstride == 0 is returned unchanged.
+ */
+static inline struct brw_reg __sechalf(struct brw_reg reg)
+{
+        if (reg.vstride == 0)
+                return reg;
+        reg.nr += 1;
+        return reg;
+}
+/**
+ * Return \p reg with its sub-register advanced by \p delta elements of
+ * the register's current type (subnr is kept in bytes).
+ */
+static inline struct brw_reg __suboffset(struct brw_reg reg,
+                                         unsigned delta)
+{
+        const unsigned step = delta * type_sz(reg.type);
+        reg.subnr = reg.subnr + step;
+        return reg;
+}
+/** Return \p reg with its register number advanced by \p delta. */
+static inline struct brw_reg __offset(struct brw_reg reg,
+                                      unsigned delta)
+{
+        reg.nr = reg.nr + delta;
+        return reg;
+}
+/**
+ * Return \p reg moved forward by \p bytes bytes.  The position is
+ * flattened to nr * REG_SIZE + subnr, advanced, and then split back
+ * into a register number and a byte offset within that register.
+ */
+static inline struct brw_reg byte_offset(struct brw_reg reg,
+                                         unsigned bytes)
+{
+        const unsigned target = reg.nr * REG_SIZE + reg.subnr + bytes;
+        reg.subnr = target % REG_SIZE;
+        reg.nr = target / REG_SIZE;
+        return reg;
+}
+/**
+ * Construct an unsigned word[16] register.  The region is built as a
+ * float[16] register at sub-register 0, retyped to UW, and only then
+ * offset, so \p subnr counts word-sized elements.
+ */
+static inline struct brw_reg brw_uw16_reg(unsigned file,
+                                          unsigned nr,
+                                          unsigned subnr)
+{
+        struct brw_reg reg = brw_vec16_reg(file, nr, 0);
+        reg = __retype(reg, BRW_REGISTER_TYPE_UW);
+        return __suboffset(reg, subnr);
+}
+/**
+ * Construct an unsigned word[8] register.  The region is built as a
+ * float[8] register at sub-register 0, retyped to UW, and only then
+ * offset, so \p subnr counts word-sized elements.
+ */
+static inline struct brw_reg brw_uw8_reg(unsigned file,
+                                         unsigned nr,
+                                         unsigned subnr)
+{
+        struct brw_reg reg = brw_vec8_reg(file, nr, 0);
+        reg = __retype(reg, BRW_REGISTER_TYPE_UW);
+        return __suboffset(reg, subnr);
+}
+/**
+ * Construct an unsigned word[1] register.  The region is built as a
+ * float[1] register at sub-register 0, retyped to UW, and only then
+ * offset, so \p subnr counts word-sized elements.
+ */
+static inline struct brw_reg brw_uw1_reg(unsigned file,
+                                         unsigned nr,
+                                         unsigned subnr)
+{
+        struct brw_reg reg = brw_vec1_reg(file, nr, 0);
+        reg = __retype(reg, BRW_REGISTER_TYPE_UW);
+        return __suboffset(reg, subnr);
+}
+/**
+ * Construct the common skeleton of an immediate operand.
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \return a register in the BRW_IMMEDIATE_VALUE file with vertical stride 0,
+ *         width 1 and horizontal stride 0.  Register number, sub-register,
+ *         swizzle and writemask are all zero; the brw_imm_*() constructors
+ *         then store the immediate payload in dw1.
+ */
+static inline struct brw_reg brw_imm_reg(unsigned type)
+{
+        return brw_reg( BRW_IMMEDIATE_VALUE,
+                        0,      /* nr: not used by an immediate */
+                        0,      /* subnr */
+                        type,
+                        BRW_VERTICAL_STRIDE_0,
+                        BRW_WIDTH_1,
+                        BRW_HORIZONTAL_STRIDE_0,
+                        0,      /* swizzle */
+                        0);     /* writemask */
+}
+/** Construct an immediate register of type F whose payload is \p f. */
+static inline struct brw_reg brw_imm_f(float f)
+{
+        struct brw_reg reg;
+        reg = brw_imm_reg(BRW_REGISTER_TYPE_F);
+        reg.dw1.f = f;
+        return reg;
+}
+/** Construct an immediate register of type D whose payload is \p d. */
+static inline struct brw_reg brw_imm_d(int d)
+{
+        struct brw_reg reg;
+        reg = brw_imm_reg(BRW_REGISTER_TYPE_D);
+        reg.dw1.d = d;
+        return reg;
+}
+/** Construct an immediate register of type UD whose payload is \p ud. */
+static inline struct brw_reg brw_imm_ud(unsigned ud)
+{
+        struct brw_reg reg;
+        reg = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+        reg.dw1.ud = ud;
+        return reg;
+}
+/**
+ * Construct ushort immediate register.
+ * \param uw  16-bit value; it is stored in both the low and the high
+ *            half of the 32-bit immediate payload
+ */
+static inline struct brw_reg brw_imm_uw(uint16_t uw)
+{
+        struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+        /* Widen to unsigned before shifting: a uint16_t promotes to signed
+         * int, and shifting a value >= 0x8000 left by 16 would overflow it.
+         */
+        const unsigned word = uw;
+        imm.dw1.ud = word | (word << 16);
+        return imm;
+}
+/**
+ * Construct short immediate register.
+ * \param w  16-bit signed value; its two's-complement bit pattern is
+ *           stored in both the low and the high half of the 32-bit
+ *           immediate payload
+ */
+static inline struct brw_reg brw_imm_w(int16_t w)
+{
+        struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+        /* Work on the unsigned 16-bit pattern.  Shifting a negative int is
+         * undefined, and OR-ing in the sign-extended value would force the
+         * high half to 0xffff instead of a copy of the low half.
+         */
+        const unsigned word = (uint16_t)w;
+        imm.dw1.ud = word | (word << 16);
+        return imm;
+}
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+/**
+ * Construct vector of eight signed half-byte values.
+ *
+ * v is stored unchanged as the 32-bit payload; the region is set to
+ * vstride 0, width 8, hstride 1.
+ */
+static inline struct brw_reg brw_imm_v(unsigned v)
+{
+        struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_V);
+        packed.dw1.ud = v;
+        packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+        packed.width = BRW_WIDTH_8;
+        packed.vstride = BRW_VERTICAL_STRIDE_0;
+        return packed;
+}
+/**
+ * Construct vector of four 8-bit float values.
+ *
+ * v is stored unchanged as the 32-bit payload; the region is set to
+ * vstride 0, width 4, hstride 1.
+ */
+static inline struct brw_reg brw_imm_vf(unsigned v)
+{
+        struct brw_reg packed = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+        packed.dw1.ud = v;
+        packed.hstride = BRW_HORIZONTAL_STRIDE_1;
+        packed.width = BRW_WIDTH_4;
+        packed.vstride = BRW_VERTICAL_STRIDE_0;
+        return packed;
+}
+#define VF_ZERO 0x0
+#define VF_ONE  0x30
+#define VF_NEG  (1<<7)
+/**
+ * Construct a VF immediate from four separately supplied values.
+ *
+ * v0 lands in bits 0-7, v1 in bits 8-15, v2 in bits 16-23 and v3 in bits
+ * 24-31 of the payload.  The arguments are not masked, so each is expected
+ * to fit in 8 bits (e.g. VF_ZERO, VF_ONE, optionally OR-ed with VF_NEG).
+ */
+static inline struct brw_reg brw_imm_vf4(unsigned v0,
+                                         unsigned v1,
+                                         unsigned v2,
+                                         unsigned v3)
+{
+        struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+        unsigned payload = v0 << 0;
+        payload |= v1 << 8;
+        payload |= v2 << 16;
+        payload |= v3 << 24;
+        imm.dw1.ud = payload;
+        imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+        imm.width = BRW_WIDTH_4;
+        imm.vstride = BRW_VERTICAL_STRIDE_0;
+        return imm;
+}
+/**
+ * Construct a UW immediate holding reg.nr * REG_SIZE + reg.subnr.
+ *
+ * NOTE(review): presumably the offset of reg within its register file, with
+ * REG_SIZE the size of one register - confirm against REG_SIZE's definition.
+ */
+static inline struct brw_reg brw_address(struct brw_reg reg)
+{
+        const unsigned offset = reg.nr * REG_SIZE + reg.subnr;
+        return brw_imm_uw(offset);
+}
+/*
+ * General-register-file shorthands: each forwards (nr, subnr) to the
+ * matching brw_*_reg() constructor with BRW_GENERAL_REGISTER_FILE.
+ */
+/** Construct float[1] general-purpose register */
+static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_vec1_reg(file, nr, subnr);
+}
+/** Construct float[2] general-purpose register */
+static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_vec2_reg(file, nr, subnr);
+}
+/** Construct float[4] general-purpose register */
+static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_vec4_reg(file, nr, subnr);
+}
+/** Construct float[8] general-purpose register */
+static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_vec8_reg(file, nr, subnr);
+}
+/** Construct unsigned word[8] general-purpose register */
+static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_uw8_reg(file, nr, subnr);
+}
+/** Construct unsigned word[16] general-purpose register */
+static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
+{
+        const unsigned file = BRW_GENERAL_REGISTER_FILE;
+        return brw_uw16_reg(file, nr, subnr);
+}
+/**
+ * Construct null register (usually used for setting condition codes).
+ *
+ * This is an 8-wide view of BRW_ARF_NULL in the architecture register
+ * file, at sub-register 0.
+ */
+static inline struct brw_reg brw_null_reg(void)
+{
+        return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                            BRW_ARF_NULL,
+                            0);
+}
+/**
+ * Construct a single-UW view of the address register (BRW_ARF_ADDRESS in
+ * the architecture register file) at sub-register subnr.
+ */
+static inline struct brw_reg brw_address_reg(unsigned subnr)
+{
+        const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+        return brw_uw1_reg(file, BRW_ARF_ADDRESS, subnr);
+}
+/**
+ * Construct the instruction-pointer register (BRW_ARF_IP, sub-register 0)
+ * as a UD scalar.
+ *
+ * If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw.  This goes against the convention for other scalar
+ * regs, which is why XYZW is used for both here.
+ */
+static inline struct brw_reg brw_ip_reg(void)
+{
+        return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                       BRW_ARF_IP,
+                       0,
+                       BRW_REGISTER_TYPE_UD,
+                       BRW_VERTICAL_STRIDE_4, /* ? */
+                       BRW_WIDTH_1,
+                       BRW_HORIZONTAL_STRIDE_0,
+                       BRW_SWIZZLE_XYZW, /* NOTE! */
+                       WRITEMASK_XYZW); /* NOTE! */
+}
+/**
+ * Construct an 8-wide view of the accumulator (BRW_ARF_ACCUMULATOR in the
+ * architecture register file) at sub-register 0.
+ */
+static inline struct brw_reg brw_acc_reg(void)
+{
+        return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                            BRW_ARF_ACCUMULATOR,
+                            0);
+}
+/**
+ * Construct a UD scalar view of the notification-count register
+ * (BRW_ARF_NOTIFICATION_COUNT), with swizzle XXXX and writemask X.
+ *
+ * NOTE(review): the sub-register argument was missing from this copy of
+ * the file; 1 is restored on the strength of the function name ("_1_") -
+ * confirm against the upstream header.
+ */
+static inline struct brw_reg brw_notification_1_reg(void)
+{
+        return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                       BRW_ARF_NOTIFICATION_COUNT,
+                       1,
+                       BRW_REGISTER_TYPE_UD,
+                       BRW_VERTICAL_STRIDE_0,
+                       BRW_WIDTH_1,
+                       BRW_HORIZONTAL_STRIDE_0,
+                       BRW_SWIZZLE_XXXX,
+                       WRITEMASK_X);
+}
+/**
+ * Construct a single-UW view of the flag register (BRW_ARF_FLAG in the
+ * architecture register file) at sub-register 0.
+ */
+static inline struct brw_reg brw_flag_reg(void)
+{
+        return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                           BRW_ARF_FLAG,
+                           0);
+}
+/**
+ * Construct a single-UW view of the mask register (BRW_ARF_MASK in the
+ * architecture register file) at sub-register subnr.
+ */
+static inline struct brw_reg brw_mask_reg(unsigned subnr)
+{
+        const unsigned file = BRW_ARCHITECTURE_REGISTER_FILE;
+        return brw_uw1_reg(file, BRW_ARF_MASK, subnr);
+}
+/**
+ * Construct an 8-wide message register (MRF) at sub-register 0.
+ *
+ * Bit 7 of nr is ignored by the range check; the remaining bits must be
+ * below BRW_MAX_MRF.  nr is passed on to the constructor unmodified.
+ */
+static inline struct brw_reg brw_message_reg(unsigned nr)
+{
+        assert((nr & ~0x80u) < BRW_MAX_MRF);
+        return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+}
+/**
+ * Construct a 4-wide message register (MRF) at sub-register subnr.
+ *
+ * Bit 7 of nr is ignored by the range check; the remaining bits must be
+ * below BRW_MAX_MRF.  nr is passed on to the constructor unmodified.
+ */
+static inline struct brw_reg brw_message4_reg(unsigned nr,
+                                              int subnr)
+{
+        assert((nr & ~0x80u) < BRW_MAX_MRF);
+        return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
+}
+/* Map an element count or stride (0, 1, 2, 4, 8, 16, 32) to the code
+ * 0, 1, 2, 3, 4, 5, 6; any other value yields 0.
+ *
+ * This is almost always called with a numeric constant argument, so
+ * the mapping is kept as plain comparisons that fold at compile time.
+ */
+static inline unsigned cvt(unsigned val)
+{
+        if (val == 1)
+                return 1;
+        if (val == 2)
+                return 2;
+        if (val == 4)
+                return 3;
+        if (val == 8)
+                return 4;
+        if (val == 16)
+                return 5;
+        if (val == 32)
+                return 6;
+        /* 0 and every unlisted value */
+        return 0;
+}
+/**
+ * Return a copy of reg with its region replaced.
+ *
+ * vstride, width and hstride are given as plain counts and translated with
+ * cvt(); the width field stores cvt(width) - 1.
+ */
+static inline struct brw_reg __stride(struct brw_reg reg,
+                                    unsigned vstride,
+                                    unsigned width,
+                                    unsigned hstride)
+{
+        const unsigned vs_code = cvt(vstride);
+        const unsigned width_code = cvt(width) - 1;
+        const unsigned hs_code = cvt(hstride);
+        reg.vstride = vs_code;
+        reg.width = width_code;
+        reg.hstride = hs_code;
+        return reg;
+}
+/*
+ * Region shorthands: each returns a copy of reg whose region is reset
+ * through __stride() to the <vstride;width,hstride> noted on the function.
+ */
+/** Region <16;16,1> */
+static inline struct brw_reg vec16(struct brw_reg reg)
+{
+        reg = __stride(reg, 16, 16, 1);
+        return reg;
+}
+/** Region <8;8,1> */
+static inline struct brw_reg vec8(struct brw_reg reg)
+{
+        reg = __stride(reg, 8, 8, 1);
+        return reg;
+}
+/** Region <4;4,1> */
+static inline struct brw_reg vec4(struct brw_reg reg)
+{
+        reg = __stride(reg, 4, 4, 1);
+        return reg;
+}
+/** Region <2;2,1> */
+static inline struct brw_reg vec2(struct brw_reg reg)
+{
+        reg = __stride(reg, 2, 2, 1);
+        return reg;
+}
+/** Region <0;1,0> (scalar) */
+static inline struct brw_reg vec1(struct brw_reg reg)
+{
+        reg = __stride(reg, 0, 1, 0);
+        return reg;
+}
+/** Scalar view of reg after applying __suboffset(reg, elt). */
+static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
+{
+        struct brw_reg element = __suboffset(reg, elt);
+        return vec1(element);
+}
+/** As get_element(), but reg is retyped to UD before the offset is applied. */
+static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
+{
+        struct brw_reg element = __retype(reg, BRW_REGISTER_TYPE_UD);
+        element = __suboffset(element, elt);
+        return vec1(element);
+}
+/**
+ * Compose a swizzle on top of reg's existing one.
+ *
+ * Each of x, y, z, w selects a channel of the current swizzle; the four
+ * selected channels become the new swizzle.  Immediates are rejected by
+ * assertion.
+ */
+static inline struct brw_reg brw_swizzle(struct brw_reg reg,
+                                         unsigned x,
+                                         unsigned y,
+                                         unsigned z,
+                                         unsigned w)
+{
+        const unsigned current = reg.dw1.bits.swizzle;
+        assert(reg.file != BRW_IMMEDIATE_VALUE);
+        reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(current, x),
+                                            BRW_GET_SWZ(current, y),
+                                            BRW_GET_SWZ(current, z),
+                                            BRW_GET_SWZ(current, w));
+        return reg;
+}
+/** Broadcast swizzle: apply brw_swizzle() with channel x in all four slots. */
+static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
+                                          unsigned x)
+{
+        struct brw_reg broadcast = brw_swizzle(reg, x, x, x, x);
+        return broadcast;
+}
+/**
+ * Narrow reg's writemask: the result keeps only channels enabled in both
+ * the existing writemask and mask.  Immediates are rejected by assertion.
+ */
+static inline struct brw_reg brw_writemask(struct brw_reg reg,
+                                           unsigned mask)
+{
+        assert(reg.file != BRW_IMMEDIATE_VALUE);
+        reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+        return reg;
+}
+/**
+ * Replace reg's writemask with mask, discarding the previous value.
+ * Immediates are rejected by assertion.
+ */
+static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
+                                               unsigned mask)
+{
+        struct brw_reg masked = reg;
+        assert(reg.file != BRW_IMMEDIATE_VALUE);
+        masked.dw1.bits.writemask = mask;
+        return masked;
+}
+/** Return reg with its negate modifier toggled (negating twice cancels). */
+static inline struct brw_reg brw_negate(struct brw_reg reg)
+{
+        reg.negate = reg.negate ^ 1;
+        return reg;
+}
+/** Return reg with its absolute-value modifier set. */
+static inline struct brw_reg brw_abs(struct brw_reg reg)
+{
+        struct brw_reg result = reg;
+        result.abs = 1;
+        return result;
+}
+/***********************************************************************
+*/
+/**
+ * Construct a float[4] register-indirect operand.
+ *
+ * Starts from brw_vec4_grf(0, 0), then switches the address mode to
+ * BRW_ADDRESS_REGISTER_INDIRECT_REGISTER, stores subnr in the subnr field
+ * and offset in the indirect-offset field.
+ */
+static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
+                                               int offset)
+{
+        struct brw_reg operand = brw_vec4_grf(0, 0);
+        operand.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+        operand.subnr = subnr;
+        operand.dw1.bits.indirect_offset = offset;
+        return operand;
+}
+/**
+ * Construct a float[1] register-indirect operand.
+ *
+ * Starts from brw_vec1_grf(0, 0), then switches the address mode to
+ * BRW_ADDRESS_REGISTER_INDIRECT_REGISTER, stores subnr in the subnr field
+ * and offset in the indirect-offset field.
+ */
+static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
+                                               int offset)
+{
+        struct brw_reg operand = brw_vec1_grf(0, 0);
+        operand.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+        operand.subnr = subnr;
+        operand.dw1.bits.indirect_offset = offset;
+        return operand;
+}
+/*
+ * Dereference helpers: build an indirect operand from ptr, adding offset to
+ * ptr.addr_offset.  The typed variants retype the float operand.
+ */
+/** float[4] operand at ptr + offset */
+static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
+{
+        const int total = ptr.addr_offset + offset;
+        return brw_vec4_indirect(ptr.addr_subnr, total);
+}
+/** float[1] operand at ptr + offset */
+static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
+{
+        const int total = ptr.addr_offset + offset;
+        return brw_vec1_indirect(ptr.addr_subnr, total);
+}
+/** deref_4f() retyped to B */
+static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
+{
+        struct brw_reg operand = deref_4f(ptr, offset);
+        return __retype(operand, BRW_REGISTER_TYPE_B);
+}
+/** deref_1f() retyped to UW */
+static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
+{
+        struct brw_reg operand = deref_1f(ptr, offset);
+        return __retype(operand, BRW_REGISTER_TYPE_UW);
+}
+/** deref_1f() retyped to D */
+static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
+{
+        struct brw_reg operand = deref_1f(ptr, offset);
+        return __retype(operand, BRW_REGISTER_TYPE_D);
+}
+/** deref_1f() retyped to UD */
+static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
+{
+        struct brw_reg operand = deref_1f(ptr, offset);
+        return __retype(operand, BRW_REGISTER_TYPE_UD);
+}
+/** Address register operand for the sub-register that ptr refers to. */
+static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+        const unsigned subnr = ptr.addr_subnr;
+        return brw_address_reg(subnr);
+}
+/** Return ptr advanced by offset (added to its addr_offset). */
+static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
+{
+        struct brw_indirect moved = ptr;
+        moved.addr_offset += offset;
+        return moved;
+}
+/** Build an indirect pointer from an address sub-register and an offset;
+ * the pad field is cleared. */
+static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
+{
+        struct brw_indirect result;
+        result.pad = 0;
+        result.addr_offset = offset;
+        result.addr_subnr = addr_subnr;
+        return result;
+}
+/**
+ * Do two brw_regs refer to the same register?
+ *
+ * Only the register file and register number are compared; sub-register,
+ * type and region are not considered.
+ */
+static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+        if (r1.file != r2.file)
+                return false;
+        return r1.nr == r2.nr;
+}
+/** Address of the store slot at index p->nr_insn, i.e. the slot the next
+ * brw_next_insn() call will hand out. */
+static inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+        return p->store + p->nr_insn;
+}
+/*
+ * Default-state setters.  Each writes one header field of p->current, the
+ * template that brw_next_insn() copies into every new instruction.
+ */
+/** Set the default predicate control. */
+static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.predicate_control = pc;
+}
+/** Set the default predicate-inverse flag. */
+static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.predicate_inverse = predicate_inverse;
+}
+/** Set the default conditional modifier. */
+static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.destreg__conditionalmod = conditional;
+}
+/** Set the default access mode. */
+static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.access_mode = access_mode;
+}
+/** Set the default mask control. */
+static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.mask_control = value;
+}
+/** Set the default saturate flag. */
+static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
+{
+        struct brw_instruction *defaults = p->current;
+        defaults->header.saturate = value;
+}
+/** Set the default accumulator-write control; a no-op when p->gen < 060. */
+static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
+{
+        if (p->gen < 060)
+                return;
+        p->current->header.acc_wr_control = value;
+}
+void brw_pop_insn_state(struct brw_compile *p);
+void brw_push_insn_state(struct brw_compile *p);
+void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
+void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
+void brw_compile_init(struct brw_compile *p, int gen, void *store);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+                  struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+                  struct brw_reg reg);
+void brw_set_src1(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg reg);
+void gen6_resolve_implied_move(struct brw_compile *p,
+                               struct brw_reg *src,
+                               unsigned msg_reg_nr);
+/**
+ * Allocate the next instruction slot in p->store and initialise it.
+ *
+ * The slot is filled with a copy of the default state (p->current) and
+ * then given the requested opcode.  If the default state carried a
+ * conditional modifier, the copy keeps it, but the default state has it
+ * cleared and its predicate control set to BRW_PREDICATE_NORMAL.
+ *
+ * Asserts that p->nr_insn + 1 stays below BRW_EU_MAX_INSN.
+ */
+static inline struct brw_instruction *
+brw_next_insn(struct brw_compile *p, unsigned opcode)
+{
+        struct brw_instruction *defaults = p->current;
+        struct brw_instruction *slot;
+        assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+        slot = p->store + p->nr_insn;
+        p->nr_insn += 1;
+        *slot = *defaults;
+        if (defaults->header.destreg__conditionalmod != 0) {
+                defaults->header.destreg__conditionalmod = 0;
+                defaults->header.predicate_control = BRW_PREDICATE_NORMAL;
+        }
+        slot->header.opcode = opcode;
+        return slot;
+}
+/* Helpers for regular instructions: */
+/* ALU1(OP) defines the inline emitter brw_<OP>(p, dest, src0), which
+ * forwards to brw_alu1() with opcode BRW_OPCODE_<OP> and returns the
+ * emitted instruction.
+ */
+#define ALU1(OP)                                                        \
+static inline struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+                                               struct brw_reg dest,     \
+                                               struct brw_reg src0)     \
+{                                                                       \
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);                     \
+}
+/* ALU2(OP) defines the inline emitter brw_<OP>(p, dest, src0, src1), which
+ * forwards to brw_alu2() with opcode BRW_OPCODE_<OP> and returns the
+ * emitted instruction.
+ */
+#define ALU2(OP)                                                        \
+static inline struct brw_instruction *brw_##OP(struct brw_compile *p,   \
+                                               struct brw_reg dest,     \
+                                               struct brw_reg src0,     \
+                                                struct brw_reg src1)    \
+{                                                                       \
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);               \
+}
+/* Rounding operations (other than RNDD) require two instructions - the first
+ * stores a rounded value (possibly the wrong way) in the dest register, but
+ * also sets a per-channel "increment bit" in the flag register.  A predicated
+ * add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
+ *
+ * ROUND(OP) defines brw_<OP>(p, dest, src): it emits the BRW_OPCODE_<OP>
+ * instruction and, only when p->gen < 060, tags it with BRW_CONDITIONAL_R
+ * and follows it with "ADD dest, dest, 1.0f" whose predicate control is
+ * BRW_PREDICATE_NORMAL.  The generated function returns nothing.
+ */
+#define ROUND(OP)                                                       \
+static inline void brw_##OP(struct brw_compile *p,                      \
+                            struct brw_reg dest,                        \
+                            struct brw_reg src)                         \
+{                                                                       \
+        struct brw_instruction *rnd, *add;                              \
+        rnd = brw_next_insn(p, BRW_OPCODE_##OP);                        \
+        brw_set_dest(p, rnd, dest);                                     \
+        brw_set_src0(p, rnd, src);                                      \
+        if (p->gen < 060) {                                             \
+                /* turn on round-increments */                          \
+                rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
+                add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));          \
+                add->header.predicate_control = BRW_PREDICATE_NORMAL;   \
+        }                                                               \
+}
+/**
+ * Emit a one-source instruction: allocate it with brw_next_insn(), then
+ * encode dest and src (as source 0).  Returns the new instruction.
+ */
+static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
+                                               unsigned opcode,
+                                               struct brw_reg dest,
+                                               struct brw_reg src)
+{
+        struct brw_instruction *inst;
+        inst = brw_next_insn(p, opcode);
+        brw_set_dest(p, inst, dest);
+        brw_set_src0(p, inst, src);
+        return inst;
+}
+/**
+ * Emit a two-source instruction: allocate it with brw_next_insn(), then
+ * encode dest, src0 and src1 in that order.  Returns the new instruction.
+ */
+static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                               unsigned opcode,
+                                               struct brw_reg dest,
+                                               struct brw_reg src0,
+                                               struct brw_reg src1 )
+{
+        struct brw_instruction *inst;
+        inst = brw_next_insn(p, opcode);
+        brw_set_dest(p, inst, dest);
+        brw_set_src0(p, inst, src0);
+        brw_set_src1(p, inst, src1);
+        return inst;
+}
+/**
+ * Emit an ADD.
+ *
+ * Debug builds assert the operand-type rule cited as "6.2.2: add": when
+ * one source is float (type F, or a VF immediate) the other source must
+ * be neither D nor UD.
+ */
+static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
+                                              struct brw_reg dest,
+                                              struct brw_reg src0,
+                                              struct brw_reg src1)
+{
+        const bool src0_is_float =
+                src0.type == BRW_REGISTER_TYPE_F ||
+                (src0.file == BRW_IMMEDIATE_VALUE &&
+                 src0.type == BRW_REGISTER_TYPE_VF);
+        const bool src1_is_float =
+                src1.type == BRW_REGISTER_TYPE_F ||
+                (src1.file == BRW_IMMEDIATE_VALUE &&
+                 src1.type == BRW_REGISTER_TYPE_VF);
+        /* 6.2.2: add */
+        if (src0_is_float) {
+                assert(src1.type != BRW_REGISTER_TYPE_UD);
+                assert(src1.type != BRW_REGISTER_TYPE_D);
+        }
+        if (src1_is_float) {
+                assert(src0.type != BRW_REGISTER_TYPE_UD);
+                assert(src0.type != BRW_REGISTER_TYPE_D);
+        }
+        return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
+}
+/**
+ * Emit a MUL.
+ *
+ * Debug builds assert the operand rules cited as "6.32.38: mul":
+ *  - if either source is D or UD, dest must not be F;
+ *  - if one source is float (type F, or a VF immediate) the other must be
+ *    neither D nor UD;
+ *  - neither source may be the accumulator.
+ */
+static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
+                                              struct brw_reg dest,
+                                              struct brw_reg src0,
+                                              struct brw_reg src1)
+{
+        const bool src0_is_dword = src0.type == BRW_REGISTER_TYPE_D ||
+                                   src0.type == BRW_REGISTER_TYPE_UD;
+        const bool src1_is_dword = src1.type == BRW_REGISTER_TYPE_D ||
+                                   src1.type == BRW_REGISTER_TYPE_UD;
+        const bool src0_is_float =
+                src0.type == BRW_REGISTER_TYPE_F ||
+                (src0.file == BRW_IMMEDIATE_VALUE &&
+                 src0.type == BRW_REGISTER_TYPE_VF);
+        const bool src1_is_float =
+                src1.type == BRW_REGISTER_TYPE_F ||
+                (src1.file == BRW_IMMEDIATE_VALUE &&
+                 src1.type == BRW_REGISTER_TYPE_VF);
+        /* 6.32.38: mul */
+        if (src0_is_dword || src1_is_dword) {
+                assert(dest.type != BRW_REGISTER_TYPE_F);
+        }
+        if (src0_is_float) {
+                assert(src1.type != BRW_REGISTER_TYPE_UD);
+                assert(src1.type != BRW_REGISTER_TYPE_D);
+        }
+        if (src1_is_float) {
+                assert(src0.type != BRW_REGISTER_TYPE_UD);
+                assert(src0.type != BRW_REGISTER_TYPE_D);
+        }
+        assert(!(src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                 src0.nr == BRW_ARF_ACCUMULATOR));
+        assert(!(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                 src1.nr == BRW_ARF_ACCUMULATOR));
+        return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
+}
+/**
+ * Emit a JMPI.
+ *
+ * The emitted instruction is forced to execution size 1, no compression
+ * and mask control disabled.  As a side effect the default state's
+ * predicate control is reset to BRW_PREDICATE_NONE.
+ */
+static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
+                                               struct brw_reg dest,
+                                               struct brw_reg src0,
+                                               struct brw_reg src1)
+{
+        struct brw_instruction *jump;
+        jump = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+        jump->header.mask_control = BRW_MASK_DISABLE;
+        jump->header.compression_control = BRW_COMPRESSION_NONE;
+        jump->header.execution_size = 1;
+        p->current->header.predicate_control = BRW_PREDICATE_NONE;
+        return jump;
+}
+/* Instantiate the inline emitters: each line below expands to the
+ * definition of brw_<OP>() for the named opcode (one source for ALU1, two
+ * for ALU2, the round-with-fixup form for ROUND).  ADD, MUL and JMPI are
+ * not listed because they are written out by hand above.
+ */
+ALU1(MOV);
+ALU2(SEL);
+ALU1(NOT);
+ALU2(AND);
+ALU2(OR);
+ALU2(XOR);
+ALU2(SHR);
+ALU2(SHL);
+ALU2(RSR);
+ALU2(RSL);
+ALU2(ASR);
+ALU1(FRC);
+ALU1(RNDD);
+ALU2(MAC);
+ALU2(MACH);
+ALU1(LZD);
+ALU2(DP4);
+ALU2(DPH);
+ALU2(DP3);
+ALU2(DP2);
+ALU2(LINE);
+ALU2(PLN);
+ROUND(RNDZ);
+ROUND(RNDE);
+/* The generator macros are not needed past this point. */
+#undef ALU1
+#undef ALU2
+#undef ROUND
+/* Helpers for SEND instruction */
+void brw_set_dp_read_message(struct brw_compile *p,
+                             struct brw_instruction *insn,
+                             unsigned binding_table_index,
+                             unsigned msg_control,
+                             unsigned msg_type,
+                             unsigned target_cache,
+                             unsigned msg_length,
+                             unsigned response_length);
+void brw_set_dp_write_message(struct brw_compile *p,
+                              struct brw_instruction *insn,
+                              unsigned binding_table_index,
+                              unsigned msg_control,
+                              unsigned msg_type,
+                              unsigned msg_length,
+                              bool header_present,
+                              bool last_render_target,
+                              unsigned response_length,
+                              bool end_of_thread,
+                              bool send_commit_msg);
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   unsigned msg_reg_nr,
+                   struct brw_reg src0,
+                   bool allocate,
+                   bool used,
+                   unsigned msg_length,
+                   unsigned response_length,
+                   bool eot,
+                   bool writes_complete,
+                   unsigned offset,
+                   unsigned swizzle);
+void brw_ff_sync(struct brw_compile *p,
+                 struct brw_reg dest,
+                 unsigned msg_reg_nr,
+                 struct brw_reg src0,
+                 bool allocate,
+                 unsigned response_length,
+                 bool eot);
+void brw_fb_WRITE(struct brw_compile *p,
+                  int dispatch_width,
+                  unsigned msg_reg_nr,
+                  struct brw_reg src0,
+                  unsigned msg_control,
+                  unsigned binding_table_index,
+                  unsigned msg_length,
+                  unsigned response_length,
+                  bool eot,
+                  bool header_present);
+void brw_SAMPLE(struct brw_compile *p,
+                struct brw_reg dest,
+                unsigned msg_reg_nr,
+                struct brw_reg src0,
+                unsigned binding_table_index,
+                unsigned sampler,
+                unsigned writemask,
+                unsigned msg_type,
+                unsigned response_length,
+                unsigned msg_length,
+                bool header_present,
+                unsigned simd_mode);
+void brw_math_16(struct brw_compile *p,
+                 struct brw_reg dest,
+                 unsigned function,
+                 unsigned saturate,
+                 unsigned msg_reg_nr,
+                 struct brw_reg src,
+                 unsigned precision);
+void brw_math(struct brw_compile *p,
+              struct brw_reg dest,
+              unsigned function,
+              unsigned saturate,
+              unsigned msg_reg_nr,
+              struct brw_reg src,
+              unsigned data_type,
+              unsigned precision);
+void brw_math2(struct brw_compile *p,
+               struct brw_reg dest,
+               unsigned function,
+               struct brw_reg src0,
+               struct brw_reg src1);
+void brw_oword_block_read(struct brw_compile *p,
+                          struct brw_reg dest,
+                          struct brw_reg mrf,
+                          uint32_t offset,
+                          uint32_t bind_table_index);
+void brw_oword_block_read_scratch(struct brw_compile *p,
+                                  struct brw_reg dest,
+                                  struct brw_reg mrf,
+                                  int num_regs,
+                                  unsigned offset);
+void brw_oword_block_write_scratch(struct brw_compile *p,
+                                   struct brw_reg mrf,
+                                   int num_regs,
+                                   unsigned offset);
+void brw_dword_scattered_read(struct brw_compile *p,
+                              struct brw_reg dest,
+                              struct brw_reg mrf,
+                              uint32_t bind_table_index);
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      unsigned location,
+                      unsigned bind_table_index);
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+                               struct brw_reg dest,
+                               struct brw_reg addrReg,
+                               unsigned offset,
+                               unsigned bind_table_index);
+/* If/else/endif.  Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+                               unsigned execute_size);
+struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
+                                struct brw_reg src0, struct brw_reg src1);
+void brw_ELSE(struct brw_compile *p);
+void brw_ENDIF(struct brw_compile *p);
+/* DO/WHILE loops:
+*/
+struct brw_instruction *brw_DO(struct brw_compile *p,
+                               unsigned execute_size);
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+                                  struct brw_instruction *patch_insn);
+struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
+struct brw_instruction *gen6_CONT(struct brw_compile *p,
+                                  struct brw_instruction *do_insn);
+/* Forward jumps:
+*/
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn);
+void brw_NOP(struct brw_compile *p);
+void brw_WAIT(struct brw_compile *p);
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             unsigned conditional,
+             struct brw_reg src0,
+             struct brw_reg src1);
+void brw_print_reg(struct brw_reg reg);
+/**
+ * Emit the BRW_MATH_FUNCTION_INV math operation on src, writing dst.
+ *
+ * Forwards to brw_math() with no saturation, message register 0, vector
+ * data and full precision.  The last two arguments follow the brw_math()
+ * prototype order: data_type first, then precision.
+ */
+static inline void brw_math_invert(struct brw_compile *p,
+                                   struct brw_reg dst,
+                                   struct brw_reg src)
+{
+        brw_math(p,
+                 dst,
+                 BRW_MATH_FUNCTION_INV,
+                 BRW_MATH_SATURATE_NONE,
+                 0,                             /* msg_reg_nr */
+                 src,
+                 BRW_MATH_DATA_VECTOR,          /* data_type */
+                 BRW_MATH_PRECISION_FULL);      /* precision */
+}
+void brw_set_uip_jip(struct brw_compile *p);
+uint32_t brw_swap_cmod(uint32_t cmod);
+void brw_disasm(FILE *file,
+                const struct brw_instruction *inst,
+                int gen);
+#endif

 /drivers/video/Intel-2D/brw/brw_eu_emit.c
 ,0 → 1,2002
+/*
+   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+   develop this 3D driver.
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+   The above copyright notice and this permission notice (including the
+   next paragraph) shall be included in all copies or substantial
+   portions of the Software.
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **********************************************************************/
+/*
+ * Authors:
+ *   Keith Whitwell <keith@tungstengraphics.com>
+ */
+#include "brw_eu.h"
+#include <string.h>
+#include <stdlib.h>
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+/*
+ * Derive insn's execution size from an operand: normally the operand's
+ * width code is used as-is, but an 8-wide operand while p->compressed is
+ * set becomes BRW_EXECUTE_16.
+ */
+static void guess_execution_size(struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 struct brw_reg reg)
+{
+        unsigned size = reg.width;
+        if (p->compressed && reg.width == BRW_WIDTH_8)
+                size = BRW_EXECUTE_16;
+        insn->header.execution_size = size;
+}
+/**
+ * Prior to Sandybridge, the SEND instruction accepted non-MRF source
+ * registers, implicitly moving the operand to a message register.
+ *
+ * On Sandybridge, this is no longer the case.  This function performs the
+ * explicit move; it should be called before emitting a SEND instruction.
+ *
+ * Nothing happens when p->gen < 060 or when *src is already a message
+ * register.  Otherwise *src is replaced by message register msg_reg_nr;
+ * unless the original source was the null register, a MOV (both operands
+ * retyped to UD, mask disabled, uncompressed) copies it there first, with
+ * the instruction state saved and restored around the MOV.
+ */
+void
+gen6_resolve_implied_move(struct brw_compile *p,
+                          struct brw_reg *src,
+                          unsigned msg_reg_nr)
+{
+        const bool src_is_null =
+                src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
+                src->nr == BRW_ARF_NULL;
+        if (p->gen < 060 || src->file == BRW_MESSAGE_REGISTER_FILE)
+                return;
+        if (!src_is_null) {
+                brw_push_insn_state(p);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
+                brw_pop_insn_state(p);
+        }
+        *src = brw_message_reg(msg_reg_nr);
+}
+/*
+ * On gen >= 070, rewrite a message-register operand in place as a
+ * general-register operand with 111 added to its register number.
+ * Operands in any other file, and all operands on earlier generations,
+ * are left untouched.
+ */
+static void
+gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
+{
+        /* From the BSpec / ISA Reference / send - [DevIVB+]:
+         * "The send with EOT should use register space R112-R127 for <src>. This is
+         *  to enable loading of a new thread into the same slot while the message
+         *  with EOT for current thread is pending dispatch."
+         *
+         * Since we're pretending to have 16 MRFs anyway, we may as well use the
+         * registers required for messages with EOT.
+         *
+         * NOTE(review): with an offset of 111, MRF n maps to GRF 111 + n, so
+         * only n >= 1 falls inside the quoted R112-R127 window - confirm
+         * that m0 is never converted here.
+         */
+        if (p->gen < 070)
+                return;
+        if (reg->file != BRW_MESSAGE_REGISTER_FILE)
+                return;
+        reg->file = BRW_GENERAL_REGISTER_FILE;
+        reg->nr += 111;
+}
+/*
+ * Encode the destination operand `dest` into `insn`.
+ *
+ * The register file, type and address mode are always written.  The rest
+ * depends on the address mode (direct vs. indirect) and on the access mode
+ * already stored in insn->header (align1 vs. align16).  Finally the
+ * instruction's execution size is derived from the destination's width.
+ */
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+             struct brw_reg dest)
+{
+        /* ARF and MRF register numbers are exempt from the < 128 check. */
+        if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
+            dest.file != BRW_MESSAGE_REGISTER_FILE)
+                assert(dest.nr < 128);
+        /* On gen >= 070 an MRF destination is redirected to a GRF. */
+        gen7_convert_mrf_to_grf(p, &dest);
+        insn->bits1.da1.dest_reg_file = dest.file;
+        insn->bits1.da1.dest_reg_type = dest.type;
+        insn->bits1.da1.dest_address_mode = dest.address_mode;
+        if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+                insn->bits1.da1.dest_reg_nr = dest.nr;
+                if (insn->header.access_mode == BRW_ALIGN_1) {
+                        insn->bits1.da1.dest_subreg_nr = dest.subnr;
+                        /* A destination horizontal stride of 0 is encoded as 1. */
+                        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+                                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+                        insn->bits1.da1.dest_horiz_stride = dest.hstride;
+                } else {
+                        /* align16 stores the sub-register number divided by 16. */
+                        insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+                        insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+                        /* even ignored in da16, still need to set as '01' */
+                        insn->bits1.da16.dest_horiz_stride = 1;
+                }
+        } else {
+                insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+                /* These are different sizes in align1 vs align16:
+                */
+                if (insn->header.access_mode == BRW_ALIGN_1) {
+                        insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+                        /* A destination horizontal stride of 0 is encoded as 1. */
+                        if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+                                dest.hstride = BRW_HORIZONTAL_STRIDE_1;
+                        insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+                }
+                else {
+                        insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+                        /* as in the direct align16 case: ignored, but still
+                         * needs to be set as '01' */
+                        insn->bits1.ia16.dest_horiz_stride = 1;
+                }
+        }
+        /* Execution size follows the (possibly adjusted) destination. */
+        guess_execution_size(p, insn, dest);
+}
+/*
+ * Element size looked up by register-type encoding (indices 0..7).
+ * validate_reg() multiplies an entry by the destination horizontal stride
+ * and compares the product with 2 for the "stride has to be a word" rule,
+ * so the values are presumably sizes in bytes -- TODO confirm against the
+ * BRW_REGISTER_TYPE_* definitions.
+ * Index 6 has no initializer and is therefore zero.
+ */
+static const int reg_type_size[8] = {
+        [0] = 4,
+        [1] = 4,
+        [2] = 2,
+        [3] = 2,
+        [4] = 1,
+        [5] = 1,
+        [7] = 4
+};
+/*
+ * Debug-time sanity check of a source operand's region description against
+ * the execution size already stored in `insn`.  Every check is an assert();
+ * the function has no other effect.
+ */
+static void
+validate_reg(struct brw_instruction *insn, struct brw_reg reg)
+{
+        /* Decode tables: encoded field value -> element count / stride. */
+        static const int hstride_tab[] = {0, 1, 2, 4};
+        static const int vstride_tab[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
+        static const int width_tab[] = {1, 2, 4, 8, 16};
+        static const int execsize_tab[] = {1, 2, 4, 8, 16};
+        int width, hstride, vstride, execsize;
+        if (reg.file == BRW_IMMEDIATE_VALUE) {
+                /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
+                 * mean the destination has to be 128-bit aligned and the
+                 * destination horiz stride has to be a word.
+                 */
+                if (reg.type == BRW_REGISTER_TYPE_V) {
+                        assert(hstride_tab[insn->bits1.da1.dest_horiz_stride] *
+                               reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
+                }
+                return;
+        }
+        /* NOTE(review): both comparisons test reg.file; the second was most
+         * likely meant to test reg.nr.  It is kept as-is because tightening it
+         * would subject other architecture-register operands to the asserts
+         * below -- confirm against the callers before changing.
+         */
+        if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
+            reg.file == BRW_ARF_NULL)
+                return;
+        hstride = hstride_tab[reg.hstride];
+        /* The encoding 0xf has no table entry and is represented as -1. */
+        vstride = (reg.vstride == 0xf) ? -1 : vstride_tab[reg.vstride];
+        width = width_tab[reg.width];
+        execsize = execsize_tab[insn->header.execution_size];
+        /* Restrictions from 3.3.10: Register Region Restrictions. */
+        /* 3. */
+        assert(execsize >= width);
+        /* 4. */
+        if (execsize == width && hstride != 0) {
+                assert(vstride == -1 || vstride == width * hstride);
+        }
+        /* 5. execsize == width with hstride == 0: no restriction on vstride. */
+        /* 6. */
+        if (width == 1) {
+                assert(hstride == 0);
+        }
+        /* 7. */
+        if (execsize == 1 && width == 1) {
+                assert(hstride == 0);
+                assert(vstride == 0);
+        }
+        /* 8. */
+        if (vstride == 0 && hstride == 0) {
+                assert(width == 1);
+        }
+        /* 10. Check destination issues. */
+}
+/*
+ * Encode source operand 0 (`reg`) into instruction `insn`.
+ *
+ * The operand is first converted (gen7+ message registers become GRFs) and
+ * sanity-checked by validate_reg().  Immediates are stored in bits3 and also
+ * set the src1 file/type fields; register operands are encoded according to
+ * their addressing mode and the instruction's access mode.
+ */
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+             struct brw_reg reg)
+{
+        /* The range check depends on the register *file* (as in
+         * brw_set_dest()); the register type says nothing about which
+         * register numbers are valid.
+         */
+        if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+                assert(reg.nr < 128);
+        gen7_convert_mrf_to_grf(p, &reg);
+        validate_reg(insn, reg);
+        insn->bits1.da1.src0_reg_file = reg.file;
+        insn->bits1.da1.src0_reg_type = reg.type;
+        insn->bits2.da1.src0_abs = reg.abs;
+        insn->bits2.da1.src0_negate = reg.negate;
+        insn->bits2.da1.src0_address_mode = reg.address_mode;
+        if (reg.file == BRW_IMMEDIATE_VALUE) {
+                insn->bits3.ud = reg.dw1.ud;
+                /* Required to set some fields in src1 as well:
+                */
+                insn->bits1.da1.src1_reg_file = 0; /* arf */
+                insn->bits1.da1.src1_reg_type = reg.type;
+        } else {
+                if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+                        if (insn->header.access_mode == BRW_ALIGN_1) {
+                                insn->bits2.da1.src0_subreg_nr = reg.subnr;
+                                insn->bits2.da1.src0_reg_nr = reg.nr;
+                        } else {
+                                /* align16 stores the sub-register number divided by 16. */
+                                insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+                                insn->bits2.da16.src0_reg_nr = reg.nr;
+                        }
+                } else {
+                        insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+                        if (insn->header.access_mode == BRW_ALIGN_1) {
+                                insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+                        } else {
+                                /* NOTE(review): the indirect offset is stored in the
+                                 * ia16 sub-register field here, whereas brw_set_dest()
+                                 * writes ia16.dest_indirect_offset -- confirm whether
+                                 * an ia16 src0 indirect-offset field was intended.
+                                 */
+                                insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
+                        }
+                }
+                if (insn->header.access_mode == BRW_ALIGN_1) {
+                        /* A scalar operand of a scalar instruction is always
+                         * encoded with the <0;1,0> region.
+                         */
+                        if (reg.width == BRW_WIDTH_1 &&
+                            insn->header.execution_size == BRW_EXECUTE_1) {
+                                insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+                                insn->bits2.da1.src0_width = BRW_WIDTH_1;
+                                insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+                        } else {
+                                insn->bits2.da1.src0_horiz_stride = reg.hstride;
+                                insn->bits2.da1.src0_width = reg.width;
+                                insn->bits2.da1.src0_vert_stride = reg.vstride;
+                        }
+                } else {
+                        insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+                        insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+                        insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+                        insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+                        /* This is an oddity of the fact we're using the same
+                         * descriptions for registers in align_16 as align_1:
+                         */
+                        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+                                insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+                        else
+                                insn->bits2.da16.src0_vert_stride = reg.vstride;
+                }
+        }
+}
+/*
+ * Encode source operand 1 (`reg`) into instruction `insn`.
+ *
+ * src1 must not be a message register and must be directly addressed when
+ * it is not an immediate.  src0 must already be encoded, because this
+ * function asserts that src0 is not an immediate.
+ */
+void brw_set_src1(struct brw_compile *p,
+                  struct brw_instruction *insn,
+                  struct brw_reg reg)
+{
+        assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+        assert(reg.nr < 128);
+        /* Only acts on message-register operands, which the assert above
+         * excludes in builds where asserts are enabled.
+         */
+        gen7_convert_mrf_to_grf(p, &reg);
+        validate_reg(insn, reg);
+        insn->bits1.da1.src1_reg_file = reg.file;
+        insn->bits1.da1.src1_reg_type = reg.type;
+        insn->bits3.da1.src1_abs = reg.abs;
+        insn->bits3.da1.src1_negate = reg.negate;
+        /* Only src1 can be immediate in two-argument instructions. */
+        assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+        if (reg.file == BRW_IMMEDIATE_VALUE) {
+                /* Whole-dword store into bits3; presumably bits3.ud overlays the
+                 * bits3.da1 fields written above -- TODO confirm against the
+                 * struct brw_instruction definition.
+                 */
+                insn->bits3.ud = reg.dw1.ud;
+        } else {
+                /* This is a hardware restriction, which may or may not be lifted
+                 * in the future:
+                 */
+                assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+                /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
+                if (insn->header.access_mode == BRW_ALIGN_1) {
+                        insn->bits3.da1.src1_subreg_nr = reg.subnr;
+                        insn->bits3.da1.src1_reg_nr = reg.nr;
+                } else {
+                        /* align16 stores the sub-register number divided by 16. */
+                        insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+                        insn->bits3.da16.src1_reg_nr = reg.nr;
+                }
+                if (insn->header.access_mode == BRW_ALIGN_1) {
+                        /* A scalar operand of a scalar instruction is always
+                         * encoded with zero strides and width 1.
+                         */
+                        if (reg.width == BRW_WIDTH_1 &&
+                            insn->header.execution_size == BRW_EXECUTE_1) {
+                                insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+                                insn->bits3.da1.src1_width = BRW_WIDTH_1;
+                                insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+                        } else {
+                                insn->bits3.da1.src1_horiz_stride = reg.hstride;
+                                insn->bits3.da1.src1_width = reg.width;
+                                insn->bits3.da1.src1_vert_stride = reg.vstride;
+                        }
+                } else {
+                        insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+                        insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+                        insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+                        insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+                        /* This is an oddity of the fact we're using the same
+                         * descriptions for registers in align_16 as align_1:
+                         */
+                        if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+                                insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+                        else
+                                insn->bits3.da16.src1_vert_stride = reg.vstride;
+                }
+        }
+}
+/**
+ * Fill in the Message Descriptor (and, where applicable, the Extended
+ * Message Descriptor) of a SEND instruction.
+ *
+ * \note src1 is first replaced by a zero immediate, which clears the
+ *       Function Control bits.  Call this \b before writing any
+ *       message-specific fields; fields the caller leaves alone stay zero.
+ */
+static void
+brw_set_message_descriptor(struct brw_compile *p,
+                           struct brw_instruction *inst,
+                           enum brw_message_target sfid,
+                           unsigned msg_length,
+                           unsigned response_length,
+                           bool header_present,
+                           bool end_of_thread)
+{
+        brw_set_src1(p, inst, brw_imm_d(0));
+        if (p->gen < 050) {
+                /* Original layout: the target lives in the descriptor itself and
+                 * there is no header_present bit.
+                 */
+                inst->bits3.generic.msg_target = sfid;
+                inst->bits3.generic.msg_length = msg_length;
+                inst->bits3.generic.response_length = response_length;
+                inst->bits3.generic.end_of_thread = end_of_thread;
+                return;
+        }
+        inst->bits3.generic_gen5.msg_length = msg_length;
+        inst->bits3.generic_gen5.response_length = response_length;
+        inst->bits3.generic_gen5.header_present = header_present;
+        inst->bits3.generic_gen5.end_of_thread = end_of_thread;
+        if (p->gen < 060) {
+                /* Set Extended Message Descriptor (ex_desc) */
+                inst->bits2.send_gen5.sfid = sfid;
+                inst->bits2.send_gen5.end_of_thread = end_of_thread;
+        } else {
+                /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+                inst->header.destreg__conditionalmod = sfid;
+        }
+}
+/*
+ * Build the SEND descriptor for a math-unit message.
+ *
+ * Message and response lengths are derived from `function`; the remaining
+ * arguments are copied into the math descriptor fields, whose layout
+ * differs on gen 050 from every other generation handled here.
+ */
+static void brw_set_math_message(struct brw_compile *p,
+                                 struct brw_instruction *insn,
+                                 unsigned function,
+                                 unsigned integer_type,
+                                 bool low_precision,
+                                 bool saturate,
+                                 unsigned dataType)
+{
+        /* Functions for which the message length is 2 instead of 1. */
+        const bool long_message =
+                function == BRW_MATH_FUNCTION_POW ||
+                function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+                function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
+                function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+        /* Functions for which the response length is 2 instead of 1. */
+        const bool long_response =
+                function == BRW_MATH_FUNCTION_SINCOS ||
+                function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
+        const unsigned msg_length = long_message ? 2 : 1;
+        const unsigned response_length = long_response ? 2 : 1;
+        brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
+                                   msg_length, response_length,
+                                   false, false);
+        if (p->gen != 050) {
+                insn->bits3.math.function = function;
+                insn->bits3.math.int_type = integer_type;
+                insn->bits3.math.precision = low_precision;
+                insn->bits3.math.saturate = saturate;
+                insn->bits3.math.data_type = dataType;
+                return;
+        }
+        insn->bits3.math_gen5.function = function;
+        insn->bits3.math_gen5.int_type = integer_type;
+        insn->bits3.math_gen5.precision = low_precision;
+        insn->bits3.math_gen5.saturate = saturate;
+        insn->bits3.math_gen5.data_type = dataType;
+        insn->bits3.math_gen5.snapshot = 0;
+}
+/*
+ * Build the SEND descriptor for an FF_SYNC message to the URB unit.
+ *
+ * The message always carries a header and has a message length of 1;
+ * `response_length` and `end_of_thread` are forwarded to the generic
+ * descriptor, and `allocate` is stored in the URB-specific field.  All
+ * other URB fields are unused by FF_SYNC and written as zero.
+ */
+static void brw_set_ff_sync_message(struct brw_compile *p,
+                                    struct brw_instruction *insn,
+                                    bool allocate,
+                                    unsigned response_length,
+                                    bool end_of_thread)
+{
+        /* The msg_length argument was missing from this call. */
+        brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                                   1, response_length,
+                                   true, end_of_thread);
+        insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
+        insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
+        insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
+        insn->bits3.urb_gen5.allocate = allocate;
+        insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
+        insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
+}
+/*
+ * Build the SEND descriptor for a URB write message.
+ *
+ * The generic descriptor is written first (always with a header), then the
+ * URB-specific fields in the layout matching p->gen.  On gen7+ `allocate`
+ * and `used` are not encoded, and a transposing swizzle is rejected by
+ * assert.
+ */
+static void brw_set_urb_message(struct brw_compile *p,
+                                struct brw_instruction *insn,
+                                bool allocate,
+                                bool used,
+                                unsigned msg_length,
+                                unsigned response_length,
+                                bool end_of_thread,
+                                bool complete,
+                                unsigned offset,
+                                unsigned swizzle_control)
+{
+        brw_set_message_descriptor(p, insn, BRW_SFID_URB,
+                                   msg_length, response_length, true, end_of_thread);
+        if (p->gen < 050) {
+                insn->bits3.urb.opcode = 0;     /* ? */
+                insn->bits3.urb.offset = offset;
+                insn->bits3.urb.swizzle_control = swizzle_control;
+                insn->bits3.urb.allocate = allocate;
+                insn->bits3.urb.used = used;    /* ? */
+                insn->bits3.urb.complete = complete;
+        } else if (p->gen < 070) {
+                insn->bits3.urb_gen5.opcode = 0;        /* URB_WRITE */
+                insn->bits3.urb_gen5.offset = offset;
+                insn->bits3.urb_gen5.swizzle_control = swizzle_control;
+                insn->bits3.urb_gen5.allocate = allocate;
+                insn->bits3.urb_gen5.used = used;       /* ? */
+                insn->bits3.urb_gen5.complete = complete;
+        } else {
+                insn->bits3.urb_gen7.opcode = 0;        /* URB_WRITE_HWORD */
+                insn->bits3.urb_gen7.offset = offset;
+                assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+                insn->bits3.urb_gen7.swizzle_control = swizzle_control;
+                /* per_slot_offset = 0 makes it ignore offsets in message header */
+                insn->bits3.urb_gen7.per_slot_offset = 0;
+                insn->bits3.urb_gen7.complete = complete;
+        }
+}
+/*
+ * Build the SEND descriptor for a data-port write message.
+ *
+ * The shared function (SFID) is chosen from p->gen and, on gen7+, from
+ * `msg_type`; the data-port fields are then written in the layout matching
+ * p->gen.  `send_commit_msg` is not encoded on gen7+.
+ */
+void
+brw_set_dp_write_message(struct brw_compile *p,
+                         struct brw_instruction *insn,
+                         unsigned binding_table_index,
+                         unsigned msg_control,
+                         unsigned msg_type,
+                         unsigned msg_length,
+                         bool header_present,
+                         bool last_render_target,
+                         unsigned response_length,
+                         bool end_of_thread,
+                         bool send_commit_msg)
+{
+        unsigned sfid;
+        if (p->gen < 060) {
+                sfid = BRW_SFID_DATAPORT_WRITE;
+        } else if (p->gen < 070) {
+                /* Use the render cache for all write messages. */
+                sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+        } else if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) {
+                /* Use the Render Cache for RT writes; otherwise use the Data Cache */
+                sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+        } else {
+                sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+        }
+        brw_set_message_descriptor(p, insn, sfid,
+                                   msg_length, response_length,
+                                   header_present, end_of_thread);
+        if (p->gen < 050) {
+                insn->bits3.dp_write.binding_table_index = binding_table_index;
+                insn->bits3.dp_write.msg_control = msg_control;
+                insn->bits3.dp_write.last_render_target = last_render_target;
+                insn->bits3.dp_write.msg_type = msg_type;
+                insn->bits3.dp_write.send_commit_msg = send_commit_msg;
+        } else if (p->gen < 060) {
+                insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
+                insn->bits3.dp_write_gen5.msg_control = msg_control;
+                insn->bits3.dp_write_gen5.last_render_target = last_render_target;
+                insn->bits3.dp_write_gen5.msg_type = msg_type;
+                insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
+        } else if (p->gen < 070) {
+                insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+                insn->bits3.gen6_dp.msg_control = msg_control;
+                insn->bits3.gen6_dp.last_render_target = last_render_target;
+                insn->bits3.gen6_dp.msg_type = msg_type;
+                insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
+        } else {
+                insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+                insn->bits3.gen7_dp.msg_control = msg_control;
+                insn->bits3.gen7_dp.last_render_target = last_render_target;
+                insn->bits3.gen7_dp.msg_type = msg_type;
+        }
+}
+/*
+ * Build the SEND descriptor for a data-port read message.
+ *
+ * The message always carries a header and never ends the thread.  The
+ * shared function (SFID) is chosen from p->gen and, on gen6 only, from
+ * `target_cache`; on gen6+ the target cache is not stored in the descriptor.
+ */
+void
+brw_set_dp_read_message(struct brw_compile *p,
+                        struct brw_instruction *insn,
+                        unsigned binding_table_index,
+                        unsigned msg_control,
+                        unsigned msg_type,
+                        unsigned target_cache,
+                        unsigned msg_length,
+                        unsigned response_length)
+{
+        unsigned sfid;
+        if (p->gen < 060) {
+                sfid = BRW_SFID_DATAPORT_READ;
+        } else if (p->gen < 070) {
+                if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
+                        sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+                else
+                        sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
+        } else {
+                sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
+        }
+        brw_set_message_descriptor(p, insn, sfid,
+                                   msg_length, response_length,
+                                   true, false);
+        if (p->gen < 045) {
+                insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+                insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+                insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+                insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+        } else if (p->gen < 050) {
+                insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+                insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
+                insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
+                insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
+        } else if (p->gen < 060) {
+                insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
+                insn->bits3.dp_read_gen5.msg_control = msg_control;
+                insn->bits3.dp_read_gen5.msg_type = msg_type;
+                insn->bits3.dp_read_gen5.target_cache = target_cache;
+        } else if (p->gen < 070) {
+                insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+                insn->bits3.gen6_dp.msg_control = msg_control;
+                insn->bits3.gen6_dp.last_render_target = 0;
+                insn->bits3.gen6_dp.msg_type = msg_type;
+                insn->bits3.gen6_dp.send_commit_msg = 0;
+        } else {
+                insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+                insn->bits3.gen7_dp.msg_control = msg_control;
+                insn->bits3.gen7_dp.last_render_target = 0;
+                insn->bits3.gen7_dp.msg_type = msg_type;
+        }
+}
+/*
+ * Build the SEND descriptor for a sampler message.
+ *
+ * The generic descriptor is written first (never end-of-thread), then the
+ * sampler fields in the layout matching p->gen.  `simd_mode` is only
+ * encoded on gen 050 and later; the oldest layout additionally gets a fixed
+ * FLOAT32 return format.
+ */
+static void brw_set_sampler_message(struct brw_compile *p,
+                                    struct brw_instruction *insn,
+                                    unsigned binding_table_index,
+                                    unsigned sampler,
+                                    unsigned msg_type,
+                                    unsigned response_length,
+                                    unsigned msg_length,
+                                    bool header_present,
+                                    unsigned simd_mode)
+{
+        brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
+                                   msg_length, response_length,
+                                   header_present, false);
+        if (p->gen < 045) {
+                insn->bits3.sampler.binding_table_index = binding_table_index;
+                insn->bits3.sampler.sampler = sampler;
+                insn->bits3.sampler.msg_type = msg_type;
+                insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+        } else if (p->gen < 050) {
+                insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+                insn->bits3.sampler_g4x.sampler = sampler;
+                insn->bits3.sampler_g4x.msg_type = msg_type;
+        } else if (p->gen < 070) {
+                insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
+                insn->bits3.sampler_gen5.sampler = sampler;
+                insn->bits3.sampler_gen5.msg_type = msg_type;
+                insn->bits3.sampler_gen5.simd_mode = simd_mode;
+        } else {
+                insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+                insn->bits3.sampler_gen7.sampler = sampler;
+                insn->bits3.sampler_gen7.msg_type = msg_type;
+                insn->bits3.sampler_gen7.simd_mode = simd_mode;
+        }
+}
+/*
+ * Append a NOP instruction whose destination and src0 are both the
+ * UD-retyped result of brw_vec4_grf(0,0) and whose src1 is a zero
+ * immediate.
+ */
+void brw_NOP(struct brw_compile *p)
+{
+        struct brw_instruction *nop = brw_next_insn(p, BRW_OPCODE_NOP);
+
+        brw_set_dest(p, nop, __retype_ud(brw_vec4_grf(0,0)));
+        brw_set_src0(p, nop, __retype_ud(brw_vec4_grf(0,0)));
+        brw_set_src1(p, nop, brw_imm_ud(0x0));
+}
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+/*
+ * Record `inst` on top of the compile context's stack of open IF
+ * instructions, doubling the backing array once the new depth reaches its
+ * capacity.
+ *
+ * NOTE(review): the realloc() result is stored without a NULL check, so an
+ * allocation failure loses the old array -- the function has no way to
+ * report an error to its callers.
+ */
+static void
+push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+{
+        p->if_stack[p->if_stack_depth++] = inst;
+        if (p->if_stack_depth < p->if_stack_array_size)
+                return;
+        /* Grow after the store so a free slot exists for the next push. */
+        p->if_stack_array_size *= 2;
+        p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
+}
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack).  Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, eg. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off.  If the stack is now empty, normal execution resumes.
+ *
+ * brw_IF() appends the IF instruction with zeroed jump fields, pushes it
+ * on the if-stack and returns it.
+ */
+struct brw_instruction *
+brw_IF(struct brw_compile *p, unsigned execute_size)
+{
+        struct brw_instruction *if_insn = brw_next_insn(p, BRW_OPCODE_IF);
+
+        /* Operand encoding depends on the hardware generation. */
+        if (p->gen >= 070) {
+                brw_set_dest(p, if_insn, __retype_d(brw_null_reg()));
+                brw_set_src0(p, if_insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, if_insn, brw_imm_ud(0));
+                if_insn->bits3.break_cont.jip = 0;
+                if_insn->bits3.break_cont.uip = 0;
+        } else if (p->gen >= 060) {
+                brw_set_dest(p, if_insn, brw_imm_w(0));
+                if_insn->bits1.branch_gen6.jump_count = 0;
+                brw_set_src0(p, if_insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, if_insn, __retype_d(brw_null_reg()));
+        } else {
+                brw_set_dest(p, if_insn, brw_ip_reg());
+                brw_set_src0(p, if_insn, brw_ip_reg());
+                brw_set_src1(p, if_insn, brw_imm_d(0x0));
+        }
+
+        /* Override the defaults for this instruction.  This happens after the
+         * operands are set so the requested execution size is what ends up
+         * in the header.
+         */
+        if_insn->header.mask_control = BRW_MASK_ENABLE;
+        if_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+        if_insn->header.compression_control = BRW_COMPRESSION_NONE;
+        if_insn->header.execution_size = execute_size;
+        if (!p->single_program_flow)
+                if_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+        /* Reset predication in the current default instruction state. */
+        p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+        push_if_stack(p, if_insn);
+        return if_insn;
+}
+/* Emit a gen6-style IF that carries its own comparison: `conditional` is
+ * stored as the conditional modifier and src0/src1 are the compared
+ * operands.  Not used on gen7.
+ *
+ * The instruction is pushed on the if-stack and returned with a zero jump
+ * count.
+ */
+struct brw_instruction *
+gen6_IF(struct brw_compile *p, uint32_t conditional,
+        struct brw_reg src0, struct brw_reg src1)
+{
+        struct brw_instruction *if_insn = brw_next_insn(p, BRW_OPCODE_IF);
+
+        brw_set_dest(p, if_insn, brw_imm_w(0));
+        /* The execution size must be in place before the sources are set:
+         * brw_set_src0()/brw_set_src1() validate against it.
+         */
+        if_insn->header.execution_size =
+                p->compressed ? BRW_EXECUTE_16 : BRW_EXECUTE_8;
+        if_insn->bits1.branch_gen6.jump_count = 0;
+        brw_set_src0(p, if_insn, src0);
+        brw_set_src1(p, if_insn, src1);
+
+        assert(if_insn->header.compression_control == BRW_COMPRESSION_NONE);
+        assert(if_insn->header.predicate_control == BRW_PREDICATE_NONE);
+        if_insn->header.destreg__conditionalmod = conditional;
+        if (!p->single_program_flow)
+                if_insn->header.thread_control = BRW_THREAD_SWITCH;
+
+        push_if_stack(p, if_insn);
+        return if_insn;
+}
+/**
+ * Single-program-flow (SPF) mode: rewrite an IF (and its optional ELSE)
+ * into ADD instructions whose immediate is a byte offset of 16 per
+ * instruction.
+ *
+ * No ENDIF needs to be executed in this mode since there are no stack
+ * operations to undo; if we're currently executing, we just continue
+ * normally.
+ */
+static void
+convert_IF_ELSE_to_ADD(struct brw_compile *p,
+                       struct brw_instruction *if_inst,
+                       struct brw_instruction *else_inst)
+{
+        /* Slot following the last emitted instruction, i.e. where the ENDIF
+         * would have gone.
+         */
+        struct brw_instruction *after_block = &p->store[p->nr_insn];
+
+        assert(p->single_program_flow);
+        assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+        assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+        assert(if_inst->header.execution_size == BRW_EXECUTE_1);
+
+        /* The IF becomes an ADD with the predicate reversed, so the jump is
+         * taken exactly when the IF body must be skipped.
+         */
+        if_inst->header.opcode = BRW_OPCODE_ADD;
+        if_inst->header.predicate_inverse = 1;
+
+        if (else_inst == NULL) {
+                /* No ELSE block: skip straight to where ENDIF would be. */
+                if_inst->bits3.ud = (after_block - if_inst) * 16;
+                return;
+        }
+
+        /* IF jumps to the first instruction of the ELSE block; the ELSE
+         * itself becomes an ADD that jumps to where ENDIF would be.
+         */
+        else_inst->header.opcode = BRW_OPCODE_ADD;
+        if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
+        else_inst->bits3.ud = (after_block - else_inst) * 16;
+}
+/**
+ * Patch IF and ELSE instructions with appropriate jump targets.
+ *
+ * Called once the matching ENDIF has been emitted (not in single-program-flow
+ * mode).  \p else_inst may be NULL when the block has no ELSE.  The ENDIF,
+ * and the ELSE if present, inherit the IF's execution size.
+ *
+ * Three encodings are handled: pre-gen6 (bits3.if_else jump/pop counts),
+ * gen6 (bits1.branch_gen6.jump_count) and gen7+ (bits3.break_cont JIP/UIP).
+ */
+static void
+patch_IF_ELSE(struct brw_compile *p,
+              struct brw_instruction *if_inst,
+              struct brw_instruction *else_inst,
+              struct brw_instruction *endif_inst)
+{
+        unsigned br = 1;
+        assert(!p->single_program_flow);
+        assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+        assert(endif_inst != NULL);
+        assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+        /* Jump count is for 64bit data chunk each, so one 128bit instruction
+         * requires 2 chunks.
+         */
+        if (p->gen >= 050)
+                br = 2;
+        assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
+        endif_inst->header.execution_size = if_inst->header.execution_size;
+        if (else_inst == NULL) {
+                /* Patch IF -> ENDIF */
+                if (p->gen < 060) {
+                        /* Turn it into an IFF, which means no mask stack operations for
+                         * all-false and jumping past the ENDIF.
+                         */
+                        if_inst->header.opcode = BRW_OPCODE_IFF;
+                        if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
+                        if_inst->bits3.if_else.pop_count = 0;
+                        if_inst->bits3.if_else.pad0 = 0;
+                } else if (p->gen < 070) {
+                        /* As of gen6, there is no IFF and IF must point to the ENDIF. */
+                        if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+                } else {
+                        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+                        if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
+                }
+        } else {
+                else_inst->header.execution_size = if_inst->header.execution_size;
+                /* Patch IF -> ELSE */
+                if (p->gen < 060) {
+                        if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
+                        if_inst->bits3.if_else.pop_count = 0;
+                        if_inst->bits3.if_else.pad0 = 0;
+                } else if (p->gen < 070) {
+                        /* gen6 only: on gen7+ bits1 of the IF holds a regular
+                         * destination encoding and must not be overwritten; the
+                         * gen7+ targets are written to JIP/UIP below.
+                         */
+                        if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
+                }
+                /* Patch ELSE -> ENDIF */
+                if (p->gen < 060) {
+                        /* BRW_OPCODE_ELSE pre-gen6 should point just past the
+                         * matching ENDIF.
+                         */
+                        else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
+                        else_inst->bits3.if_else.pop_count = 1;
+                        else_inst->bits3.if_else.pad0 = 0;
+                } else if (p->gen < 070) {
+                        /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
+                        else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+                } else {
+                        /* The IF instruction's JIP should point just past the ELSE */
+                        if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+                        /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
+                        if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+                        else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
+                }
+        }
+}
+/*
+ * Emit an ELSE instruction and push it onto the IF stack so that
+ * brw_ENDIF() can later pop it and hand it to patch_IF_ELSE().
+ *
+ * Operand encoding is selected by p->gen (compared against octal constants):
+ *   - below 060: dest and src0 are the IP register, src1 is immediate 0;
+ *   - 060 up to, but not including, 070: dest is an immediate word 0, the
+ *     branch_gen6 jump count is written as 0, both sources are null;
+ *   - 070 and above: dest and src0 are null, src1 is immediate 0 and the
+ *     JIP/UIP fields are written as 0.
+ */
+void
+brw_ELSE(struct brw_compile *p)
+{
+        struct brw_instruction *insn;
+        insn = brw_next_insn(p, BRW_OPCODE_ELSE);
+        if (p->gen < 060) {
+                brw_set_dest(p, insn, brw_ip_reg());
+                brw_set_src0(p, insn, brw_ip_reg());
+                brw_set_src1(p, insn, brw_imm_d(0x0));
+        } else if (p->gen < 070) {
+                brw_set_dest(p, insn, brw_imm_w(0));
+                /* Placeholder; patch_IF_ELSE() writes the real jump count. */
+                insn->bits1.branch_gen6.jump_count = 0;
+                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
+        } else {
+                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, insn, brw_imm_ud(0));
+                /* Placeholders; patch_IF_ELSE() writes the ELSE's JIP. */
+                insn->bits3.break_cont.jip = 0;
+                insn->bits3.break_cont.uip = 0;
+        }
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        insn->header.mask_control = BRW_MASK_ENABLE;
+        /* Thread switch is only requested when not in single-program-flow mode. */
+        if (!p->single_program_flow)
+                insn->header.thread_control = BRW_THREAD_SWITCH;
+        push_if_stack(p, insn);
+}
+/*
+ * Close the innermost IF (and optional ELSE) block.
+ *
+ * Pops the ELSE (if the top of the IF stack is one) and the IF from
+ * p->if_stack.  In single-program-flow mode no ENDIF is emitted and the
+ * IF/ELSE pair is handed to convert_IF_ELSE_to_ADD() instead.  Otherwise an
+ * ENDIF instruction is emitted with generation-specific operands and the
+ * IF/ELSE/ENDIF triple is passed to patch_IF_ELSE() to fill in the jump
+ * targets.
+ *
+ * NOTE(review): the stack depth is decremented without an underflow check;
+ * callers are presumably required to have a matching brw_IF() open -- confirm.
+ */
+void
+brw_ENDIF(struct brw_compile *p)
+{
+        struct brw_instruction *insn;
+        struct brw_instruction *else_inst = NULL;
+        struct brw_instruction *if_inst = NULL;
+        /* Pop the IF and (optional) ELSE instructions from the stack */
+        p->if_stack_depth--;
+        if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
+                else_inst = p->if_stack[p->if_stack_depth];
+                p->if_stack_depth--;
+        }
+        /* Whatever is now at the (already decremented) depth is the IF. */
+        if_inst = p->if_stack[p->if_stack_depth];
+        if (p->single_program_flow) {
+                /* ENDIF is useless; don't bother emitting it. */
+                convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
+                return;
+        }
+        insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
+        if (p->gen < 060) {
+                brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
+                brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
+                brw_set_src1(p, insn, brw_imm_d(0x0));
+        } else if (p->gen < 070) {
+                brw_set_dest(p, insn, brw_imm_w(0));
+                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, insn, __retype_d(brw_null_reg()));
+        } else {
+                brw_set_dest(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src0(p, insn, __retype_d(brw_null_reg()));
+                brw_set_src1(p, insn, brw_imm_ud(0));
+        }
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        insn->header.mask_control = BRW_MASK_ENABLE;
+        insn->header.thread_control = BRW_THREAD_SWITCH;
+        /* Also pop item off the stack in the endif instruction: */
+        if (p->gen < 060) {
+                insn->bits3.if_else.jump_count = 0;
+                insn->bits3.if_else.pop_count = 1;
+                insn->bits3.if_else.pad0 = 0;
+        } else if (p->gen < 070) {
+                /* NOTE(review): 2 presumably means "next instruction" in
+                 * 64-bit chunks (one 128-bit instruction) -- confirm.
+                 */
+                insn->bits1.branch_gen6.jump_count = 2;
+        } else {
+                insn->bits3.break_cont.jip = 2;
+        }
+        /* Fill in the IF/ELSE jump targets now that the ENDIF address is known. */
+        patch_IF_ELSE(p, if_inst, else_inst, insn);
+}
+/*
+ * Emit a BREAK instruction and return it so the caller can patch it later.
+ *
+ * From gen 060 on, dest and src0 are the null register (typed D); on older
+ * generations they are the IP register and pop_count is stored in the
+ * instruction's if_else field.  src1 is always an immediate 0.
+ */
+struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
+{
+        struct brw_instruction *brk = brw_next_insn(p, BRW_OPCODE_BREAK);
+        const bool legacy = !(p->gen >= 060);
+        struct brw_reg operand;
+        if (legacy)
+                operand = brw_ip_reg();
+        else
+                operand = __retype_d(brw_null_reg());
+        brw_set_dest(p, brk, operand);
+        brw_set_src0(p, brk, operand);
+        brw_set_src1(p, brk, brw_imm_d(0x0));
+        if (legacy) {
+                /* Only the pre-060 encoding carries a mask-stack pop count. */
+                brk->bits3.if_else.pad0 = 0;
+                brk->bits3.if_else.pop_count = pop_count;
+        }
+        brk->header.compression_control = BRW_COMPRESSION_NONE;
+        brk->header.execution_size = BRW_EXECUTE_8;
+        return brk;
+}
+/*
+ * Emit a CONTINUE instruction (gen6-style entry point) and return it.
+ *
+ * NOTE(review): do_insn is not used by this function.
+ * NOTE(review): dest and src0 are each set twice -- first to the null
+ * register, then to the IP register -- so the IP-register calls are the last
+ * to write them.  Whether the first pair is intentional (e.g. for a side
+ * effect of brw_set_dest/brw_set_src0) cannot be told from here; confirm
+ * before removing either pair.
+ */
+struct brw_instruction *gen6_CONT(struct brw_compile *p,
+                                  struct brw_instruction *do_insn)
+{
+        struct brw_instruction *insn;
+        insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
+        brw_set_dest(p, insn, __retype_d(brw_null_reg()));
+        brw_set_src0(p, insn, __retype_d(brw_null_reg()));
+        brw_set_dest(p, insn, brw_ip_reg());
+        brw_set_src0(p, insn, brw_ip_reg());
+        brw_set_src1(p, insn, brw_imm_d(0x0));
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        insn->header.execution_size = BRW_EXECUTE_8;
+        return insn;
+}
+/*
+ * Emit a CONTINUE instruction that operates on the IP register and records
+ * pop_count in its if_else field.  Returns the new instruction.
+ */
+struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
+{
+        struct brw_instruction *cont = brw_next_insn(p, BRW_OPCODE_CONTINUE);
+        const struct brw_reg ip = brw_ip_reg();
+        brw_set_dest(p, cont, ip);
+        brw_set_src0(p, cont, ip);
+        brw_set_src1(p, cont, brw_imm_d(0x0));
+        cont->header.compression_control = BRW_COMPRESSION_NONE;
+        cont->header.execution_size = BRW_EXECUTE_8;
+        cont->bits3.if_else.pad0 = 0;
+        cont->bits3.if_else.pop_count = pop_count;
+        return cont;
+}
+/* DO/WHILE loops
+ *
+ * A DO/WHILE pair forms an unterminated loop; BREAK and CONTINUE provide
+ * the control inside it.  Three flavours exist:
+ *
+ *  - uniform control flow: WHILE is a plain jump (ADD ip, ip, jip) and no
+ *    DO instruction is emitted;
+ *  - non-uniform control flow before gen6: DO pushes the mask, WHILE jumps
+ *    back, BREAK leaves the loop and pops the mask;
+ *  - gen6 and later: there is no mask stack, hence no DO; WHILE simply
+ *    points back at the first instruction of the loop.
+ *
+ * brw_DO() returns the instruction that brw_WHILE() must be given as
+ * do_insn: either the emitted DO, or (when none is emitted) the address of
+ * the next instruction slot in p->store.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
+{
+        struct brw_instruction *do_insn;
+        /* No DO instruction in these modes: just report the next slot. */
+        if (p->single_program_flow || p->gen >= 060)
+                return p->store + p->nr_insn;
+        do_insn = brw_next_insn(p, BRW_OPCODE_DO);
+        /* Replace the inherited defaults with null operands. */
+        brw_set_dest(p, do_insn, brw_null_reg());
+        brw_set_src0(p, do_insn, brw_null_reg());
+        brw_set_src1(p, do_insn, brw_null_reg());
+        do_insn->header.compression_control = BRW_COMPRESSION_NONE;
+        do_insn->header.execution_size = execute_size;
+        do_insn->header.predicate_control = BRW_PREDICATE_NONE;
+        return do_insn;
+}
+/*
+ * Emit the loop-closing instruction for a loop opened with brw_DO().
+ *
+ * do_insn is the value brw_DO() returned.  Depending on p->gen and
+ * p->single_program_flow this emits a WHILE (three encodings) or, for
+ * single-program-flow before gen 060, an ADD on the IP register.  After
+ * emission the default predicate control in p->current is reset to
+ * BRW_PREDICATE_NONE.  Returns the emitted instruction.
+ */
+struct brw_instruction *brw_WHILE(struct brw_compile *p,
+                                  struct brw_instruction *do_insn)
+{
+        /* Jump-count scale factor: doubled from gen 050 on. */
+        const unsigned br = (p->gen >= 050) ? 2 : 1;
+        struct brw_instruction *loop_end;
+        if (p->gen >= 070) {
+                loop_end = brw_next_insn(p, BRW_OPCODE_WHILE);
+                brw_set_dest(p, loop_end, __retype_d(brw_null_reg()));
+                brw_set_src0(p, loop_end, __retype_d(brw_null_reg()));
+                brw_set_src1(p, loop_end, brw_imm_ud(0));
+                loop_end->bits3.break_cont.jip = br * (do_insn - loop_end);
+                loop_end->header.execution_size = BRW_EXECUTE_8;
+        } else if (p->gen >= 060) {
+                loop_end = brw_next_insn(p, BRW_OPCODE_WHILE);
+                brw_set_dest(p, loop_end, brw_imm_w(0));
+                loop_end->bits1.branch_gen6.jump_count = br * (do_insn - loop_end);
+                brw_set_src0(p, loop_end, __retype_d(brw_null_reg()));
+                brw_set_src1(p, loop_end, __retype_d(brw_null_reg()));
+                loop_end->header.execution_size = BRW_EXECUTE_8;
+        } else if (p->single_program_flow) {
+                /* Uniform flow: a scalar ADD on IP; the distance is in
+                 * instructions times 16.
+                 */
+                loop_end = brw_next_insn(p, BRW_OPCODE_ADD);
+                brw_set_dest(p, loop_end, brw_ip_reg());
+                brw_set_src0(p, loop_end, brw_ip_reg());
+                brw_set_src1(p, loop_end, brw_imm_d((do_insn - loop_end) * 16));
+                loop_end->header.execution_size = BRW_EXECUTE_1;
+        } else {
+                loop_end = brw_next_insn(p, BRW_OPCODE_WHILE);
+                assert(do_insn->header.opcode == BRW_OPCODE_DO);
+                brw_set_dest(p, loop_end, brw_ip_reg());
+                brw_set_src0(p, loop_end, brw_ip_reg());
+                brw_set_src1(p, loop_end, brw_imm_d(0));
+                loop_end->header.execution_size = do_insn->header.execution_size;
+                loop_end->bits3.if_else.jump_count = br * (do_insn - loop_end + 1);
+                loop_end->bits3.if_else.pop_count = 0;
+                loop_end->bits3.if_else.pad0 = 0;
+        }
+        loop_end->header.compression_control = BRW_COMPRESSION_NONE;
+        p->current->header.predicate_control = BRW_PREDICATE_NONE;
+        return loop_end;
+}
+/* Forward jumps:
+ *
+ * Resolve a previously emitted JMPI so that it lands on the next
+ * instruction slot in p->store.  jmp_insn must be a JMPI whose src1 is an
+ * immediate; its immediate is overwritten with the distance to the landing
+ * slot (minus one instruction), doubled from gen 050 on.
+ */
+void brw_land_fwd_jump(struct brw_compile *p,
+                       struct brw_instruction *jmp_insn)
+{
+        const unsigned jmpi = (p->gen >= 050) ? 2 : 1;
+        struct brw_instruction *target = p->store + p->nr_insn;
+        assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+        assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+        jmp_insn->bits3.ud = jmpi * ((target - jmp_insn) - 1);
+}
+/* Emit a CMP with the given conditional modifier.
+ *
+ * To fit in with the control-flow helpers, the comparison is expected to
+ * populate the flag register; it might be simpler to just use the flag
+ * register for most WM tasks.
+ *
+ * When dest is architecture register 0, the default instruction state
+ * (p->current) is switched to normal predication and p->flag_value is set
+ * to 0xff, so subsequent instructions use the computed flag until
+ * brw_set_predicate_control_flag_value() is called again.
+ */
+void brw_CMP(struct brw_compile *p,
+             struct brw_reg dest,
+             unsigned conditional,
+             struct brw_reg src0,
+             struct brw_reg src1)
+{
+        struct brw_instruction *cmp = brw_next_insn(p, BRW_OPCODE_CMP);
+        cmp->header.destreg__conditionalmod = conditional;
+        brw_set_dest(p, cmp, dest);
+        brw_set_src0(p, cmp, src0);
+        brw_set_src1(p, cmp, src1);
+        /* Any other destination leaves the default predication untouched. */
+        if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE || dest.nr != 0)
+                return;
+        p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+        p->flag_value = 0xff;
+}
+/* Issue a 'wait' instruction on notification register n1; the host can
+ * program MMIO to wake the thread up.
+ *
+ * Both dest and src0 are the notification-1 register and src1 is null.
+ * Execution size, predicate control and compression control are all
+ * forced to 0.
+ */
+void brw_WAIT(struct brw_compile *p)
+{
+        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
+        struct brw_reg src = brw_notification_1_reg();
+        brw_set_dest(p, insn, src);
+        brw_set_src0(p, insn, src);
+        brw_set_src1(p, insn, brw_null_reg());
+        insn->header.execution_size = 0; /* must */
+        insn->header.predicate_control = 0;
+        insn->header.compression_control = 0;
+}
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+/** Extended math function, float[8].
+ *
+ * From gen 060 on a MATH instruction is emitted: the function code is stored
+ * in the conditional-modifier field and msg_reg_nr, data_type and precision
+ * are not used.  On older generations a SEND is emitted with msg_reg_nr in
+ * the destreg/conditional-modifier field and the math message descriptor is
+ * filled in by brw_set_math_message().
+ *
+ * \param dest        destination register (GRF with stride 1 on gen >= 060)
+ * \param function    BRW_MATH_FUNCTION_* code
+ * \param saturate    saturate flag
+ * \param msg_reg_nr  message register number (pre-060 path only)
+ * \param src         source register (GRF, stride 1, no negate/abs on gen >= 060)
+ * \param data_type   passed to brw_set_math_message() (pre-060 path only)
+ * \param precision   passed to brw_set_math_message() (pre-060 path only)
+ */
+void brw_math(struct brw_compile *p,
+              struct brw_reg dest,
+              unsigned function,
+              unsigned saturate,
+              unsigned msg_reg_nr,
+              struct brw_reg src,
+              unsigned data_type,
+              unsigned precision)
+{
+        if (p->gen >= 060) {
+                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
+                assert(dest.file == BRW_GENERAL_REGISTER_FILE);
+                assert(src.file == BRW_GENERAL_REGISTER_FILE);
+                assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
+                assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+                /* Source modifiers are ignored for extended math instructions. */
+                assert(!src.negate);
+                assert(!src.abs);
+                /* Only the integer-divide functions may take a non-float source. */
+                if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
+                    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+                        assert(src.type == BRW_REGISTER_TYPE_F);
+                }
+                /* Math is the same ISA format as other opcodes, except that CondModifier
+                 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+                 */
+                insn->header.destreg__conditionalmod = function;
+                insn->header.saturate = saturate;
+                brw_set_dest(p, insn, dest);
+                brw_set_src0(p, insn, src);
+                brw_set_src1(p, insn, brw_null_reg());
+        } else {
+                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+                /* Example code doesn't set predicate_control for send
+                 * instructions.
+                 */
+                insn->header.predicate_control = 0;
+                insn->header.destreg__conditionalmod = msg_reg_nr;
+                brw_set_dest(p, insn, dest);
+                brw_set_src0(p, insn, src);
+                /* The fourth argument is true when the source is typed D (integer). */
+                brw_set_math_message(p, insn, function,
+                                     src.type == BRW_REGISTER_TYPE_D,
+                                     precision,
+                                     saturate,
+                                     data_type);
+        }
+}
+/** Extended math function with two source operands.
+ *
+ * Always emits a MATH instruction; there is no SEND fallback and no check of
+ * p->gen here (NOTE(review): presumably callers only use this on generations
+ * that have the MATH opcode -- confirm).
+ *
+ * dest, src0 and src1 must be GRFs with horizontal stride 1, and the sources
+ * must not carry negate/abs modifiers.  Unless the function is one of the
+ * integer-divide variants, both sources must be typed F.
+ */
+void brw_math2(struct brw_compile *p,
+               struct brw_reg dest,
+               unsigned function,
+               struct brw_reg src0,
+               struct brw_reg src1)
+{
+        struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
+        assert(dest.file == BRW_GENERAL_REGISTER_FILE);
+        assert(src0.file == BRW_GENERAL_REGISTER_FILE);
+        assert(src1.file == BRW_GENERAL_REGISTER_FILE);
+        assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
+        assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
+        assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
+        if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
+            function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
+                assert(src0.type == BRW_REGISTER_TYPE_F);
+                assert(src1.type == BRW_REGISTER_TYPE_F);
+        }
+        /* Source modifiers are ignored for extended math instructions. */
+        assert(!src0.negate);
+        assert(!src0.abs);
+        assert(!src1.negate);
+        assert(!src1.abs);
+        /* Math is the same ISA format as other opcodes, except that CondModifier
+         * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+         */
+        insn->header.destreg__conditionalmod = function;
+        brw_set_dest(p, insn, dest);
+        brw_set_src0(p, insn, src0);
+        brw_set_src1(p, insn, src1);
+}
+/**
+ * Extended math function, float[16].
+ *
+ * From gen 060 on a single MATH instruction is emitted and msg_reg_nr and
+ * precision are not used.  On older generations two SEND instructions are
+ * used: the first targets msg_reg_nr and dest, the second is marked as the
+ * second compressed half and targets msg_reg_nr + 1 and dest offset by one
+ * register.  Both SENDs are given the same src.  The instruction state is
+ * pushed/popped around the two-SEND path only.
+ */
+void brw_math_16(struct brw_compile *p,
+                 struct brw_reg dest,
+                 unsigned function,
+                 unsigned saturate,
+                 unsigned msg_reg_nr,
+                 struct brw_reg src,
+                 unsigned precision)
+{
+        struct brw_instruction *insn;
+        if (p->gen >= 060) {
+                insn = brw_next_insn(p, BRW_OPCODE_MATH);
+                /* Math is the same ISA format as other opcodes, except that CondModifier
+                 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
+                 */
+                insn->header.destreg__conditionalmod = function;
+                insn->header.saturate = saturate;
+                /* Source modifiers are ignored for extended math instructions. */
+                assert(!src.negate);
+                assert(!src.abs);
+                brw_set_dest(p, insn, dest);
+                brw_set_src0(p, insn, src);
+                brw_set_src1(p, insn, brw_null_reg());
+                return;
+        }
+        /* First instruction:
+        */
+        brw_push_insn_state(p);
+        /* Both SENDs are emitted with flag value 0xff and no compression by default. */
+        brw_set_predicate_control_flag_value(p, 0xff);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.destreg__conditionalmod = msg_reg_nr;
+        brw_set_dest(p, insn, dest);
+        brw_set_src0(p, insn, src);
+        brw_set_math_message(p, insn, function,
+                             BRW_MATH_INTEGER_UNSIGNED,
+                             precision,
+                             saturate,
+                             BRW_MATH_DATA_VECTOR);
+        /* Second instruction:
+        */
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+        insn->header.destreg__conditionalmod = msg_reg_nr+1;
+        brw_set_dest(p, insn, __offset(dest,1));
+        brw_set_src0(p, insn, src);
+        brw_set_math_message(p, insn, function,
+                             BRW_MATH_INTEGER_UNSIGNED,
+                             precision,
+                             saturate,
+                             BRW_MATH_DATA_VECTOR);
+        brw_pop_insn_state(p);
+}
+/**
+ * Write a block of OWORDs (half a GRF each) to the scratch buffer,
+ * using a constant offset per channel.
+ *
+ * The offset must be aligned to oword size (16 bytes).  Used for
+ * register spilling.
+ *
+ * \param mrf       message register that receives the header (retyped to UD)
+ * \param num_regs  1 selects a 2-oword write (mlen 2); any other value
+ *                  selects a 4-oword write (mlen 3)
+ * \param offset    byte offset into the scratch buffer; converted to oword
+ *                  units from gen 060 on
+ */
+void brw_oword_block_write_scratch(struct brw_compile *p,
+                                   struct brw_reg mrf,
+                                   int num_regs,
+                                   unsigned offset)
+{
+        uint32_t msg_control, msg_type;
+        int mlen;
+        if (p->gen >= 060)
+                offset /= 16;
+        mrf = __retype_ud(mrf);
+        if (num_regs == 1) {
+                msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+                mlen = 2;
+        } else {
+                msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
+                mlen = 3;
+        }
+        /* Set up the message header.  This is g0, with g0.2 filled with
+         * the offset.  We don't want to leave our offset around in g0 or
+         * it'll screw up texture samples, so set it up inside the message
+         * reg.
+         */
+        {
+                brw_push_insn_state(p);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
+                /* set message header global offset field (reg 0, element 2) */
+                brw_MOV(p,
+                        __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
+                        brw_imm_ud(offset));
+                brw_pop_insn_state(p);
+        }
+        {
+                struct brw_reg dest;
+                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+                int send_commit_msg;
+                struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
+                /* The SEND itself is never compressed; widen the header
+                 * register instead when compression was the default.
+                 */
+                if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
+                        insn->header.compression_control = BRW_COMPRESSION_NONE;
+                        src_header = vec16(src_header);
+                }
+                assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
+                insn->header.destreg__conditionalmod = mrf.nr;
+                /* Until gen6, writes followed by reads from the same location
+                 * are not guaranteed to be ordered unless write_commit is set.
+                 * If set, then a no-op write is issued to the destination
+                 * register to set a dependency, and a read from the destination
+                 * can be used to ensure the ordering.
+                 *
+                 * For gen6, only writes between different threads need ordering
+                 * protection.  Our use of DP writes is all about register
+                 * spilling within a thread.
+                 */
+                if (p->gen >= 060) {
+                        dest = __retype_uw(vec16(brw_null_reg()));
+                        send_commit_msg = 0;
+                } else {
+                        dest = src_header;
+                        send_commit_msg = 1;
+                }
+                brw_set_dest(p, insn, dest);
+                if (p->gen >= 060) {
+                        brw_set_src0(p, insn, mrf);
+                } else {
+                        brw_set_src0(p, insn, brw_null_reg());
+                }
+                if (p->gen >= 060)
+                        msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+                else
+                        msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+                brw_set_dp_write_message(p,
+                                         insn,
+                                         255, /* binding table index (255=stateless) */
+                                         msg_control,
+                                         msg_type,
+                                         mlen,
+                                         true, /* header_present */
+                                         0, /* pixel scoreboard */
+                                         send_commit_msg, /* response_length */
+                                         0, /* eot */
+                                         send_commit_msg);
+        }
+}
+/**
+ * Read a block of owords (half a GRF each) from the scratch buffer
+ * using a constant index per channel.
+ *
+ * Offset must be aligned to oword size (16 bytes).  Used for register
+ * spilling.
+ *
+ * \param dest      destination register (retyped to UW)
+ * \param mrf       message register that receives the header (retyped to UD)
+ * \param num_regs  1 selects a 2-oword read (response length 1); any other
+ *                  value selects a 4-oword read (response length 2)
+ * \param offset    byte offset into the scratch buffer; converted to oword
+ *                  units from gen 060 on
+ */
+void
+brw_oword_block_read_scratch(struct brw_compile *p,
+                             struct brw_reg dest,
+                             struct brw_reg mrf,
+                             int num_regs,
+                             unsigned offset)
+{
+        uint32_t msg_control;
+        int rlen;
+        if (p->gen >= 060)
+                offset /= 16;
+        mrf = __retype_ud(mrf);
+        dest = __retype_uw(dest);
+        if (num_regs == 1) {
+                msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+                rlen = 1;
+        } else {
+                msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
+                rlen = 2;
+        }
+        /* Build the message header in mrf: a copy of g0 with element 2
+         * replaced by the offset.
+         */
+        {
+                brw_push_insn_state(p);
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
+                /* set message header global offset field (reg 0, element 2) */
+                brw_MOV(p,
+                        __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
+                        brw_imm_ud(offset));
+                brw_pop_insn_state(p);
+        }
+        {
+                struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
+                assert(insn->header.predicate_control == 0);
+                insn->header.compression_control = BRW_COMPRESSION_NONE;
+                insn->header.destreg__conditionalmod = mrf.nr;
+                brw_set_dest(p, insn, dest); /* UW? */
+                /* From gen 060 on the message register is an explicit source. */
+                if (p->gen >= 060) {
+                        brw_set_src0(p, insn, mrf);
+                } else {
+                        brw_set_src0(p, insn, brw_null_reg());
+                }
+                brw_set_dp_read_message(p,
+                                        insn,
+                                        255, /* binding table index (255=stateless) */
+                                        msg_control,
+                                        BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                                        BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+                                        1, /* msg_length */
+                                        rlen);
+        }
+}
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ *
+ * \param dest              destination register (cast to a uword[8] vector)
+ * \param mrf               message register that receives the header
+ * \param offset            byte offset into the buffer; converted to oword
+ *                          units from gen 060 on
+ * \param bind_table_index  binding table index of the buffer
+ */
+void brw_oword_block_read(struct brw_compile *p,
+                          struct brw_reg dest,
+                          struct brw_reg mrf,
+                          uint32_t offset,
+                          uint32_t bind_table_index)
+{
+        struct brw_instruction *insn;
+        /* On newer hardware, offset is in units of owords. */
+        if (p->gen >= 060)
+                offset /= 16;
+        mrf = __retype_ud(mrf);
+        /* The pushed state stays in effect for the SEND as well; it is
+         * popped only at the end of the function.
+         */
+        brw_push_insn_state(p);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        brw_set_mask_control(p, BRW_MASK_DISABLE);
+        brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
+        /* set message header global offset field (reg 0, element 2) */
+        brw_MOV(p,
+                __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
+                brw_imm_ud(offset));
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.destreg__conditionalmod = mrf.nr;
+        /* cast dest to a uword[8] vector */
+        dest = __retype_uw(vec8(dest));
+        brw_set_dest(p, insn, dest);
+        if (p->gen >= 060) {
+                brw_set_src0(p, insn, mrf);
+        } else {
+                brw_set_src0(p, insn, brw_null_reg());
+        }
+        brw_set_dp_read_message(p,
+                                insn,
+                                bind_table_index,
+                                BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+                                BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
+                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                                1, /* msg_length */
+                                1); /* response_length (1 reg, 2 owords!) */
+        brw_pop_insn_state(p);
+}
+/**
+ * Read a set of dwords from the data port Data Cache (const buffer).
+ *
+ * Location (in buffer) appears as UD offsets in the register after
+ * the provided mrf header reg.
+ *
+ * \param dest              destination register (cast to a uword[8] vector)
+ * \param mrf               message register that receives a copy of g0 as
+ *                          header; the caller supplies the offsets in the
+ *                          register after it
+ * \param bind_table_index  binding table index of the buffer
+ */
+void brw_dword_scattered_read(struct brw_compile *p,
+                              struct brw_reg dest,
+                              struct brw_reg mrf,
+                              uint32_t bind_table_index)
+{
+        struct brw_instruction *insn;
+        mrf = __retype_ud(mrf);
+        /* Only the header copy runs with the overridden state; the SEND
+         * below is emitted after the state has been restored.
+         */
+        brw_push_insn_state(p);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        brw_set_mask_control(p, BRW_MASK_DISABLE);
+        brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
+        brw_pop_insn_state(p);
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.destreg__conditionalmod = mrf.nr;
+        /* cast dest to a uword[8] vector */
+        dest = __retype_uw(vec8(dest));
+        brw_set_dest(p, insn, dest);
+        brw_set_src0(p, insn, brw_null_reg());
+        /* Two message registers: the header plus the register of offsets. */
+        brw_set_dp_read_message(p,
+                                insn,
+                                bind_table_index,
+                                BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
+                                BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
+                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                                2, /* msg_length */
+                                1); /* response_length */
+}
+/**
+ * Read a float[4] constant from the VS constant buffer.
+ *
+ * One oword is requested (response length 1) using message register 1 as
+ * the header; element 2 of that register is loaded with the location.
+ *
+ * \param dest              destination register
+ * \param location          byte offset into the constant buffer; converted
+ *                          to oword units from gen 060 on
+ * \param bind_table_index  binding table index of the constant buffer
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      unsigned location,
+                      unsigned bind_table_index)
+{
+        struct brw_instruction *insn;
+        unsigned msg_reg_nr = 1;
+        if (p->gen >= 060)
+                location /= 16;
+        /* Setup MRF[1] with location/offset into const buffer */
+        brw_push_insn_state(p);
+        brw_set_access_mode(p, BRW_ALIGN_1);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        brw_set_mask_control(p, BRW_MASK_DISABLE);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
+                brw_imm_ud(location));
+        brw_pop_insn_state(p);
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.predicate_control = BRW_PREDICATE_NONE;
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        insn->header.destreg__conditionalmod = msg_reg_nr;
+        insn->header.mask_control = BRW_MASK_DISABLE;
+        brw_set_dest(p, insn, dest);
+        /* From gen 060 on the message register is an explicit source. */
+        if (p->gen >= 060) {
+                brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
+        } else {
+                brw_set_src0(p, insn, brw_null_reg());
+        }
+        brw_set_dp_read_message(p,
+                                insn,
+                                bind_table_index,
+                                0, /* msg_control */
+                                BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                                1, /* msg_length */
+                                1); /* response_length (1 Oword) */
+}
+/**
+ * Read a float[4] constant per vertex from VS constant buffer, with
+ * relative addressing.
+ *
+ * Message register 1 is loaded with addr_reg + offset, then an oword
+ * dual-block read is issued; the message type constant depends on p->gen.
+ *
+ * \param dest              destination register
+ * \param addr_reg          register holding the per-vertex base offsets
+ * \param offset            constant added to addr_reg
+ * \param bind_table_index  binding table index of the constant buffer
+ */
+void brw_dp_READ_4_vs_relative(struct brw_compile *p,
+                               struct brw_reg dest,
+                               struct brw_reg addr_reg,
+                               unsigned offset,
+                               unsigned bind_table_index)
+{
+        struct brw_reg src = brw_vec8_grf(0, 0);
+        struct brw_instruction *insn;
+        int msg_type;
+        /* Setup MRF[1] with offset into const buffer */
+        brw_push_insn_state(p);
+        brw_set_access_mode(p, BRW_ALIGN_1);
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        brw_set_mask_control(p, BRW_MASK_DISABLE);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        /* M1.0 is block offset 0, M1.4 is block offset 1, all other
+         * fields ignored.
+         */
+        brw_ADD(p, __retype_d(brw_message_reg(1)),
+                addr_reg, brw_imm_d(offset));
+        brw_pop_insn_state(p);
+        /* May rewrite src; the (possibly updated) value becomes src0 below. */
+        gen6_resolve_implied_move(p, &src, 0);
+        insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        insn->header.predicate_control = BRW_PREDICATE_NONE;
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        insn->header.destreg__conditionalmod = 0;
+        insn->header.mask_control = BRW_MASK_DISABLE;
+        brw_set_dest(p, insn, dest);
+        brw_set_src0(p, insn, src);
+        if (p->gen >= 060)
+                msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+        else if (p->gen >= 045)
+                msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+        else
+                msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
+        brw_set_dp_read_message(p,
+                                insn,
+                                bind_table_index,
+                                BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+                                msg_type,
+                                BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+                                2, /* msg_length */
+                                1); /* response_length */
+}
+/**
+ * Emit a render target write.
+ *
+ * Uses SENDC when p->gen >= 060 and binding_table_index is 0, SEND
+ * otherwise.  The destination is a null register retyped to UW and
+ * sized by dispatch_width (16 wide when dispatch_width == 16, else 8).
+ *
+ * p:                   compile context receiving the instruction
+ * dispatch_width:      selects the width of the null destination
+ * msg_reg_nr:          first message register of the payload
+ * src0:                source operand; replaced by the message register
+ *                      itself when p->gen >= 060
+ * msg_control:         message control bits for the write message
+ * binding_table_index: render target binding table index
+ * msg_length:          number of message registers sent
+ * response_length:     number of registers returned
+ * eot:                 passed both as the "last render target" and the
+ *                      end-of-thread argument of the write message
+ * header_present:      whether the payload starts with a header
+ */
+void brw_fb_WRITE(struct brw_compile *p,
+                  int dispatch_width,
+                  unsigned msg_reg_nr,
+                  struct brw_reg src0,
+                  unsigned msg_control,
+                  unsigned binding_table_index,
+                  unsigned msg_length,
+                  unsigned response_length,
+                  bool eot,
+                  bool header_present)
+{
+        struct brw_instruction *insn;
+        unsigned msg_type;
+        struct brw_reg dest;
+        if (dispatch_width == 16)
+                dest = __retype_uw(vec16(brw_null_reg()));
+        else
+                dest = __retype_uw(vec8(brw_null_reg()));
+        if (p->gen >= 060 && binding_table_index == 0) {
+                insn = brw_next_insn(p, BRW_OPCODE_SENDC);
+        } else {
+                insn = brw_next_insn(p, BRW_OPCODE_SEND);
+        }
+        /* The execution mask is ignored for render target writes. */
+        insn->header.predicate_control = 0;
+        insn->header.compression_control = BRW_COMPRESSION_NONE;
+        if (p->gen >= 060) {
+                /* headerless version, just submit color payload */
+                src0 = brw_message_reg(msg_reg_nr);
+                msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+        } else {
+                insn->header.destreg__conditionalmod = msg_reg_nr;
+                msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+        }
+        brw_set_dest(p, insn, dest);
+        brw_set_src0(p, insn, src0);
+        /* The final argument had lost its literal; no write commit is
+         * requested, matching the value brw_fb_write() in brw_wm.c passes.
+         */
+        brw_set_dp_write_message(p,
+                                 insn,
+                                 binding_table_index,
+                                 msg_control,
+                                 msg_type,
+                                 msg_length,
+                                 header_present,
+                                 eot,
+                                 response_length,
+                                 eot,
+                                 0 /* send_commit_msg */);
+}
+/**
+ * Emit a texture sample SEND.
+ *
+ * The combination of msg_type and msg_length selects the sampling
+ * operation that is carried out (see volume 4, page 161 of the docs).
+ *
+ * Unless p->gen >= 050 and every channel is requested, a header is
+ * first built in message register msg_reg_nr: g0 is copied there and
+ * dword 2 receives the complement of writemask shifted left by 12.
+ * In that case src0 is replaced by a null register.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+                struct brw_reg dest,
+                unsigned msg_reg_nr,
+                struct brw_reg src0,
+                unsigned binding_table_index,
+                unsigned sampler,
+                unsigned writemask,
+                unsigned msg_type,
+                unsigned response_length,
+                unsigned msg_length,
+                bool header_present,
+                unsigned simd_mode)
+{
+        struct brw_instruction *send;
+        assert(writemask);
+        if (!(p->gen >= 050 && writemask == WRITEMASK_XYZW)) {
+                struct brw_reg header = brw_message_reg(msg_reg_nr);
+                const unsigned disabled = ~writemask & WRITEMASK_XYZW;
+                /* The header is written with compression and masking off */
+                brw_push_insn_state(p);
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_MOV(p, __retype_ud(header), __retype_ud(brw_vec8_grf(0,0)));
+                brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(disabled << 12));
+                brw_pop_insn_state(p);
+                src0 = __retype_uw(brw_null_reg());
+        }
+        gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+        send = brw_next_insn(p, BRW_OPCODE_SEND);
+        send->header.predicate_control = 0; /* XXX */
+        send->header.compression_control = BRW_COMPRESSION_NONE;
+        /* Before gen 060 the message register number lives in the header */
+        if (p->gen < 060)
+                send->header.destreg__conditionalmod = msg_reg_nr;
+        brw_set_dest(p, send, dest);
+        brw_set_src0(p, send, src0);
+        brw_set_sampler_message(p, send,
+                                binding_table_index, sampler, msg_type,
+                                response_length, msg_length,
+                                header_present, simd_mode);
+}
+/* Emit a URB write SEND.
+ *
+ * The long parameter list is admittedly confusing; bitmasks and macros
+ * in the old style, or letting the caller fill in dword3 directly,
+ * might serve better.
+ *
+ * For p->gen >= 070 an extra OR is emitted first: dword 5 of message
+ * register msg_reg_nr is set to dword 5 of g0 OR'ed with 0xff00.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   unsigned msg_reg_nr,
+                   struct brw_reg src0,
+                   bool allocate,
+                   bool used,
+                   unsigned msg_length,
+                   unsigned response_length,
+                   bool eot,
+                   bool writes_complete,
+                   unsigned offset,
+                   unsigned swizzle)
+{
+        struct brw_instruction *send;
+        gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+        if (p->gen >= 070) {
+                /* Enable Channel Masks in the URB_WRITE_HWORD message header */
+                brw_push_insn_state(p);
+                brw_set_access_mode(p, BRW_ALIGN_1);
+                brw_OR(p,
+                       __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
+                       __retype_ud(brw_vec1_grf(0, 5)),
+                       brw_imm_ud(0xff00));
+                brw_pop_insn_state(p);
+        }
+        send = brw_next_insn(p, BRW_OPCODE_SEND);
+        assert(msg_length < BRW_MAX_MRF);
+        brw_set_dest(p, send, dest);
+        brw_set_src0(p, send, src0);
+        brw_set_src1(p, send, brw_imm_d(0));
+        /* Up to and including gen 060 the header carries the message register */
+        if (p->gen <= 060)
+                send->header.destreg__conditionalmod = msg_reg_nr;
+        brw_set_urb_message(p, send,
+                            allocate, used,
+                            msg_length, response_length,
+                            eot, writes_complete,
+                            offset, swizzle);
+}
+/* Return the index of the first ENDIF, ELSE or WHILE instruction found
+ * after position start in p->store.  Asserts if none exists and then
+ * falls back to start + 1.
+ */
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{
+        int ip = start;
+        while (++ip < p->nr_insn) {
+                const int opcode = p->store[ip].header.opcode;
+                if (opcode == BRW_OPCODE_ENDIF ||
+                    opcode == BRW_OPCODE_ELSE ||
+                    opcode == BRW_OPCODE_WHILE)
+                        return ip;
+        }
+        assert(!"not reached");
+        return start + 1;
+}
+/* Locate the WHILE that closes the loop containing instruction start.
+ *
+ * gen6 has no DO instruction, so the end of the loop is recognised as
+ * the first later WHILE whose jump target lies at or before start.
+ * Asserts if no such WHILE exists and then falls back to start + 1.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+        const int br = 2;
+        int ip = start;
+        while (++ip < p->nr_insn) {
+                struct brw_instruction *candidate = &p->store[ip];
+                int jip;
+                if (candidate->header.opcode != BRW_OPCODE_WHILE)
+                        continue;
+                /* The jump distance is stored in a different field per gen */
+                jip = p->gen <= 070 ? candidate->bits1.branch_gen6.jump_count
+                        : candidate->bits3.break_cont.jip;
+                if (ip + jip / br <= start)
+                        return ip;
+        }
+        assert(!"not reached");
+        return start + 1;
+}
+/* Once the whole program has been generated, walk it again and patch
+ * the UIP and JIP fields of every BREAK and CONTINUE instruction.
+ * Nothing is done when p->gen <= 060.
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+        const int br = 2;
+        int ip;
+        if (p->gen <= 060)
+                return;
+        for (ip = 0; ip < p->nr_insn; ip++) {
+                struct brw_instruction *insn = &p->store[ip];
+                const int opcode = insn->header.opcode;
+                int loop_end;
+                if (opcode != BRW_OPCODE_BREAK && opcode != BRW_OPCODE_CONTINUE)
+                        continue;
+                /* JIP: distance to the end of the enclosing block */
+                insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+                loop_end = brw_find_loop_end(p, ip);
+                if (opcode == BRW_OPCODE_BREAK) {
+                        /* Gen7 UIP points to WHILE; Gen6 points just after it.
+                         * NOTE(review): the test below also adds 1 when
+                         * p->gen == 070 - confirm that is intended.
+                         */
+                        insn->bits3.break_cont.uip =
+                                br * (loop_end - ip + (p->gen <= 070 ? 1 : 0));
+                } else {
+                        insn->bits3.break_cont.uip = br * (loop_end - ip);
+                        assert(insn->bits3.break_cont.uip != 0);
+                        assert(insn->bits3.break_cont.jip != 0);
+                }
+        }
+}
+/* Emit a SEND carrying an FF_SYNC message.
+ *
+ * src0 is first passed through gen6_resolve_implied_move() for message
+ * register msg_reg_nr; src1 is an immediate 0.  Before gen 060 the
+ * message register number is also stored in the instruction header.
+ */
+void brw_ff_sync(struct brw_compile *p,
+                   struct brw_reg dest,
+                   unsigned msg_reg_nr,
+                   struct brw_reg src0,
+                   bool allocate,
+                   unsigned response_length,
+                   bool eot)
+{
+        struct brw_instruction *send;
+        gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+        send = brw_next_insn(p, BRW_OPCODE_SEND);
+        brw_set_dest(p, send, dest);
+        brw_set_src0(p, send, src0);
+        brw_set_src1(p, send, brw_imm_d(0));
+        if (p->gen < 060)
+                send->header.destreg__conditionalmod = msg_reg_nr;
+        brw_set_ff_sync_message(p, send, allocate, response_length, eot);
+}

 /drivers/video/Intel-2D/brw/brw_wm.c
 ,0 → 1,681
+#include "brw.h"
+#define X16 8 /* GRF number written by brw_wm_xy() and read as the X operand of the pre-gen6 LINE instructions */
+#define Y16 10 /* GRF number written by brw_wm_xy() and read as the Y operand of the pre-gen6 MAC instructions */
+/* Emit the four ADDs that fill the X16 and Y16 registers.
+ *
+ * Two uncompressed ADDs build UW intermediates in g30 (x) and g28 (y)
+ * from UW elements 4 and 5 of g1 plus a packed vector immediate; two
+ * compressed ADDs then add the negated elements 0 and 1 of g1 and
+ * store the results in X16 and Y16.  Callers in this file only invoke
+ * this when p->gen < 060.
+ */
+static void brw_wm_xy(struct brw_compile *p, int dw)
+{
+        struct brw_reg g1 = brw_vec1_grf(1, 0);
+        struct brw_reg g1_uw = __retype_uw(g1);
+        struct brw_reg tmp_x, tmp_y;
+        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+        /* Intermediate width follows the dispatch width */
+        tmp_x = dw == 16 ? brw_uw16_grf(30, 0) : brw_uw8_grf(30, 0);
+        tmp_y = dw == 16 ? brw_uw16_grf(28, 0) : brw_uw8_grf(28, 0);
+        brw_ADD(p, tmp_x,
+                __stride(__suboffset(g1_uw, 4), 2, 4, 0),
+                brw_imm_v(0x10101010));
+        brw_ADD(p, tmp_y,
+                __stride(__suboffset(g1_uw, 5), 2, 4, 0),
+                brw_imm_v(0x11001100));
+        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+        brw_ADD(p, brw_vec8_grf(X16, 0), vec8(tmp_x), brw_negate(g1));
+        brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(tmp_y), brw_negate(__suboffset(g1, 1)));
+}
+/* Emit the instructions that compute the two affine texture
+ * coordinates for one channel and place them in message registers.
+ *
+ * The first coordinate goes to message register msg + 1, the second
+ * dw/8 registers further on.  gen >= 060 uses one PLN per coordinate;
+ * earlier gens use a LINE/MAC pair fed from the X16 and Y16 registers.
+ */
+static void brw_wm_affine_st(struct brw_compile *p, int dw,
+                             int channel, int msg)
+{
+        const int first = msg + 1;
+        const int second = first + dw/8;
+        int uv;
+        brw_set_compression_control(p, dw == 16 ? BRW_COMPRESSION_COMPRESSED
+                                                : BRW_COMPRESSION_NONE);
+        /* Base GRF of the coefficients; each channel occupies two GRFs */
+        if (p->gen >= 060)
+                uv = dw == 16 ? 6 : 4;
+        else
+                uv = 3;
+        uv += 2*channel;
+        if (p->gen >= 060) {
+                brw_PLN(p, brw_message_reg(first),
+                        brw_vec1_grf(uv, 0), brw_vec8_grf(2, 0));
+                brw_PLN(p, brw_message_reg(second),
+                        brw_vec1_grf(uv, 4), brw_vec8_grf(2, 0));
+                return;
+        }
+        {
+                struct brw_reg coef = brw_vec1_grf(uv, 0);
+                brw_LINE(p, brw_null_reg(), __suboffset(coef, 0), brw_vec8_grf(X16, 0));
+                brw_MAC(p, brw_message_reg(first), __suboffset(coef, 1), brw_vec8_grf(Y16, 0));
+                brw_LINE(p, brw_null_reg(), __suboffset(coef, 4), brw_vec8_grf(X16, 0));
+                brw_MAC(p, brw_message_reg(second), __suboffset(coef, 5), brw_vec8_grf(Y16, 0));
+        }
+}
+/* Map a dispatch width to the sampler SIMD mode: SIMD16 for 16,
+ * SIMD8 for anything else.
+ */
+static inline unsigned simd(int dw)
+{
+        if (dw == 16)
+                return BRW_SAMPLER_SIMD_MODE_SIMD16;
+        return BRW_SAMPLER_SIMD_MODE_SIMD8;
+}
+/* Build the UW-typed GRF register, starting at GRF number result, that
+ * receives a sampler response; its vertical stride and width are 16
+ * for a dispatch width of 16 and 8 otherwise.
+ */
+static inline struct brw_reg sample_result(int dw, int result)
+{
+        const bool wide = dw == 16;
+        return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
+                       BRW_REGISTER_TYPE_UW,
+                       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+                       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+                       BRW_HORIZONTAL_STRIDE_1,
+                       BRW_SWIZZLE_XYZW,
+                       WRITEMASK_XYZW);
+}
+/* Emit a sampler message that fetches all four channels.
+ *
+ * p:       compile context receiving the instructions
+ * dw:      dispatch width (16 selects SIMD16, anything else SIMD8)
+ * channel: sampler index; the binding table index used is channel + 1
+ * msg:     message register base; for gen >= 060 the payload starts at
+ *          msg + 1 and no header is sent, otherwise g0 is the source
+ *          and a header is included
+ * result:  first GRF of the response
+ *
+ * Returns result unchanged.
+ */
+static int brw_wm_sample(struct brw_compile *p, int dw,
+                         int channel, int msg, int result)
+{
+        struct brw_reg src0;
+        bool header;
+        int len;
+        /* Registers occupied by the two coordinates */
+        len = dw == 16 ? 4 : 2;
+        if (p->gen >= 060) {
+                header = false;
+                src0 = brw_message_reg(++msg);
+        } else {
+                header = true;
+                src0 = brw_vec8_grf(0, 0);
+        }
+        /* The response length had lost its leading factor.  Four channels
+         * come back, i.e. twice the coordinate payload: 4 registers for
+         * SIMD8 (the unmasked SIMD8 length brw_wm_sample__alpha also
+         * uses) and 8 for SIMD16.
+         */
+        brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+                   channel+1, channel, WRITEMASK_XYZW, 0,
+                   2*len, len+header, header, simd(dw));
+        return result;
+}
+/* Emit a sampler message requesting only the W channel.
+ *
+ * A header is always sent.  Returns the GRF number holding the value:
+ * result for a dispatch width other than 8, result + 3 for SIMD8
+ * because the SIMD8 response is not masked.
+ */
+static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
+                                int channel, int msg, int result)
+{
+        const bool narrow = dw == 8;
+        /* SIMD8 sample return is not masked */
+        const int mlen = narrow ? 3 : 5;
+        const int rlen = narrow ? 4 : 2;
+        struct brw_reg src0;
+        src0 = p->gen >= 060 ? brw_message_reg(msg) : brw_vec8_grf(0, 0);
+        brw_SAMPLE(p, sample_result(dw, result), msg, src0,
+                   channel+1, channel, WRITEMASK_W, 0,
+                   rlen, mlen, true, simd(dw));
+        return narrow ? result + 3 : result;
+}
+/* Compute affine coordinates for channel, then sample all four
+ * channels.  Returns the GRF number reported by brw_wm_sample().
+ */
+static int brw_wm_affine(struct brw_compile *p, int dw,
+                         int channel, int msg, int result)
+{
+        int grf;
+        brw_wm_affine_st(p, dw, channel, msg);
+        grf = brw_wm_sample(p, dw, channel, msg, result);
+        return grf;
+}
+/* Compute affine coordinates for channel, then sample only the W
+ * channel.  Returns the GRF number reported by brw_wm_sample__alpha().
+ */
+static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
+                                int channel, int msg, int result)
+{
+        int grf;
+        brw_wm_affine_st(p, dw, channel, msg);
+        grf = brw_wm_sample__alpha(p, dw, channel, msg, result);
+        return grf;
+}
+/* Build a UW-typed null architecture register whose vertical stride
+ * and width are 16 for a dispatch width of 16 and 8 otherwise.
+ */
+static inline struct brw_reg null_result(int dw)
+{
+        const bool wide = dw == 16;
+        return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
+                       BRW_REGISTER_TYPE_UW,
+                       wide ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
+                       wide ? BRW_WIDTH_16 : BRW_WIDTH_8,
+                       BRW_HORIZONTAL_STRIDE_1,
+                       BRW_SWIZZLE_XYZW,
+                       WRITEMASK_XYZW);
+}
+/* Emit the render target write SEND used by the WM kernels.
+ *
+ * The colour payload is 8 registers for a dispatch width of 16 and 4
+ * otherwise.  Before gen 060 g1 is additionally copied into message
+ * register 1, a header is sent and the length grows by 2; from gen 060
+ * on the message is headerless and starts at message register 2.
+ * Binding table index 0 is used and both the "last render target" and
+ * end-of-thread arguments are true.
+ */
+static void brw_fb_write(struct brw_compile *p, int dw)
+{
+        const bool wide = dw == 16;
+        struct brw_instruction *send;
+        unsigned control, type, length;
+        struct brw_reg payload;
+        bool with_header;
+        brw_set_compression_control(p, wide ? BRW_COMPRESSION_COMPRESSED
+                                            : BRW_COMPRESSION_NONE);
+        control = wide ? BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE
+                       : BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+        length = wide ? 8 : 4;
+        if (p->gen < 060) {
+                /* Copy g1 into m1 with compression and masking disabled */
+                brw_push_insn_state(p);
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
+                brw_pop_insn_state(p);
+                length += 2;
+        }
+        /* The execution mask is ignored for render target writes. */
+        send = brw_next_insn(p, BRW_OPCODE_SEND);
+        send->header.predicate_control = 0;
+        send->header.compression_control = BRW_COMPRESSION_NONE;
+        with_header = p->gen < 060;
+        if (with_header) {
+                send->header.destreg__conditionalmod = 0;
+                type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+                payload = __retype_uw(brw_vec8_grf(0, 0));
+        } else {
+                type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+                payload = brw_message_reg(2);
+        }
+        brw_set_dest(p, send, null_result(dw));
+        brw_set_src0(p, send, payload);
+        brw_set_dp_write_message(p, send, 0,
+                                 control, type, length,
+                                 with_header, true, 0, true, false);
+}
+/* Copy the four colour channels starting at GRF src into the message
+ * registers beginning at m2, then emit the render target write.
+ *
+ * SIMD8 on gen >= 060 copies one register per channel.  Otherwise
+ * each channel spans two GRFs and is moved with a compressed MOV
+ * (gen >= 060), a COMPR4 MOV (gen >= 045 with dw == 16), or separate
+ * first-half / second-half MOVs.
+ */
+static void brw_wm_write(struct brw_compile *p, int dw, int src)
+{
+        int ch;
+        if (dw == 8 && p->gen >= 060) {
+                /* XXX pixel execution mask? */
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                for (ch = 0; ch < 4; ch++)
+                        brw_MOV(p, brw_message_reg(2 + ch), brw_vec8_grf(src + ch, 0));
+        } else {
+                brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                for (ch = 0; ch < 4; ch++) {
+                        const int lo = src + 2*ch;
+                        if (p->gen >= 060) {
+                                brw_MOV(p, brw_message_reg(2 + 2*ch),
+                                        brw_vec8_grf(lo, 0));
+                                continue;
+                        }
+                        if (p->gen >= 045 && dw == 16) {
+                                brw_MOV(p, brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+                                        brw_vec8_grf(lo, 0));
+                                continue;
+                        }
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                        brw_MOV(p, brw_message_reg(2 + ch), brw_vec8_grf(lo, 0));
+                        if (dw != 16)
+                                continue;
+                        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+                        brw_MOV(p, brw_message_reg(2 + ch + 4),
+                                brw_vec8_grf(lo + 1, 0));
+                }
+        }
+        brw_fb_write(p, dw);
+}
+/* Multiply each of the four colour channels starting at GRF src by
+ * the single-channel mask held at GRF mask, store the products in the
+ * message registers beginning at m2, then emit the render target
+ * write.  The register layout per gen / dispatch width mirrors
+ * brw_wm_write(); in the split second-half case the mask operand is
+ * GRF mask + 1.
+ */
+static void brw_wm_write__mask(struct brw_compile *p, int dw,
+                               int src, int mask)
+{
+        int ch;
+        if (dw == 8 && p->gen >= 060) {
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                for (ch = 0; ch < 4; ch++)
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(src + ch, 0),
+                                brw_vec8_grf(mask, 0));
+        } else {
+                brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                for (ch = 0; ch < 4; ch++) {
+                        const int lo = src + 2*ch;
+                        if (p->gen >= 060) {
+                                brw_MUL(p, brw_message_reg(2 + 2*ch),
+                                        brw_vec8_grf(lo, 0),
+                                        brw_vec8_grf(mask, 0));
+                                continue;
+                        }
+                        if (p->gen >= 045 && dw == 16) {
+                                brw_MUL(p, brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+                                        brw_vec8_grf(lo, 0),
+                                        brw_vec8_grf(mask, 0));
+                                continue;
+                        }
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(lo, 0),
+                                brw_vec8_grf(mask, 0));
+                        if (dw != 16)
+                                continue;
+                        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+                        brw_MUL(p, brw_message_reg(2 + ch + 4),
+                                brw_vec8_grf(lo + 1, 0),
+                                brw_vec8_grf(mask + 1, 0));
+                }
+        }
+        brw_fb_write(p, dw);
+}
+/* Multiply each of the four colour channels starting at GRF src by
+ * the scalar found at element 3 of GRF mask, store the products in
+ * the message registers beginning at m2, then emit the render target
+ * write.  The register layout per gen / dispatch width mirrors
+ * brw_wm_write().
+ */
+static void brw_wm_write__opacity(struct brw_compile *p, int dw,
+                                  int src, int mask)
+{
+        int ch;
+        if (dw == 8 && p->gen >= 060) {
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                for (ch = 0; ch < 4; ch++)
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(src + ch, 0),
+                                brw_vec1_grf(mask, 3));
+        } else {
+                brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                for (ch = 0; ch < 4; ch++) {
+                        const int lo = src + 2*ch;
+                        if (p->gen >= 060) {
+                                brw_MUL(p, brw_message_reg(2 + 2*ch),
+                                        brw_vec8_grf(lo, 0),
+                                        brw_vec1_grf(mask, 3));
+                                continue;
+                        }
+                        if (p->gen >= 045 && dw == 16) {
+                                brw_MUL(p, brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+                                        brw_vec8_grf(lo, 0),
+                                        brw_vec1_grf(mask, 3));
+                                continue;
+                        }
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(lo, 0),
+                                brw_vec1_grf(mask, 3));
+                        if (dw != 16)
+                                continue;
+                        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+                        brw_MUL(p, brw_message_reg(2 + ch + 4),
+                                brw_vec8_grf(lo + 1, 0),
+                                brw_vec1_grf(mask, 3));
+                }
+        }
+        brw_fb_write(p, dw);
+}
+/* Component-alpha write: multiply each colour channel starting at GRF
+ * src by the matching channel of the mask starting at GRF mask, store
+ * the products in the message registers beginning at m2, then emit
+ * the render target write.  The register layout per gen / dispatch
+ * width mirrors brw_wm_write().
+ */
+static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
+                                  int src, int mask)
+{
+        int ch;
+        if (dw == 8 && p->gen >= 060) {
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                for (ch = 0; ch < 4; ch++)
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(src + ch, 0),
+                                brw_vec8_grf(mask + ch, 0));
+        } else {
+                brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                for (ch = 0; ch < 4; ch++) {
+                        const int src_lo = src + 2*ch;
+                        const int mask_lo = mask + 2*ch;
+                        if (p->gen >= 060) {
+                                brw_MUL(p, brw_message_reg(2 + 2*ch),
+                                        brw_vec8_grf(src_lo, 0),
+                                        brw_vec8_grf(mask_lo, 0));
+                                continue;
+                        }
+                        if (p->gen >= 045 && dw == 16) {
+                                brw_MUL(p, brw_message_reg(2 + ch + BRW_MRF_COMPR4),
+                                        brw_vec8_grf(src_lo, 0),
+                                        brw_vec8_grf(mask_lo, 0));
+                                continue;
+                        }
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                        brw_MUL(p, brw_message_reg(2 + ch),
+                                brw_vec8_grf(src_lo, 0),
+                                brw_vec8_grf(mask_lo, 0));
+                        if (dw != 16)
+                                continue;
+                        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+                        brw_MUL(p, brw_message_reg(2 + ch + 4),
+                                brw_vec8_grf(src_lo + 1, 0),
+                                brw_vec8_grf(mask_lo + 1, 0));
+                }
+        }
+        brw_fb_write(p, dw);
+}
+/* Generate the WM kernel for an affine source without a mask:
+ * sample channel 0 into g12 and write it out.  Always returns true.
+ */
+bool
+brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
+{
+        int color;
+        if (p->gen < 060)
+                brw_wm_xy(p, dispatch);
+        color = brw_wm_affine(p, dispatch, 0, 1, 12);
+        brw_wm_write(p, dispatch, color);
+        return true;
+}
+/* Generate the WM kernel for an affine source multiplied by the alpha
+ * of an affine mask: channel 0 is sampled in full, channel 1 for its
+ * W channel only.  Always returns true.
+ */
+bool
+brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
+{
+        int color, alpha;
+        if (p->gen < 060)
+                brw_wm_xy(p, dispatch);
+        color = brw_wm_affine(p, dispatch, 0, 1, 12);
+        alpha = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
+        brw_wm_write__mask(p, dispatch, color, alpha);
+        return true;
+}
+/* Generate the WM kernel for an affine source with a component-alpha
+ * affine mask: both channels are sampled in full and multiplied
+ * per channel.  Always returns true.
+ */
+bool
+brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
+{
+        int color, ca_mask;
+        if (p->gen < 060)
+                brw_wm_xy(p, dispatch);
+        color = brw_wm_affine(p, dispatch, 0, 1, 12);
+        ca_mask = brw_wm_affine(p, dispatch, 1, 6, 20);
+        brw_wm_write__mask_ca(p, dispatch, color, ca_mask);
+        return true;
+}
+/* Generate the affine "source alpha" WM kernel: only the W channel of
+ * channel 0 is sampled, channel 1 is sampled in full, and the full
+ * sample is multiplied by that alpha.  Always returns true.
+ */
+bool
+brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
+{
+        int alpha, color;
+        if (p->gen < 060)
+                brw_wm_xy(p, dispatch);
+        alpha = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
+        color = brw_wm_affine(p, dispatch, 1, 6, 16);
+        brw_wm_write__mask(p, dispatch, color, alpha);
+        return true;
+}
+/* Projective variants
+ *
+ * brw_wm_projective_st emits the instructions that compute the two
+ * texture coordinates for one channel, each multiplied by the inverted
+ * third term, and stores them in message registers msg + 1 and
+ * msg + 1 + dw/8.  g28 and g30 (plus g31 when dw == 16) serve as
+ * scratch registers.
+ */
+static void brw_wm_projective_st(struct brw_compile *p, int dw,
+                                 int channel, int msg)
+{
+        int uv;
+        if (dw == 16) {
+                brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                uv = p->gen >= 060 ? 6 : 3;
+        } else {
+                brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+                uv = p->gen >= 060 ? 4 : 3;
+        }
+        uv += 2*channel; /* each channel uses two consecutive GRFs */
+        msg++; /* the first coordinate goes to message register msg + 1 */
+        if (p->gen >= 060) {
+                /* First compute 1/z
+                 * NOTE(review): this PLN targets the message register, yet
+                 * the inversions below read g30 (and g31), which nothing in
+                 * this branch writes beforehand; the pre-gen6 branch stores
+                 * the corresponding value in g30.  Confirm the destination.
+                 */
+                brw_PLN(p,
+                        brw_message_reg(msg),
+                        brw_vec1_grf(uv+1, 0),
+                        brw_vec8_grf(2, 0));
+                if (dw == 16) {
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* invert each half separately */
+                        brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+                        brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+                        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                } else
+                        brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+                brw_PLN(p,
+                        brw_vec8_grf(28, 0),
+                        brw_vec1_grf(uv, 0),
+                        brw_vec8_grf(2, 0));
+                brw_MUL(p,
+                        brw_message_reg(msg),
+                        brw_vec8_grf(28, 0),
+                        brw_vec8_grf(30, 0));
+                msg += dw/8; /* move on to the second coordinate's message register */
+                brw_PLN(p,
+                        brw_vec8_grf(28, 0),
+                        brw_vec1_grf(uv, 0),
+                        brw_vec8_grf(4, 0)); /* NOTE(review): brw_wm_affine_st uses brw_vec1_grf(uv, 4) with brw_vec8_grf(2, 0) at this step - confirm these operands */
+                brw_MUL(p,
+                        brw_message_reg(msg),
+                        brw_vec8_grf(28, 0),
+                        brw_vec8_grf(30, 0));
+        } else {
+                struct brw_reg r = brw_vec1_grf(uv, 0);
+                /* First compute 1/z */
+                brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
+                brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
+                if (dw == 16) {
+                        brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* invert each half separately */
+                        brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+                        brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
+                        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+                } else
+                        brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
+                /* Now compute the output s,t values */
+                brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
+                brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
+                brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+                msg += dw/8; /* move on to the second coordinate's message register */
+                brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
+                brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
+                brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
+        }
+}
+/* Compute projective coordinates for channel, then sample all four
+ * channels.  Returns the GRF number reported by brw_wm_sample().
+ */
+static int brw_wm_projective(struct brw_compile *p, int dw,
+                             int channel, int msg, int result)
+{
+        int grf;
+        brw_wm_projective_st(p, dw, channel, msg);
+        grf = brw_wm_sample(p, dw, channel, msg, result);
+        return grf;
+}
+/* Compute projective coordinates for channel, then sample only the W
+ * channel.  Returns the GRF number reported by brw_wm_sample__alpha().
+ */
+static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
+                                     int channel, int msg, int result)
+{
+        int grf;
+        brw_wm_projective_st(p, dw, channel, msg);
+        grf = brw_wm_sample__alpha(p, dw, channel, msg, result);
+        return grf;
+}
+/* Generate the WM kernel for a projective source without a mask:
+ * sample channel 0 into g12 and write it out.  Always returns true.
+ */
+bool
+brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
+{
+        int color;
+        if (p->gen < 060)
+                brw_wm_xy(p, dispatch);
+        color = brw_wm_projective(p, dispatch, 0, 1, 12);
+        brw_wm_write(p, dispatch, color);
+        return true;
+}
+/*
+ * Build the WM kernel for a projective source combined with a projective
+ * mask.  The source is sampled with brw_wm_projective() and the mask with
+ * brw_wm_projective__alpha(); both results go to brw_wm_write__mask().
+ *
+ * p        - compile context the instructions are emitted into.
+ * dispatch - dispatch width, forwarded to every helper.
+ *
+ * Always returns true.
+ */
+bool
+brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
+{
+        int src_res;
+        int mask_res;
+        /* 060 is an octal literal; presumably "gen 6" in the driver's gen
+         * encoding - confirm. */
+        if (p->gen < 060) {
+                brw_wm_xy(p, dispatch);
+        }
+        /* Source: channel 0, message base 1, result base 12. */
+        src_res = brw_wm_projective(p, dispatch, 0, 1, 12);
+        /* Mask: channel 1, message base 6, result base 20. */
+        mask_res = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
+        brw_wm_write__mask(p, dispatch, src_res, mask_res);
+        return true;
+}
+/*
+ * Build the "_ca" variant of the projective source + mask WM kernel.
+ * Unlike brw_wm_kernel__projective_mask(), the mask is sampled with
+ * brw_wm_projective() (not the __alpha form) and the results are written
+ * through brw_wm_write__mask_ca().
+ *
+ * p        - compile context the instructions are emitted into.
+ * dispatch - dispatch width, forwarded to every helper.
+ *
+ * Always returns true.
+ */
+bool
+brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
+{
+        int src_res;
+        int mask_res;
+        /* 060 is an octal literal; presumably "gen 6" in the driver's gen
+         * encoding - confirm. */
+        if (p->gen < 060) {
+                brw_wm_xy(p, dispatch);
+        }
+        /* Source: channel 0, message base 1, result base 12. */
+        src_res = brw_wm_projective(p, dispatch, 0, 1, 12);
+        /* Mask: channel 1, message base 6, result base 20. */
+        mask_res = brw_wm_projective(p, dispatch, 1, 6, 20);
+        brw_wm_write__mask_ca(p, dispatch, src_res, mask_res);
+        return true;
+}
+/*
+ * Build the "_sa" variant of the projective source + mask WM kernel.
+ * Here the source is sampled with brw_wm_projective__alpha() and the mask
+ * with brw_wm_projective(), and brw_wm_write__mask() receives the mask
+ * result first and the source result second - the reverse of
+ * brw_wm_kernel__projective_mask().
+ *
+ * p        - compile context the instructions are emitted into.
+ * dispatch - dispatch width, forwarded to every helper.
+ *
+ * Always returns true.
+ */
+bool
+brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
+{
+        int src_res;
+        int mask_res;
+        /* 060 is an octal literal; presumably "gen 6" in the driver's gen
+         * encoding - confirm. */
+        if (p->gen < 060) {
+                brw_wm_xy(p, dispatch);
+        }
+        /* Source: channel 0, message base 1, result base 12. */
+        src_res = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
+        /* Mask: channel 1, message base 6, result base 16 (not 20 as in the
+         * other mask kernels). */
+        mask_res = brw_wm_projective(p, dispatch, 1, 6, 16);
+        /* Operand order is intentionally (mask, src) here. */
+        brw_wm_write__mask(p, dispatch, mask_res, src_res);
+        return true;
+}
+/*
+ * Build the WM kernel for an affine source scaled by an opacity value.
+ *
+ * The second operand passed to brw_wm_write__opacity() is chosen as:
+ *   gen <  060                  -> 5 (after emitting brw_wm_xy())
+ *   gen >= 060, dispatch == 16  -> 8
+ *   gen >= 060, otherwise       -> 6
+ * These look like register numbers holding the opacity - confirm against
+ * brw_wm_write__opacity().
+ *
+ * p        - compile context the instructions are emitted into.
+ * dispatch - dispatch width, forwarded to every helper.
+ *
+ * Always returns true.
+ */
+bool
+brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
+{
+        int opacity;
+        int src_res;
+        /* 060 is an octal literal; presumably "gen 6" in the driver's gen
+         * encoding - confirm. */
+        if (p->gen >= 060) {
+                if (dispatch == 16)
+                        opacity = 8;
+                else
+                        opacity = 6;
+        } else {
+                brw_wm_xy(p, dispatch);
+                opacity = 5;
+        }
+        /* Source: channel 0, message base 1, result base 12. */
+        src_res = brw_wm_affine(p, dispatch, 0, 1, 12);
+        brw_wm_write__opacity(p, dispatch, src_res, opacity);
+        return true;
+}
+/*
+ * Build the WM kernel for a projective source scaled by an opacity value.
+ *
+ * The second operand passed to brw_wm_write__opacity() is chosen as:
+ *   gen <  060                  -> 5 (after emitting brw_wm_xy())
+ *   gen >= 060, dispatch == 16  -> 8
+ *   gen >= 060, otherwise       -> 6
+ * These look like register numbers holding the opacity - confirm against
+ * brw_wm_write__opacity().
+ *
+ * p        - compile context the instructions are emitted into.
+ * dispatch - dispatch width, forwarded to every helper.
+ *
+ * Always returns true.
+ */
+bool
+brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
+{
+        int opacity;
+        int src_res;
+        /* 060 is an octal literal; presumably "gen 6" in the driver's gen
+         * encoding - confirm. */
+        if (p->gen >= 060) {
+                if (dispatch == 16)
+                        opacity = 8;
+                else
+                        opacity = 6;
+        } else {
+                brw_wm_xy(p, dispatch);
+                opacity = 5;
+        }
+        /* Source: channel 0, message base 1, result base 12. */
+        src_res = brw_wm_projective(p, dispatch, 0, 1, 12);
+        brw_wm_write__opacity(p, dispatch, src_res, opacity);
+        return true;
+}

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 3253 → Rev 3254