Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 3290 → Rev 3291

/drivers/video/Intel-2D/compiler.h
38,6 → 38,7
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define __packed__ __attribute__((__packed__))
#define flatten __attribute__((flatten))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
48,6 → 49,7
#define constant
#define pure
#define __packed__
#define flatten
#endif
 
#ifdef HAVE_VALGRIND
/drivers/video/Intel-2D/gen4_render.c
0,0 → 1,1441
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/* gen4 has a serious issue with its shaders that we need to flush
* after every rectangle... So until that is resolved, prefer
* the BLT engine.
*/
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
 
/* Convert a GRF register count into "number of 16-register blocks, minus
 * one" (16 -> 0, 17..32 -> 1, ...). The argument is parenthesised so that
 * expressions with low-precedence operators expand correctly. */
#define GEN4_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0
 
#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 32
 
#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0
 
#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0
 
#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
 
/*
* this program computes dA/dx and dA/dy for the texture coordinates along
* with the base texture coordinate. It was extracted from the Mesa driver
*/
 
/* Register counts for the SF and PS kernels. SF_KERNEL_NUM_GRF is converted
 * with GEN4_GRF_BLOCKS() when the SF unit state is filled in. */
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

/* Per-unit thread limits; presumably the hardware maxima for gen4 and G4x
 * (not referenced in this part of the file -- TODO confirm against users). */
#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50
 
/*
 * Kernel data registered in wm_kernels[] as WM_KERNEL_VIDEO_PACKED.
 * Each row holds four 32-bit words; the rows are supplied by the .g4b
 * files included below, concatenated in the order listed.
 */
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
/*
 * Kernel data registered in wm_kernels[] as WM_KERNEL_VIDEO_PLANAR.
 * Same layout as the packed variant; only the sampling fragment differs
 * (exa_wm_src_sample_planar.g4b instead of exa_wm_src_sample_argb.g4b).
 */
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
/*
 * Table of WM kernels, indexed by the WM_KERNEL_* enumerators.
 *
 * NOKERNEL entries store `func` as the data pointer with size 0;
 * KERNEL entries store the address and byte size of a static array.
 * NOTE(review): a size of 0 presumably tells the consumer to treat `data`
 * as a generator function rather than a blob -- the consumer is outside
 * this part of the file, confirm there.
 */
#define NOKERNEL(kernel_enum, func, masked) \
[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
const void *data; /* kernel blob, or the `func` argument for NOKERNEL entries */
unsigned int size; /* sizeof the blob; 0 for NOKERNEL entries */
bool has_mask; /* the `masked` argument given for this entry */
} wm_kernels[] = {
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
 
/*
 * Blend description per compositing operator, indexed by the operator
 * number (the row comments give the operator each entry belongs to).
 *
 * src_alpha is consulted by gen4_choose_composite_kernel() to pick between
 * the MASKSA and MASKCA kernels for component-alpha masks; src_blend and
 * dst_blend are the GEN4_BLENDFACTOR_* values for source and destination.
 */
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen4_blend_op[] = {
/* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
/* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
/* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
/* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};
 
/**
 * One more than the highest-valued BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Byte offset of the (src, dst) blend-factor pair in a table laid out
 * src-major with 64 bytes per entry; the result is added to
 * render_state.gen4.cc when the pipelined pointers are emitted. */
#define BLEND_OFFSET(s, d) \
(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

/* Byte offset of the entry for a given source filter/extend, mask
 * filter/extend and kernel, again with 64 bytes per entry; the result is
 * added to render_state.gen4.wm when the pipelined pointers are emitted. */
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel);
 
/* Emission shorthands; each one expects a variable named `sna` to be in
 * scope at the point of use. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
#define GEN4_MAX_3D_SIZE 8192

/*
 * Report whether either dimension exceeds GEN4_MAX_3D_SIZE.
 * A value equal to the limit is still accepted.
 */
static inline bool too_large(int width, int height)
{
	if (width > GEN4_MAX_3D_SIZE)
		return true;

	return height > GEN4_MAX_3D_SIZE;
}
 
/*
 * Select the WM_KERNEL_* index for a composite operation.
 *
 * Without a mask the plain kernel is used. With a mask, component-alpha
 * operations choose between the MASKSA and MASKCA kernels depending on
 * gen4_blend_op[op].src_alpha; other masks use the MASK kernel.
 * For non-affine operations the returned index is one higher than the
 * affine choice.
 */
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int kernel = WM_KERNEL;

	if (has_mask) {
		kernel = WM_KERNEL_MASK;
		if (is_ca)
			kernel = gen4_blend_op[op].src_alpha ?
				WM_KERNEL_MASKSA : WM_KERNEL_MASKCA;
	}

	return is_affine ? kernel : kernel + 1;
}
 
/*
 * Emit the extra pass used by component-alpha operations.
 *
 * Returns false without emitting anything unless op->need_magic_ca_pass is
 * set. Otherwise it switches the pipelined pointers to PictOpAdd with the
 * masked component-alpha kernel, emits a RECTLIST primitive covering the
 * vertices from render.vertex_start up to render.vertex_index, records the
 * resulting batch position in last_primitive and returns true.
 */
static bool gen4_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen4_render_state *state = &sna->render_state.gen4;

if (!op->need_magic_ca_pass)
return false;

/* There must be vertices emitted since the primitive was started. */
assert(sna->render.vertex_index > sna->render.vertex_start);

DBG(("%s: CA fixup\n", __FUNCTION__));
assert(op->mask.bo != NULL);
assert(op->has_component_alpha);

gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
gen4_choose_composite_kernel(PictOpAdd,
true, true, op->is_affine));

OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); /* vertex count */
OUT_BATCH(sna->render.vertex_start); /* first vertex */
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */

state->last_primitive = sna->kgem.nbatch;
return true;
}
 
/*
 * Return the BLEND_OFFSET() for the blend state to use.
 *
 * NOTE: in this version the factors are hard-coded to
 * (ONE, INV_SRC_ALPHA); the table lookup and the adjustments for
 * alpha-less destinations and component alpha are compiled out with
 * "#if 0". As a result `op` and `has_component_alpha` only influence the
 * debug output, and `dst_format` is likewise used for debugging only.
 * NOTE(review): the disabled lookup refers to gen6_blend_op, which looks
 * like a leftover from another generation's file -- fix before re-enabling.
 */
static uint32_t gen4_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;

src = GEN4_BLENDFACTOR_ONE; //gen4_blend_op[op].src_blend;
dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
* it as always 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN4_BLENDFACTOR_DST_ALPHA)
src = GEN4_BLENDFACTOR_ONE;
else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
src = GEN4_BLENDFACTOR_ZERO;
}

/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen4_blend_op[op].src_alpha) {
if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
}
#endif
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, BLEND_OFFSET(src, dst)));
return BLEND_OFFSET(src, dst);
}
 
/*
 * Translate a picture format into the GEN4 surface format used when the
 * picture is sampled. Formats other than a8r8g8b8, x8r8g8b8 and a8 yield
 * (uint32_t)-1.
 */
static uint32_t gen4_get_card_format(PictFormat format)
{
	if (format == PICT_a8r8g8b8)
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;

	if (format == PICT_x8r8g8b8)
		return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;

	if (format == PICT_a8)
		return GEN4_SURFACEFORMAT_A8_UNORM;

	return -1;
}
 
/*
 * Translate a picture format into the GEN4 surface format used when the
 * picture is a render target. Both a8r8g8b8 and x8r8g8b8 map to
 * B8G8R8A8_UNORM; a8 maps to A8_UNORM; anything else yields (uint32_t)-1.
 */
static uint32_t gen4_get_dest_format(PictFormat format)
{
	if (format == PICT_a8r8g8b8 || format == PICT_x8r8g8b8)
		return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;

	if (format == PICT_a8)
		return GEN4_SURFACEFORMAT_A8_UNORM;

	return -1;
}
 
/*
 * A gen4_surface_state padded out to exactly 32 bytes. Surface-state and
 * binding-table slots in the batch are allocated in units of this size.
 */
typedef struct gen4_surface_state_padded {
struct gen4_surface_state state;
char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
 
/*
 * Reserve a 64-byte block (requested with alignment 64) in the static
 * stream; the returned mapping is not used here.
 */
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
/*
 * Fill in a sampler state for the given filter and extend mode.
 *
 * The same map filter is used for minification and magnification, and the
 * same wrap mode for the r, s and t coordinates. Unknown filter values
 * behave like NEAREST; unknown extend values behave like NONE.
 */
static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	unsigned int map_filter;
	unsigned int wrap_mode;

	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* Legacy border colour mode gives the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	if (filter == SAMPLER_FILTER_BILINEAR)
		map_filter = GEN4_MAPFILTER_LINEAR;
	else
		map_filter = GEN4_MAPFILTER_NEAREST;

	sampler_state->ss0.min_filter = map_filter;
	sampler_state->ss0.mag_filter = map_filter;

	switch (extend) {
	case SAMPLER_EXTEND_REPEAT:
		wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	case SAMPLER_EXTEND_NONE:
	default:
		wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	}

	sampler_state->ss1.r_wrap_mode = wrap_mode;
	sampler_state->ss1.s_wrap_mode = wrap_mode;
	sampler_state->ss1.t_wrap_mode = wrap_mode;
}
 
/*
 * Map an I915_TILING_* value to the surface-state tiling bits.
 * Unknown values trip an assertion; with assertions compiled out they are
 * treated like I915_TILING_NONE.
 */
static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	if (tiling == I915_TILING_X)
		return GEN4_SURFACE_TILED;

	if (tiling == I915_TILING_Y)
		return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;

	if (tiling != I915_TILING_NONE)
		assert(0);

	return 0;
}
 
/**
 * Bind a buffer object as a surface and return the byte offset of its
 * surface state within the batch.
 *
 * If kgem_bo_get_binding() already knows a slot for this bo/format pair,
 * that slot is reused (and the bo is marked dirty when it is a render
 * target). Otherwise a new padded surface-state slot is taken by moving
 * kgem.surface downwards, filled in, and recorded with
 * kgem_bo_set_binding().
 *
 * is_dst selects the relocation domains: render for both read and write
 * when binding a destination, sampler read-only when binding a source.
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t domains;
uint16_t offset;
uint32_t *ss;

assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}

/* Allocate a fresh slot; offset is counted in dwords from the batch start. */
offset = sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;

ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
GEN4_SURFACE_BLEND_ENABLED |
format << GEN4_SURFACE_FORMAT_SHIFT);

if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
/* The bo address lives in dword 1 and is patched via a relocation. */
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

/* Width, height and pitch are all stored minus one. */
ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
ss[3] = (gen4_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = 0;

kgem_bo_set_binding(bo, format, offset);

DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));

return offset * sizeof(uint32_t);
}
 
/*
 * Emit the 3DSTATE_VERTEX_BUFFERS packet for the op's vertex-element id.
 *
 * The buffer pitch is 4 bytes per float times op->floats_per_vertex. The
 * batch position of the dword following the header pair is remembered in
 * render.vertex_reloc[] so it can be patched later, and the id's bit is set
 * in render.vb_id. Must not be called when that bit is already set.
 */
static void gen4_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;

assert((sna->render.vb_id & (1 << id)) == 0);

OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
/* Record where the placeholder dwords start. */
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);

sna->render.vb_id |= 1 << id;
}
 
/*
 * Start (or continue) a RECTLIST primitive.
 *
 * If nothing has been added to the batch since the previous primitive
 * (nbatch == last_primitive), that primitive is continued: vertex_offset is
 * pointed back at its vertex-count dword, five dwords before the current
 * position. Otherwise a new 6-dword 3DPRIMITIVE is emitted with a zero
 * vertex count, vertex_offset is set to the position of that count, and
 * vertex_start/last_primitive are updated.
 */
static void gen4_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}

OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;

sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
/*
 * Make sure a vertex buffer and an open primitive exist for this op.
 *
 * Returns true when the primitive is ready (either it was already open
 * after waiting on the vertex lock, or it has just been emitted), and
 * false when the batch lacks room for the required dwords, in which case
 * nothing is emitted.
 */
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;
int ndwords;

if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;

/* 7xpipelined pointers + 6xprimitive + 1xflush */
/* NOTE(review): those add up to 14, yet 20 dwords are reserved for the
 * component-alpha case -- presumably deliberate slack, confirm. */
ndwords = op->need_magic_ca_pass? 20 : 6;
/* The vertex-buffer packet takes 5 more dwords if not yet emitted. */
if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;

if (!kgem_check_batch(&sna->kgem, ndwords))
return false;

if ((sna->render.vb_id & (1 << id)) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);

return true;
}
 
/*
 * Slow path of gen4_get_rectangles(): called when the vertex buffer has no
 * room for another rectangle.
 *
 * Returns the number of floats available afterwards, or 0 when the caller
 * has to submit the batch first (not enough batch/reloc space, or a
 * component-alpha pass is pending while a vbo is in use).
 */
static int gen4_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Prevent discarding a new vbo after lock contention: if waiting on the
 * lock reports success and there is now room, just use that space. */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}

if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;

if (op->need_magic_ca_pass && sna->render.vbo)
return 0;

if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
/* The CA pass changed the pipelined pointers; restore the op's own. */
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}

return gen4_vertex_finish(sna);
}
 
/*
 * Reserve vertex space for up to `want` rectangles of the current op.
 *
 * If the vertex buffer cannot hold one more rectangle, or no primitive is
 * open and one cannot be started, the pending vertices are flushed, the
 * batch is submitted, emit_state() is called to re-emit the op's state and
 * the attempt is repeated.
 *
 * On return render.vertex_index has been advanced by three vertices per
 * reserved rectangle. The return value is the number of rectangles
 * reserved, which may be smaller than `want` when space is limited.
 */
inline static int gen4_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
	int rem;

	assert(want);

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen4_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen4_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	/* At this point at least one rectangle must fit in the buffer.
	 * (The comparison was previously inverted, which made the assertion
	 * fire whenever more than one rectangle's worth of space was free.) */
	assert(op->floats_per_rect <= vertex_space(sna));
	assert(rem <= vertex_space(sna));
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	sna->render.vertex_index += 3*want;
	return want;

flush:
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen4_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
 
/*
 * Allocate a zeroed binding table in the batch.
 *
 * One padded surface-state slot is taken by moving kgem.surface downwards.
 * *offset receives the slot's position in dwords and the return value
 * points at the slot itself.
 */
static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

/* Clear all surplus entries to zero in case of prefetch */
*offset = sna->kgem.surface;
return memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen4_surface_state_padded));
}
 
/*
 * Emit the static URB partitioning (URB_FENCE + CS_URB_STATE) once.
 *
 * Does nothing unless render_state.gen4.needs_urb is set; clears the flag
 * afterwards. The sections are laid out back to back in the order
 * VS, GS, CLIP, SF, CS, each sized entries * entry_size from the URB_*
 * constants, and every fence is programmed as the end of its section.
 */
static void
gen4_emit_urb(struct sna *sna)
{
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
int urb_clip_start, urb_clip_size;
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;

if (!sna->render_state.gen4.needs_urb)
return;

urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
urb_clip_start = urb_gs_start + urb_gs_size;
urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
urb_sf_start = urb_clip_start + urb_clip_size;
urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

/* Pad with no-ops while the position within a 16-dword block is above
 * 12, so the 3-dword URB_FENCE does not straddle the block boundary
 * (presumably a cacheline restriction -- TODO confirm in the PRM). */
while ((sna->kgem.nbatch & 15) > 12)
OUT_BATCH(MI_NOOP);

OUT_BATCH(GEN4_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |
UF0_CLIP_REALLOC |
UF0_GS_REALLOC |
UF0_VS_REALLOC |
1);
OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));

/* Constant buffer state */
OUT_BATCH(GEN4_CS_URB_STATE | 0);
OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);

sna->render_state.gen4.needs_urb = false;
}
 
/*
 * Emit STATE_BASE_ADDRESS.
 *
 * The general state base is relocated against general_bo; the surface
 * state base is a relocation with a NULL bo (NOTE(review): presumably
 * kgem_add_reloc() resolves that to the batch buffer itself -- confirm).
 * The media base is left at 0 and both upper bounds are disabled.
 */
static void
gen4_emit_state_base_address(struct sna *sna)
{
assert(sna->render_state.gen4.general_bo->proxy == NULL);
OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
sna->kgem.nbatch,
sna->render_state.gen4.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* media */

/* upper bounds, all disabled */
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
}
 
/*
 * Emit the state that only needs to be sent once per batch: the 3D
 * pipeline select (using the newer opcode from gen 045 onwards) followed
 * by the state base addresses. Clears needs_invariant when done.
 *
 * Must run before any surface state has been allocated in the batch.
 */
static void
gen4_emit_invariant(struct sna *sna)
{
	uint32_t pipeline_select;

	assert(sna->kgem.surface == sna->kgem.batch_size);

	pipeline_select = sna->kgem.gen >= 045 ?
		NEW_PIPELINE_SELECT : GEN4_PIPELINE_SELECT;
	OUT_BATCH(pipeline_select | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}
 
/*
 * Prepare the batch for rendering with this op.
 *
 * Switches kgem to render mode for the destination bo, submits the current
 * batch if it cannot take 150 more dwords plus 4 surfaces, and emits the
 * invariant state when it is still pending.
 */
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
/* Re-select render mode on the fresh batch. */
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (sna->render_state.gen4.needs_invariant)
gen4_emit_invariant(sna);
}
 
/*
 * Re-align the vertex buffer when the vertex size changes between ops.
 *
 * If op->floats_per_vertex differs from the size last used, the current
 * vertex buffer is finished when fewer than two rectangles would still
 * fit, then vertex_index is rounded up to the next whole vertex of the new
 * size and vertex_used is set to match.
 */
static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);

DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen4.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
/* Round the used float count up to a whole number of new-size vertices. */
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
}
}
 
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen4.surface_table == offset)
return;
 
sna->render_state.gen4.surface_table = offset;
 
/* Binding table pointers */
OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
}
 
/*
 * Emit 3DSTATE_PIPELINED_POINTERS for the given blend and kernel.
 *
 * The sampler/kernel offset and the blend offset are combined into a key;
 * when the key matches the last one emitted the packet is skipped.
 * Otherwise the packet is emitted (GS and CLIP disabled) and followed by
 * gen4_emit_urb(), which itself only acts while needs_urb is set.
 */
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
uint16_t sp, bp;
uint32_t key;

DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
__FUNCTION__, op->u.gen4.ve_id & 2,
op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel, blend, op->has_component_alpha, (int)op->dst.format));

sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel);
bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
/* Low 16 bits: sampler/kernel offset; high 16 bits: blend offset. */
key = sp | (uint32_t)bp << 16;
if (key == sna->render_state.gen4.last_pipelined_pointers)
return;

OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen4.vs);
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
OUT_BATCH(sna->render_state.gen4.sf);
OUT_BATCH(sna->render_state.gen4.wm + sp);
OUT_BATCH(sna->render_state.gen4.cc + bp);

sna->render_state.gen4.last_pipelined_pointers = key;
gen4_emit_urb(sna);
}
 
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE for the op's destination if it changed.
 *
 * Returns true when the cached limit and offset already match and nothing
 * was emitted, false when a new packet was written.
 */
static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
/* Both dwords pack y into the high 16 bits and x into the low 16 bits. */
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));

if (sna->render_state.gen4.drawrect_limit == limit &&
sna->render_state.gen4.drawrect_offset == offset)
return true;

sna->render_state.gen4.drawrect_offset = offset;
sna->render_state.gen4.drawrect_limit = limit;

OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
return false;
}
 
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS for the op's vertex-element id.
 *
 * Skipped when the id equals the one last emitted. Three elements are
 * always written: the position, the first texture channel (format chosen
 * by the low two bits of the id) and the second texture channel (format
 * chosen by id >> 2, or a constant (0, 0, 0, 1) element when that is zero).
 */
static void
gen4_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen4_render_state *render = &sna->render_state.gen4;
uint32_t src_format, dw;
int id = op->u.gen4.ve_id;

if (render->ve_id == id)
return;
render->ve_id = id;

/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
*/
OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
(1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
/* falls through to case 0 when assertions are compiled out */
case 0:
src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
/* The first channel starts 4 bytes in, right after the packed x,y pair. */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

/* u1, v1, w1 */
if (id >> 2) {
/* Skip the position (4 bytes) and the first channel; a first-channel
 * selector of 0 occupies one float's worth of space. */
unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
id >> 2, src_offset));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
/* falls through to case 2 when assertions are compiled out */
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
src_offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
} else {
/* No second channel: store the constant (0, 0, 0, 1) instead. */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
}
}
 
/*
 * Emit all per-operation state: flushes, drawing rectangle, binding table,
 * pipelined pointers and vertex elements.
 *
 * wm_binding_table carries the binding-table offset with bit 0 used as a
 * "destination was dirty" flag (see gen4_bind_surfaces()). A full MI_FLUSH
 * is issued when the source or mask bo is dirty; otherwise, if the
 * destination was dirty, the drawing rectangle did not change and the
 * operator is above PictOpSrc, a flush with render-cache flush inhibited
 * is issued.
 */
static void
gen4_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool flush;

flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo),
flush));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
/* The destination is about to be written again. */
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
flush &= gen4_emit_drawing_rectangle(sna, op);
if (flush && op->op > PictOpSrc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

gen4_emit_binding_table(sna, wm_binding_table & ~1);
gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
gen4_emit_vertex_elements(sna, op);
}
 
/*
 * Build the binding table for the op (destination, source and optional
 * mask) and emit the matching state.
 *
 * If binding the surfaces allocated no new surface state and the new table
 * has the same entries as the previously emitted one, the freshly
 * allocated table is released again and the previous one is reused.
 * The destination's dirty flag, sampled before binding, is passed to
 * gen4_emit_state() in bit 0 of the offset.
 */
static void
gen4_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;

gen4_get_batch(sna, op);

binding_table = gen4_composite_get_binding_table(sna, &offset);

binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen4_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
assert(op->u.gen4.ve_id >> 2);
binding_table[2] =
gen4_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}

/* kgem.surface still equal to offset means no gen4_bind_bo() call had to
 * allocate a slot; the 64-bit compare covers entries 0 and 1 at once. */
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen4.surface_table;
}

gen4_emit_state(sna, op, offset | dirty);
}
 
/*
 * Composite a single rectangle: reserve space for one rectangle (re-binding
 * surfaces via gen4_bind_surfaces() if the batch had to be submitted) and
 * let the op's prim_emit callback write its vertices.
 */
fastcall static void
gen4_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));

gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, r);
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Finish a composite operation: if a primitive is still open, flush its
 * vertices and run the component-alpha pass when the op requires one.
 */
static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (!sna->render.vertex_offset)
		return;

	gen4_vertex_flush(sna);
	gen4_magic_ca_pass(sna, op);
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Set up `tmp` for a textured blit of `src` through the a8 `mask` onto
 * `dst`, bind the surfaces and align the vertex buffer. Always returns
 * true.
 *
 * Much of the setup is hard-coded here: the operator is forced to
 * PictOpSrc regardless of the `op` argument, source and destination are
 * treated as x8r8g8b8, the mask as a8, and both channels use nearest
 * filtering without repeat. The src_x/src_y, msk_x/msk_y and dst_x/dst_y
 * arguments are not used in this function.
 *
 * NOTE(review): `mask` is dereferenced unconditionally although the kernel
 * choice tests mask_bo for NULL -- callers apparently must always pass a
 * mask pixmap; confirm.
 * NOTE(review): the offsets and any other fields of `tmp` not assigned
 * below are presumably expected to be zeroed by the caller.
 */
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{

DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));

tmp->op = PictOpSrc;

tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;


tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;

tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;

tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;

tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;

tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;


/* The source scale uses the blit size rather than the source pixmap
 * size (the trailing comments show the alternative that was replaced). */
tmp->src.scale[0] = 1.f/width; //src->width;
tmp->src.scale[1] = 1.f/height; //src->height;
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;


tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;

tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);

tmp->blt = gen4_render_composite_blt;
tmp->done = gen4_render_composite_done;

/* If the bos do not fit alongside the current batch, submit it first.
 * The check is not repeated after the submit. */
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}

gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
/*
 * Close the vertex buffer via gen4_vertex_close(). Afterwards no vertex
 * buffer id may remain bound and no primitive may remain open.
 */
static void
gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);

assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
/*
 * Drop the current vertex buffer object and fall back to the embedded
 * vertex_data array, resetting the used/index counters to zero.
 * Callers in this file check render.vbo for NULL before calling.
 */
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
/*
 * kgem retire callback (installed as sna->kgem.retire by gen4_render_init()).
 *
 * When the batch is empty and a vbo exists that kgem_bo_is_busy() reports
 * as idle, the vertex counters are rewound to zero.
 */
static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Commands are still queued in the batch: leave the counters alone. */
	if (kgem->nbatch != 0)
		return;

	/* No vbo in use. */
	if (!sna->render.vbo)
		return;

	/* The vbo is still busy. */
	if (kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
	sna->render.vertex_index = 0;
	sna->render.vertex_used = 0;
}
 
/*
 * kgem expire callback (installed as sna->kgem.expire by gen4_render_init()).
 *
 * Discards the vbo when one exists and it currently holds no vertices.
 */
static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Nothing to drop, or the vbo still contains vertex data. */
	if (!sna->render.vbo || sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo\n", __FUNCTION__));
	discard_vbo(sna);
}
 
/*
 * Reset hook installed as sna->render.reset by gen4_render_init().
 *
 * Puts the tracked gen4 render state back to its "unknown" values, drops a
 * vbo that kgem reports as not mappable, and clears the per-batch vertex
 * bookkeeping.
 */
static void gen4_render_reset(struct sna *sna)
{
	/* Flag the invariant and URB state as needed again. */
	sna->render_state.gen4.needs_urb = true;
	sna->render_state.gen4.needs_invariant = true;

	/*
	 * -1 in each tracked value; presumably chosen so that no real value
	 * compares equal to the stale one -- confirm against the emitters.
	 */
	sna->render_state.gen4.ve_id = -1;
	sna->render_state.gen4.last_primitive = -1;
	sna->render_state.gen4.last_pipelined_pointers = -1;
	sna->render_state.gen4.surface_table = -1;
	sna->render_state.gen4.drawrect_offset = -1;
	sna->render_state.gen4.drawrect_limit = -1;

	if (sna->render.vbo) {
		if (!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
			DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
			discard_vbo(sna);
		}
	}

	sna->render.vb_id = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vertex_offset = 0;
}
 
/*
 * Teardown hook installed as sna->render.fini by gen4_render_init().
 *
 * Releases general_bo, the buffer object produced by gen4_render_setup().
 */
static void gen4_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem,
			sna->render_state.gen4.general_bo);
}
 
/*
 * Reserve a gen4_vs_unit_state in the static stream (32-byte aligned) and
 * fill it in with the vertex shader disabled.
 *
 * Returns the offset of the new state within the stream.
 */
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *vs;

	vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* The vertex shader is disabled, i.e. it acts as a passthrough. */
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	/* URB allocation for the VS stage; the size field is stored minus one. */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;

	return sna_static_stream_offsetof(stream, vs);
}
 
/*
 * Reserve a gen4_sf_unit_state in the static stream (32-byte aligned) and
 * program it for the SF kernel located at stream offset 'kernel'.
 *
 * 'gen' is accepted but not consulted by this function.
 *
 * Returns the offset of the new state within the stream.
 */
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
				     int gen, uint32_t kernel)
{
	struct gen4_sf_unit_state *sf =
		sna_static_stream_map(stream, sizeof(*sf), 32);

	/* thread0: kernel location (stored without its low 6 bits) and GRF use */
	sf->thread0.kernel_start_pointer = kernel >> 6;
	sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);

	/* thread3: no constant URBs, one URB per vertex */
	sf->thread3.const_urb_entry_read_offset = 0;
	sf->thread3.const_urb_entry_read_length = 0;
	sf->thread3.urb_entry_read_length = 1;
	/* don't smash vertex header, read start from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;

	/* thread4: thread and URB budget; max_threads and size are minus one */
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;

	/* skip the viewport transform */
	sf->sf5.viewport_transform = false;

	/* no culling, no scissor, destination origin bias of 0x8 on both axes */
	sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf6.dest_org_hbias = 0x8;
	sf->sf6.dest_org_vbias = 0x8;

	sf->sf7.trifan_pv = 2;

	return sna_static_stream_offsetof(stream, sf);
}
 
/*
 * Reserve two consecutive gen4_sampler_state entries in the static stream
 * (32-byte aligned): entry 0 is initialised for the source with
 * (src_filter, src_extend), entry 1 for the mask with
 * (mask_filter, mask_extend).
 *
 * Returns the offset of the pair within the stream.
 */
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *pair;

	pair = sna_static_stream_map(stream, 2 * sizeof(pair[0]), 32);

	sampler_state_init(pair + 0, src_filter, src_extend);
	sampler_state_init(pair + 1, mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, pair);
}
 
/*
 * Fill in one WM unit state.
 *
 * wm       - state to initialise
 * gen      - generation code; values >= 045 (an octal literal) select the
 *            G4X thread limit instead of the GEN4 one
 * has_mask - whether the kernel also reads a mask; selects the binding
 *            table entry count and the URB read length
 * kernel   - stream offset of the kernel, must be a multiple of 64
 * sampler  - stream offset of the sampler state, must be a multiple of 32
 */
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	/* The pointer field drops the low 6 bits of the offset. */
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;

	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;

	wm->thread3.urb_entry_read_offset = 0;
	wm->thread3.dispatch_grf_start_reg = 3;

	/* The pointer field drops the low 5 bits of the offset. */
	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	/* The thread limit is stored minus one. */
	if (gen >= 045)
		wm->wm5.max_threads = G4X_MAX_WM_THREADS - 1;
	else
		wm->wm5.max_threads = GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;

	/* Each pair of attributes (src/mask coords) is two URB entries */
	wm->thread1.binding_table_entry_count = has_mask ? 3 : 2;
	wm->thread3.urb_entry_read_length = has_mask ? 4 : 2;
}
 
/*
 * Build a table of colour-calculator states in the static stream, one
 * 64-byte slot per (source factor, destination factor) pair, laid out
 * row-major with the source factor as the row index.
 *
 * Returns the offset of the first slot within the stream.
 */
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *base;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				     64);

	for (src = 0; src < GEN4_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN4_BLENDFACTOR_COUNT; dst++) {
			struct gen4_cc_unit_state *cc =
				(struct gen4_cc_unit_state *)
				(base + (src * GEN4_BLENDFACTOR_COUNT + dst) * 64);

			/* Blending is off only for source ONE with destination ZERO. */
			cc->cc3.blend_enable =
				dst != GEN4_BLENDFACTOR_ZERO || src != GEN4_BLENDFACTOR_ONE;

			cc->cc5.logicop_func = 0xc; /* COPY */

			/* Fill in alpha blend factors same as color, for the future. */
			cc->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
			cc->cc5.ia_src_blend_factor = src;
			cc->cc5.ia_dest_blend_factor = dst;

			cc->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			cc->cc6.src_blend_factor = src;
			cc->cc6.dest_blend_factor = dst;
			cc->cc6.clamp_pre_alpha_blend = 1;
			cc->cc6.clamp_post_alpha_blend = 1;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
/*
 * Assemble all static gen4 render state into a single stream and turn it
 * into sna->render_state.gen4.general_bo.
 *
 * The stream receives, in this order: a null pad, the SF kernel, every WM
 * kernel, the VS and SF unit states, one WM unit state (plus the sampler
 * pair it points at) for each filter/extend combination of source and mask
 * and each kernel, and finally the CC state table.  The offsets of the VS,
 * SF, WM and CC state are recorded in the render state.
 *
 * Returns true when the final buffer object could be created.
 */
static bool gen4_render_setup(struct sna *sna)
{
	struct gen4_render_state *state = &sna->render_state.gen4;
	struct sna_static_stream general;
	struct gen4_wm_unit_state_padded *wm_state;
	uint32_t sf, wm[KERNEL_COUNT];
	int src_filter, src_extend, mask_filter, mask_extend, n;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);

	/* Kernels with a non-zero size are copied as-is; the others are compiled. */
	for (n = 0; n < KERNEL_COUNT; n++) {
		if (wm_kernels[n].size == 0) {
			wm[n] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[n].data,
							     16);
			continue;
		}

		wm[n] = sna_static_stream_add(&general,
					      wm_kernels[n].data,
					      wm_kernels[n].size,
					      64);
	}

	state->vs = gen4_create_vs_unit_state(&general);
	state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);

	wm_state = sna_static_stream_map(&general,
					 sizeof(*wm_state) * KERNEL_COUNT *
					 FILTER_COUNT * EXTEND_COUNT *
					 FILTER_COUNT * EXTEND_COUNT,
					 64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);

	/* wm_state walks the table linearly; the kernel index varies fastest. */
	for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++) {
		for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++) {
			for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++) {
				for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) {
					uint32_t sampler_state =
						gen4_create_sampler_state(&general,
									  src_filter, src_extend,
									  mask_filter, mask_extend);

					for (n = 0; n < KERNEL_COUNT; n++, wm_state++)
						gen4_init_wm_state(&wm_state->state,
								   sna->kgem.gen,
								   wm_kernels[n].has_mask,
								   wm[n], sampler_state);
				}
			}
		}
	}

	state->cc = gen4_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
 
 
/*
 * Set up the gen4 render backend for 'sna'.
 *
 * Builds the static state with gen4_render_setup() and, on success,
 * installs the gen4 callbacks and limits into sna->kgem and sna->render.
 *
 * Returns false, leaving 'sna' untouched by this function, when the static
 * state could not be created; true otherwise.
 */
bool gen4_render_init(struct sna *sna)
{
	if (!gen4_render_setup(sna))
		return false;

	/* kgem notifications */
	sna->kgem.retire = gen4_render_retire;
	sna->kgem.expire = gen4_render_expire;

	/* render entry points */
	sna->render.blit_tex = gen4_blit_tex;
	sna->render.flush = gen4_render_flush;
	sna->render.reset = gen4_render_reset;
	sna->render.fini = gen4_render_fini;

	/* preferences, capabilities and limits */
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;

	return true;
}
 
/drivers/video/Intel-2D/gen4_render.h
0,0 → 1,2693
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef GEN4_RENDER_H
#define GEN4_RENDER_H
 
/*
 * Pack a command opcode value: the constant 3 at bit 29, Pipeline at
 * bit 27, Opcode at bit 24 and Subopcode at bit 16.
 */
#define GEN4_3D(Pipeline,Opcode,Subopcode) \
	((3 << 29) | ((Pipeline) << 27) | ((Opcode) << 24) | ((Subopcode) << 16))
 
#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)
 
#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)
 
#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4)
 
#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)
 
#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)
 
#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)
 
#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)
 
#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)
 
#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)
 
#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
/* DW1 */
# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN4_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN4_3DSTATE_PIPELINED_POINTERS */
#define GEN4_GS_DISABLE 0
#define GEN4_GS_ENABLE 1
#define GEN4_CLIP_DISABLE 0
#define GEN4_CLIP_ENABLE 1
 
/* for GEN4_PIPE_CONTROL */
#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define VE0_VALID (1 << 26)
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen4_defines.h */
#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN4_SVG_CTL 0x7400
 
#define GEN4_SVG_CTL_GS_BA (0 << 8)
#define GEN4_SVG_CTL_SS_BA (1 << 8)
#define GEN4_SVG_CTL_IO_BA (2 << 8)
#define GEN4_SVG_CTL_GS_AUB (3 << 8)
#define GEN4_SVG_CTL_IO_AUB (4 << 8)
#define GEN4_SVG_CTL_SIP (5 << 8)
 
#define GEN4_SVG_RDATA 0x7404
#define GEN4_SVG_WORK_CTL 0x7408
 
#define GEN4_VF_CTL 0x7500

/*
 * Bit 31 is written with an unsigned constant: (1 << 31) shifts into the
 * sign bit of a 32-bit int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened.
 */
#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_VF_STRG_VAL 0x7504
#define GEN4_VF_STR_VL_OVR 0x7508
#define GEN4_VF_VC_OVR 0x750c
#define GEN4_VF_STR_PSKIP 0x7510
#define GEN4_VF_MAX_PRIM 0x7514
#define GEN4_VF_RDATA 0x7518
 
#define GEN4_VS_CTL 0x7600
/*
 * Bit 31 is written with an unsigned constant: (1 << 31) shifts into the
 * sign bit of a 32-bit int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened.
 */
#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_VS_STRG_VAL 0x7604
#define GEN4_VS_RDATA 0x7608
 
#define GEN4_SF_CTL 0x7b00
/*
 * Bit 31 is written with an unsigned constant: (1 << 31) shifts into the
 * sign bit of a 32-bit int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened.
 */
#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_SF_STRG_VAL 0x7b04
#define GEN4_SF_RDATA 0x7b18
 
#define GEN4_WIZ_CTL 0x7c00
/*
 * Bit 31 is written with an unsigned constant: (1 << 31) shifts into the
 * sign bit of a 32-bit int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened.
 */
#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_WIZ_STRG_VAL 0x7c04
#define GEN4_WIZ_RDATA 0x7c18
 
#define GEN4_TS_CTL 0x7e00
/*
 * Bit 31 is written with an unsigned constant: (1 << 31) shifts into the
 * sign bit of a 32-bit int, which is undefined behaviour and yields a
 * negative value that sign-extends when widened.
 */
#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_TS_STRG_VAL 0x7e04
#define GEN4_TS_RDATA 0x7e08
 
#define GEN4_TD_CTL 0x8000
#define GEN4_TD_CTL_MUX_SHIFT 8
#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN4_TD_CTL2 0x8004
#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN4_TD_VF_VS_EMSK 0x8008
#define GEN4_TD_GS_EMSK 0x800c
#define GEN4_TD_CLIP_EMSK 0x8010
#define GEN4_TD_SF_EMSK 0x8014
#define GEN4_TD_WIZ_EMSK 0x8018
#define GEN4_TD_0_6_EHTRG_VAL 0x801c
#define GEN4_TD_0_7_EHTRG_VAL 0x8020
#define GEN4_TD_0_6_EHTRG_MSK 0x8024
#define GEN4_TD_0_7_EHTRG_MSK 0x8028
#define GEN4_TD_RDATA 0x802c
#define GEN4_TD_TS_EMSK 0x8030
 
#define GEN4_EU_CTL 0x8800
#define GEN4_EU_CTL_SELECT_SHIFT 16
#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
#define GEN4_EU_ATT_0 0x8810
#define GEN4_EU_ATT_1 0x8814
#define GEN4_EU_ATT_DATA_0 0x8820
#define GEN4_EU_ATT_DATA_1 0x8824
#define GEN4_EU_ATT_CLR_0 0x8830
#define GEN4_EU_ATT_CLR_1 0x8834
#define GEN4_EU_RDATA 0x8840
 
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
 
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN4_ANISORATIO_2 0
#define GEN4_ANISORATIO_4 1
#define GEN4_ANISORATIO_6 2
#define GEN4_ANISORATIO_8 3
#define GEN4_ANISORATIO_10 4
#define GEN4_ANISORATIO_12 5
#define GEN4_ANISORATIO_14 6
#define GEN4_ANISORATIO_16 7
 
#define GEN4_BLENDFACTOR_ONE 0x1
#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
#define GEN4_BLENDFACTOR_DST_COLOR 0x5
#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN4_BLENDFACTOR_ZERO 0x11
#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN4_BLENDFUNCTION_ADD 0
#define GEN4_BLENDFUNCTION_SUBTRACT 1
#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN4_BLENDFUNCTION_MIN 3
#define GEN4_BLENDFUNCTION_MAX 4
 
#define GEN4_ALPHATEST_FORMAT_UNORM8 0
#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN4_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN4_CLIP_API_OGL 0
#define GEN4_CLIP_API_DX 1
 
#define GEN4_CLIPMODE_NORMAL 0
#define GEN4_CLIPMODE_CLIP_ALL 1
#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN4_CLIPMODE_REJECT_ALL 3
#define GEN4_CLIPMODE_ACCEPT_ALL 4
 
#define GEN4_CLIP_NDCSPACE 0
#define GEN4_CLIP_SCREENSPACE 1
 
#define GEN4_COMPAREFUNCTION_ALWAYS 0
#define GEN4_COMPAREFUNCTION_NEVER 1
#define GEN4_COMPAREFUNCTION_LESS 2
#define GEN4_COMPAREFUNCTION_EQUAL 3
#define GEN4_COMPAREFUNCTION_LEQUAL 4
#define GEN4_COMPAREFUNCTION_GREATER 5
#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
#define GEN4_COMPAREFUNCTION_GEQUAL 7
 
#define GEN4_COVERAGE_PIXELS_HALF 0
#define GEN4_COVERAGE_PIXELS_1 1
#define GEN4_COVERAGE_PIXELS_2 2
#define GEN4_COVERAGE_PIXELS_4 3
 
#define GEN4_CULLMODE_BOTH 0
#define GEN4_CULLMODE_NONE 1
#define GEN4_CULLMODE_FRONT 2
#define GEN4_CULLMODE_BACK 3
 
#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN4_DEPTHFORMAT_D32_FLOAT 1
#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN4_DEPTHFORMAT_D16_UNORM 5
 
#define GEN4_FLOATING_POINT_IEEE_754 0
#define GEN4_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN4_FRONTWINDING_CW 0
#define GEN4_FRONTWINDING_CCW 1
 
#define GEN4_INDEX_BYTE 0
#define GEN4_INDEX_WORD 1
#define GEN4_INDEX_DWORD 2
 
#define GEN4_LOGICOPFUNCTION_CLEAR 0
#define GEN4_LOGICOPFUNCTION_NOR 1
#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN4_LOGICOPFUNCTION_INVERT 5
#define GEN4_LOGICOPFUNCTION_XOR 6
#define GEN4_LOGICOPFUNCTION_NAND 7
#define GEN4_LOGICOPFUNCTION_AND 8
#define GEN4_LOGICOPFUNCTION_EQUIV 9
#define GEN4_LOGICOPFUNCTION_NOOP 10
#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN4_LOGICOPFUNCTION_COPY 12
#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN4_LOGICOPFUNCTION_OR 14
#define GEN4_LOGICOPFUNCTION_SET 15
 
#define GEN4_MAPFILTER_NEAREST 0x0
#define GEN4_MAPFILTER_LINEAR 0x1
#define GEN4_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN4_MIPFILTER_NONE 0
#define GEN4_MIPFILTER_NEAREST 1
#define GEN4_MIPFILTER_LINEAR 3
 
#define GEN4_POLYGON_FRONT_FACING 0
#define GEN4_POLYGON_BACK_FACING 1
 
#define GEN4_PREFILTER_ALWAYS 0x0
#define GEN4_PREFILTER_NEVER 0x1
#define GEN4_PREFILTER_LESS 0x2
#define GEN4_PREFILTER_EQUAL 0x3
#define GEN4_PREFILTER_LEQUAL 0x4
#define GEN4_PREFILTER_GREATER 0x5
#define GEN4_PREFILTER_NOTEQUAL 0x6
#define GEN4_PREFILTER_GEQUAL 0x7
 
#define GEN4_PROVOKING_VERTEX_0 0
#define GEN4_PROVOKING_VERTEX_1 1
#define GEN4_PROVOKING_VERTEX_2 2
 
#define GEN4_RASTRULE_UPPER_LEFT 0
#define GEN4_RASTRULE_UPPER_RIGHT 1
 
#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN4_STENCILOP_KEEP 0
#define GEN4_STENCILOP_ZERO 1
#define GEN4_STENCILOP_REPLACE 2
#define GEN4_STENCILOP_INCRSAT 3
#define GEN4_STENCILOP_DECRSAT 4
#define GEN4_STENCILOP_INCR 5
#define GEN4_STENCILOP_DECR 6
#define GEN4_STENCILOP_INVERT 7
 
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
#define GEN4_SURFACEFORMAT_R8_SINT 0x142
#define GEN4_SURFACEFORMAT_R8_UINT 0x143
#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
/* GEN4_SURFACEFORMAT_* codes (continuation of a list that starts earlier in the file). */
#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN4_SURFACEFORMAT_R1_UINT 0x181
#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN4_SURFACEFORMAT_MONO8 0x18E
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN4_SURFACEFORMAT_FXT1 0x192
#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
/* Surface return format selector. */
#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
#define GEN4_SURFACERETURNFORMAT_S1 1
 
/*
 * Surface type codes. The same six names are defined again, with the same
 * values, next to the surface-state DW0 shift/mask macros further down.
 */
#define GEN4_SURFACE_1D 0
#define GEN4_SURFACE_2D 1
#define GEN4_SURFACE_3D 2
#define GEN4_SURFACE_CUBE 3
#define GEN4_SURFACE_BUFFER 4
#define GEN4_SURFACE_NULL 7
 
/* Border color mode selector (default vs. legacy). */
#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
#define GEN4_BORDER_COLOR_MODE_LEGACY 1
 
/* Texture coordinate modes. */
#define GEN4_TEXCOORDMODE_WRAP 0
#define GEN4_TEXCOORDMODE_MIRROR 1
#define GEN4_TEXCOORDMODE_CLAMP 2
#define GEN4_TEXCOORDMODE_CUBE 3
#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5
 
/* Thread priority values. */
#define GEN4_THREAD_PRIORITY_NORMAL 0
#define GEN4_THREAD_PRIORITY_HIGH 1
 
/* Tile walk direction. */
#define GEN4_TILEWALK_XMAJOR 0
#define GEN4_TILEWALK_YMAJOR 1
 
/* Vertex sub-pixel precision selector. */
#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
/* Vertex buffer access type. */
#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
/* Vertex-fetch component control codes (each fits a 4-bit vfcomponentN field). */
#define VFCOMPONENT_NOSTORE 0
#define VFCOMPONENT_STORE_SRC 1
#define VFCOMPONENT_STORE_0 2
#define VFCOMPONENT_STORE_1_FLT 3
#define VFCOMPONENT_STORE_1_INT 4
#define VFCOMPONENT_STORE_VID 5
#define VFCOMPONENT_STORE_IID 6
#define VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines: values for the instruction-word fields
 * used when encoding EU instructions.
 */
 
/* Access (align) mode. */
#define GEN4_ALIGN_1 0
#define GEN4_ALIGN_16 1
 
/* Register addressing mode. */
#define GEN4_ADDRESS_DIRECT 0
#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
/* Channel selectors. */
#define GEN4_CHANNEL_X 0
#define GEN4_CHANNEL_Y 1
#define GEN4_CHANNEL_Z 2
#define GEN4_CHANNEL_W 3
 
/* Compression control. */
#define GEN4_COMPRESSION_NONE 0
#define GEN4_COMPRESSION_2NDHALF 1
#define GEN4_COMPRESSION_COMPRESSED 2
 
/* Conditional modifiers; EQ/NEQ are aliases of Z/NZ. */
#define GEN4_CONDITIONAL_NONE 0
#define GEN4_CONDITIONAL_Z 1
#define GEN4_CONDITIONAL_NZ 2
#define GEN4_CONDITIONAL_EQ 1 /* Z */
#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
#define GEN4_CONDITIONAL_G 3
#define GEN4_CONDITIONAL_GE 4
#define GEN4_CONDITIONAL_L 5
#define GEN4_CONDITIONAL_LE 6
#define GEN4_CONDITIONAL_C 7
#define GEN4_CONDITIONAL_O 8
 
/* Debug control. */
#define GEN4_DEBUG_NONE 0
#define GEN4_DEBUG_BREAKPOINT 1
 
/* Dependency control. */
#define GEN4_DEPENDENCY_NORMAL 0
#define GEN4_DEPENDENCY_NOTCLEARED 1
#define GEN4_DEPENDENCY_NOTCHECKED 2
#define GEN4_DEPENDENCY_DISABLE 3
 
/* Execution size; the encoded value is log2 of the size in the name. */
#define GEN4_EXECUTE_1 0
#define GEN4_EXECUTE_2 1
#define GEN4_EXECUTE_4 2
#define GEN4_EXECUTE_8 3
#define GEN4_EXECUTE_16 4
#define GEN4_EXECUTE_32 5
 
/* Horizontal stride encodings. */
#define GEN4_HORIZONTAL_STRIDE_0 0
#define GEN4_HORIZONTAL_STRIDE_1 1
#define GEN4_HORIZONTAL_STRIDE_2 2
#define GEN4_HORIZONTAL_STRIDE_4 3
 
/* Saturate flag. */
#define GEN4_INSTRUCTION_NORMAL 0
#define GEN4_INSTRUCTION_SATURATE 1
 
/* Mask control; note that 0 means enabled and 1 means disabled. */
#define _MASK_ENABLE 0
#define _MASK_DISABLE 1
 
/* Instruction opcodes (each fits the 7-bit opcode field). */
#define GEN4_OPCODE_MOV 1
#define GEN4_OPCODE_SEL 2
#define GEN4_OPCODE_NOT 4
#define GEN4_OPCODE_AND 5
#define GEN4_OPCODE_OR 6
#define GEN4_OPCODE_XOR 7
#define GEN4_OPCODE_SHR 8
#define GEN4_OPCODE_SHL 9
#define GEN4_OPCODE_RSR 10
#define GEN4_OPCODE_RSL 11
#define GEN4_OPCODE_ASR 12
#define GEN4_OPCODE_CMP 16
#define GEN4_OPCODE_JMPI 32
#define GEN4_OPCODE_IF 34
#define GEN4_OPCODE_IFF 35
#define GEN4_OPCODE_ELSE 36
#define GEN4_OPCODE_ENDIF 37
#define GEN4_OPCODE_DO 38
#define GEN4_OPCODE_WHILE 39
#define GEN4_OPCODE_BREAK 40
#define GEN4_OPCODE_CONTINUE 41
#define GEN4_OPCODE_HALT 42
#define GEN4_OPCODE_MSAVE 44
#define GEN4_OPCODE_MRESTORE 45
#define GEN4_OPCODE_PUSH 46
#define GEN4_OPCODE_POP 47
#define GEN4_OPCODE_WAIT 48
#define GEN4_OPCODE_SEND 49
#define GEN4_OPCODE_ADD 64
#define GEN4_OPCODE_MUL 65
#define GEN4_OPCODE_AVG 66
#define GEN4_OPCODE_FRC 67
#define GEN4_OPCODE_RNDU 68
#define GEN4_OPCODE_RNDD 69
#define GEN4_OPCODE_RNDE 70
#define GEN4_OPCODE_RNDZ 71
#define GEN4_OPCODE_MAC 72
#define GEN4_OPCODE_MACH 73
#define GEN4_OPCODE_LZD 74
#define GEN4_OPCODE_SAD2 80
#define GEN4_OPCODE_SADA2 81
#define GEN4_OPCODE_DP4 84
#define GEN4_OPCODE_DPH 85
#define GEN4_OPCODE_DP3 86
#define GEN4_OPCODE_DP2 87
#define GEN4_OPCODE_DPA2 88
#define GEN4_OPCODE_LINE 89
#define GEN4_OPCODE_NOP 126
 
/*
 * Predicate control. The ALIGN1_* and ALIGN16_* names reuse the same
 * numeric range (2..), so which name applies depends on the access mode.
 */
#define GEN4_PREDICATE_NONE 0
#define GEN4_PREDICATE_NORMAL 1
#define GEN4_PREDICATE_ALIGN1_ANYV 2
#define GEN4_PREDICATE_ALIGN1_ALLV 3
#define GEN4_PREDICATE_ALIGN1_ANY2H 4
#define GEN4_PREDICATE_ALIGN1_ALL2H 5
#define GEN4_PREDICATE_ALIGN1_ANY4H 6
#define GEN4_PREDICATE_ALIGN1_ALL4H 7
#define GEN4_PREDICATE_ALIGN1_ANY8H 8
#define GEN4_PREDICATE_ALIGN1_ALL8H 9
#define GEN4_PREDICATE_ALIGN1_ANY16H 10
#define GEN4_PREDICATE_ALIGN1_ALL16H 11
#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN4_PREDICATE_ALIGN16_ANY4H 6
#define GEN4_PREDICATE_ALIGN16_ALL4H 7
 
/* Register file selectors (2-bit *_reg_file fields). */
#define GEN4_ARCHITECTURE_REGISTER_FILE 0
#define GEN4_GENERAL_REGISTER_FILE 1
#define GEN4_MESSAGE_REGISTER_FILE 2
#define GEN4_IMMEDIATE_VALUE 3
 
/*
 * Register data types (3-bit *_reg_type fields). VF shares its value with B
 * and V shares its value with HF; per the inline notes the VF/V meanings
 * apply to immediates.
 */
#define GEN4_REGISTER_TYPE_UD 0
#define GEN4_REGISTER_TYPE_D 1
#define GEN4_REGISTER_TYPE_UW 2
#define GEN4_REGISTER_TYPE_W 3
#define GEN4_REGISTER_TYPE_UB 4
#define GEN4_REGISTER_TYPE_B 5
#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN4_REGISTER_TYPE_HF 6
#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN4_REGISTER_TYPE_F 7
 
/* Architecture register file (ARF) register numbers; the kind is in the high nibble. */
#define GEN4_ARF_NULL 0x00
#define GEN4_ARF_ADDRESS 0x10
#define GEN4_ARF_ACCUMULATOR 0x20
#define GEN4_ARF_FLAG 0x30
#define GEN4_ARF_MASK 0x40
#define GEN4_ARF_MASK_STACK 0x50
#define GEN4_ARF_MASK_STACK_DEPTH 0x60
#define GEN4_ARF_STATE 0x70
#define GEN4_ARF_CONTROL 0x80
#define GEN4_ARF_NOTIFICATION_COUNT 0x90
#define GEN4_ARF_IP 0xA0
 
/* Mask register indices. */
#define GEN4_AMASK 0
#define GEN4_IMASK 1
#define GEN4_LMASK 2
#define GEN4_CMASK 3
 
 
 
/* Thread control. */
#define GEN4_THREAD_NORMAL 0
#define GEN4_THREAD_ATOMIC 1
#define GEN4_THREAD_SWITCH 2
 
/* Vertical stride encodings; 0xF selects the one-dimensional form. */
#define GEN4_VERTICAL_STRIDE_0 0
#define GEN4_VERTICAL_STRIDE_1 1
#define GEN4_VERTICAL_STRIDE_2 2
#define GEN4_VERTICAL_STRIDE_4 3
#define GEN4_VERTICAL_STRIDE_8 4
#define GEN4_VERTICAL_STRIDE_16 5
#define GEN4_VERTICAL_STRIDE_32 6
#define GEN4_VERTICAL_STRIDE_64 7
#define GEN4_VERTICAL_STRIDE_128 8
#define GEN4_VERTICAL_STRIDE_256 9
#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
/* Region width; the encoded value is log2 of the width in the name. */
#define GEN4_WIDTH_1 0
#define GEN4_WIDTH_2 1
#define GEN4_WIDTH_4 2
#define GEN4_WIDTH_8 3
#define GEN4_WIDTH_16 4
 
/* Stateless buffer boundary; each step doubles the size, starting at 1K. */
#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11
 
/* Polygon facing. */
#define GEN4_POLYGON_FACING_FRONT 0
#define GEN4_POLYGON_FACING_BACK 1
 
/* Message target ids (each fits the 4-bit msg_target field). */
#define GEN4_MESSAGE_TARGET_NULL 0
#define GEN4_MESSAGE_TARGET_MATH 1
#define GEN4_MESSAGE_TARGET_SAMPLER 2
#define GEN4_MESSAGE_TARGET_GATEWAY 3
#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN4_MESSAGE_TARGET_URB 6
#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7
 
/* Sampler return formats (value 1 is not defined here). */
#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3
 
/*
 * Sampler message types. Several names share a numeric value, so the value
 * alone does not identify the message; presumably the SIMD mode
 * disambiguates -- confirm against the hardware documentation.
 */
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
 
/* Dataport OWord block sizes. */
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
/* Dataport OWord dual-block sizes. */
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
/* Dataport DWord scattered block sizes. */
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* Dataport read message types. */
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
/* Dataport read target caches. */
#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
/* Render-target write message sub-types. */
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
/* Dataport write message types (value 6 is not defined here). */
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/*
 * Math function selectors. The "was N" notes record that SIN, COS and
 * SINCOS previously carried different values in this list.
 */
#define GEN4_MATH_FUNCTION_INV 1
#define GEN4_MATH_FUNCTION_LOG 2
#define GEN4_MATH_FUNCTION_EXP 3
#define GEN4_MATH_FUNCTION_SQRT 4
#define GEN4_MATH_FUNCTION_RSQ 5
#define GEN4_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN4_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN4_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN4_MATH_FUNCTION_TAN 9
#define GEN4_MATH_FUNCTION_POW 10
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
/* Math message: integer signedness. */
#define GEN4_MATH_INTEGER_UNSIGNED 0
#define GEN4_MATH_INTEGER_SIGNED 1
 
/* Math message: precision. */
#define GEN4_MATH_PRECISION_FULL 0
#define GEN4_MATH_PRECISION_PARTIAL 1
 
/* Math message: saturation. */
#define GEN4_MATH_SATURATE_NONE 0
#define GEN4_MATH_SATURATE_SATURATE 1
 
/* Math message: vector vs. scalar data. */
#define GEN4_MATH_DATA_VECTOR 0
#define GEN4_MATH_DATA_SCALAR 1
 
/* URB message opcode (only WRITE is defined here). */
#define GEN4_URB_OPCODE_WRITE 0
 
/* URB swizzle control (fits the 2-bit swizzle_control field). */
#define GEN4_URB_SWIZZLE_NONE 0
#define GEN4_URB_SWIZZLE_INTERLEAVE 1
#define GEN4_URB_SWIZZLE_TRANSPOSE 2
 
/* Per-thread scratch space size; each step doubles the size, starting at 1K. */
#define GEN4_SCRATCH_SPACE_SIZE_1K 0
#define GEN4_SCRATCH_SPACE_SIZE_2K 1
#define GEN4_SCRATCH_SPACE_SIZE_4K 2
#define GEN4_SCRATCH_SPACE_SIZE_8K 3
#define GEN4_SCRATCH_SPACE_SIZE_16K 4
#define GEN4_SCRATCH_SPACE_SIZE_32K 5
#define GEN4_SCRATCH_SPACE_SIZE_64K 6
#define GEN4_SCRATCH_SPACE_SIZE_128K 7
#define GEN4_SCRATCH_SPACE_SIZE_256K 8
#define GEN4_SCRATCH_SPACE_SIZE_512K 9
#define GEN4_SCRATCH_SPACE_SIZE_1M 10
#define GEN4_SCRATCH_SPACE_SIZE_2M 11
 
 
 
 
/*
 * Command opcodes. Each value is 16 bits wide, matching the 16-bit
 * opcode field of the command packet headers declared below.
 */
#define CMD_URB_FENCE 0x6000
#define CMD_CONST_BUFFER_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
 
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_INSN_POINTER 0x6102
#define CMD_PIPELINE_SELECT 0x6104
 
/* 0x78xx group: state pointers and vertex/index buffer setup. */
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
#define CMD_VERTEX_BUFFER 0x7808
#define CMD_VERTEX_ELEMENT 0x7809
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS 0x780b
 
/*
 * 3D state command opcodes in the 0x79xx range.
 *
 * CMD_GLOBAL_DEPTH_OFFSET_CLAMP previously shared 0x7908 with
 * CMD_LINE_STIPPLE_PATTERN, which made the two commands indistinguishable.
 * The depth-offset clamp is sub-opcode 0x09, so its value is 0x7909.
 */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
#define CMD_CHROMA_KEY 0x7904
#define CMD_DEPTH_BUFFER 0x7905
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CMD_POLY_STIPPLE_PATTERN 0x7907
#define CMD_LINE_STIPPLE_PATTERN 0x7908
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
 
/* Remaining command opcodes: pipe control, 3D primitive and MI flush. */
#define CMD_PIPE_CONTROL 0x7a00
 
#define CMD_3D_PRIM 0x7b00
 
#define CMD_MI_FLUSH 0x0200
 
 
/* Various values from the R0 vertex header:
 * primitive end / start marker bits.
*/
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
 
/* media pipeline */
 
/* VFE mode selector. */
#define GEN4_VFE_MODE_GENERIC 0x0
#define GEN4_VFE_MODE_VLD_MPEG2 0x1
#define GEN4_VFE_MODE_IS 0x2
#define GEN4_VFE_MODE_AVC_MC 0x4
#define GEN4_VFE_MODE_AVC_IT 0x7
#define GEN4_VFE_MODE_VC1_IT 0xB
 
/* VFE debug counter control. */
#define GEN4_VFE_DEBUG_COUNTER_FREE 0
#define GEN4_VFE_DEBUG_COUNTER_FROZEN 1
#define GEN4_VFE_DEBUG_COUNTER_ONCE 2
#define GEN4_VFE_DEBUG_COUNTER_ALWAYS 3
 
/* VLD_STATE: picture structure, quantiser scale type, scan order and picture type. */
#define GEN4_MPEG_TOP_FIELD 1
#define GEN4_MPEG_BOTTOM_FIELD 2
#define GEN4_MPEG_FRAME 3
#define GEN4_MPEG_QSCALE_LINEAR 0
#define GEN4_MPEG_QSCALE_NONLINEAR 1
#define GEN4_MPEG_ZIGZAG_SCAN 0
#define GEN4_MPEG_ALTER_VERTICAL_SCAN 1
#define GEN4_MPEG_I_PICTURE 1
#define GEN4_MPEG_P_PICTURE 2
#define GEN4_MPEG_B_PICTURE 3
 
/* Command packets:
*/
/*
 * Generic command header: a 16-bit length followed by a 16-bit opcode,
 * together filling one 32-bit word.
 */
struct header {
    unsigned int length:16;
    unsigned int opcode:16;
};

/* The same header, viewable either field-by-field or as one raw dword. */
union header_union {
    struct header bits;
    unsigned int dword;
};
 
/*
 * 3D control packet: a header dword, a destination-address dword and two
 * further raw dwords. The bit-fields of each nested struct add up to 32.
 */
struct gen4_3d_control {
    struct {
        unsigned int length:8;
        unsigned int notify_enable:1;
        unsigned int pad:3;
        unsigned int wc_flush_enable:1;
        unsigned int depth_stall_enable:1;
        unsigned int operation:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int pad:2;
        unsigned int dest_addr_type:1;
        unsigned int dest_addr:29;
    } dest;

    unsigned int dword2;
    unsigned int dword3;
};
 
 
/*
 * 3D primitive packet: one bit-field header dword followed by five plain
 * dwords giving vertex and instance counts and start locations.
 */
struct gen4_3d_primitive {
    struct {
        unsigned int length:8;
        unsigned int pad:2;
        unsigned int topology:5;
        unsigned int indexed:1;
        unsigned int opcode:16;
    } header;

    unsigned int verts_per_instance;
    unsigned int start_vert_location;
    unsigned int instance_count;
    unsigned int start_instance_location;
    unsigned int base_vert_location;
};
 
/* These seem to be passed around as function args, so it works out
 * better to keep them as #defines. The four values are distinct single
 * bits, so they can be OR-ed together; they fit a 4-bit field.
*/
#define GEN4_FLUSH_READ_CACHE 0x1
#define GEN4_FLUSH_STATE_CACHE 0x2
#define GEN4_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define GEN4_FLUSH_SNAPSHOT_COUNTERS 0x8
 
/*
 * MI flush packet, a single dword: 4 flag bits (presumably the
 * GEN4_FLUSH_* bits), 12 pad bits and a 16-bit opcode.
 */
struct gen4_mi_flush {
    unsigned int flags:4;
    unsigned int pad:12;
    unsigned int opcode:16;
};
 
/* VF statistics packet, a single dword: enable bit, 15 pad bits, 16-bit opcode. */
struct gen4_vf_statistics {
    unsigned int statistics_enable:1;
    unsigned int pad:15;
    unsigned int opcode:16;
};
 
 
 
/*
 * Binding table pointers packet: the common header followed by one dword
 * per stage (vs, gs, clp, sf, wm).
 */
struct gen4_binding_table_pointers {
    struct header header;

    unsigned int vs;
    unsigned int gs;
    unsigned int clp;
    unsigned int sf;
    unsigned int wm;
};
 
 
/* Blend constant color packet: the common header plus four float components. */
struct gen4_blend_constant_color {
    struct header header;
    float blend_constant_color[4];
};
 
 
/*
 * Depth buffer packet: header, three bit-field dwords and a base address.
 * dword1, dword3 and dword4 can each be read field-by-field through
 * .bits or as a whole through .dword; each .bits group is 32 bits wide.
 */
struct gen4_depthbuffer {
    union header_union header;

    union {
        struct {
            unsigned int pitch:18;
            unsigned int format:3;
            unsigned int pad:4;
            unsigned int depth_offset_disable:1;
            unsigned int tile_walk:1;
            unsigned int tiled_surface:1;
            unsigned int pad2:1;
            unsigned int surface_type:3;
        } bits;
        unsigned int dword;
    } dword1;

    unsigned int dword2_base_addr;

    union {
        struct {
            unsigned int pad:1;
            unsigned int mipmap_layout:1;
            unsigned int lod:4;
            unsigned int width:13;
            unsigned int height:13;
        } bits;
        unsigned int dword;
    } dword3;

    union {
        struct {
            unsigned int pad:12;
            unsigned int min_array_element:9;
            unsigned int depth:11;
        } bits;
        unsigned int dword;
    } dword4;
};
 
/*
 * Drawing rectangle packet: the common header followed by three dwords,
 * each holding a pair of 16-bit values (min corner, max corner, origin).
 */
struct gen4_drawrect {
    struct header header;

    unsigned int xmin:16;
    unsigned int ymin:16;

    unsigned int xmax:16;
    unsigned int ymax:16;

    unsigned int xorg:16;
    unsigned int yorg:16;
};
 
 
 
 
/* Global depth offset clamp packet: the common header plus one float value. */
struct gen4_global_depth_offset_clamp {
    struct header header;
    float depth_offset_clamp;
};
 
/*
 * Index buffer packet: a header (bit-fields or raw dword) followed by the
 * buffer start and end values.
 */
struct gen4_indexbuffer {
    union {
        struct {
            unsigned int length:8;
            unsigned int index_format:2;
            unsigned int cut_index_enable:1;
            unsigned int pad:5;
            unsigned int opcode:16;
        } bits;
        unsigned int dword;
    } header;

    unsigned int buffer_start;
    unsigned int buffer_end;
};
 
 
/*
 * Line stipple packet: the common header, a dword carrying the 16-bit
 * pattern, and a dword carrying the repeat count and its inverse.
 */
struct gen4_line_stipple {
    struct header header;

    struct {
        unsigned int pattern:16;
        unsigned int pad:16;
    } bits0;

    struct {
        unsigned int repeat_count:9;
        unsigned int pad:7;
        unsigned int inverse_repeat_count:16;
    } bits1;
};
 
 
/*
 * Pipelined state pointers packet: the common header followed by one dword
 * per unit (vs, gs, clp, sf, wm, cc). Every dword keeps a 27-bit offset in
 * its last field; gs and clp additionally start with an enable bit.
 */
struct gen4_pipelined_state_pointers {
    struct header header;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } vs;

    struct {
        unsigned int enable:1;
        unsigned int pad:4;
        unsigned int offset:27;
    } gs;

    struct {
        unsigned int enable:1;
        unsigned int pad:4;
        unsigned int offset:27;
    } clp;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } sf;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } wm;

    struct {
        unsigned int pad:5;
        unsigned int offset:27; /* KW: check me! */
    } cc;
};
 
 
/* Polygon stipple offset packet: the common header plus 5-bit y and x offsets. */
struct gen4_polygon_stipple_offset {
    struct header header;

    struct {
        unsigned int y_offset:5;
        unsigned int pad:3;
        unsigned int x_offset:5;
        unsigned int pad0:19;
    } bits0;
};
 
 
 
/* Polygon stipple pattern packet: the common header plus 32 pattern dwords. */
struct gen4_polygon_stipple {
    struct header header;
    unsigned int stipple[32];
};
 
 
 
/* Pipeline select packet, a single dword: select bit, 15 pad bits, 16-bit opcode. */
struct gen4_pipeline_select {
    struct {
        unsigned int pipeline_select:1;
        unsigned int pad:15;
        unsigned int opcode:16;
    } header;
};
 
 
/*
 * Pipe control packet: a header dword with flush/stall/post-sync controls,
 * a destination address dword, and two data dwords.
 */
struct gen4_pipe_control {
    struct {
        unsigned int length:8;
        unsigned int notify_enable:1;
        unsigned int pad:2;
        unsigned int instruction_state_cache_flush_enable:1;
        unsigned int write_cache_flush_enable:1;
        unsigned int depth_stall_enable:1;
        unsigned int post_sync_operation:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int pad:2;
        unsigned int dest_addr_type:1;
        unsigned int dest_addr:29;
    } bits1;

    unsigned int data0;
    unsigned int data1;
};
 
 
/*
 * URB fence packet: a header with one realloc bit per unit, followed by
 * two dwords that each pack three 10-bit fence values.
 */
struct gen4_urb_fence {
    struct {
        unsigned int length:8;
        unsigned int vs_realloc:1;
        unsigned int gs_realloc:1;
        unsigned int clp_realloc:1;
        unsigned int sf_realloc:1;
        unsigned int vfe_realloc:1;
        unsigned int cs_realloc:1;
        unsigned int pad:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int vs_fence:10;
        unsigned int gs_fence:10;
        unsigned int clp_fence:10;
        unsigned int pad:2;
    } bits0;

    struct {
        unsigned int sf_fence:10;
        unsigned int vf_fence:10;
        unsigned int cs_fence:10;
        unsigned int pad:2;
    } bits1;
};
 
/*
 * Constant buffer state packet (previously gen4_command_streamer): the
 * common header plus a dword with the URB entry count and entry size.
 */
struct gen4_constant_buffer_state {
    struct header header;

    struct {
        unsigned int nr_urb_entries:3;
        unsigned int pad:1;
        unsigned int urb_entry_size:5;
        unsigned int pad0:23;
    } bits0;
};
 
/*
 * Constant buffer packet: a header carrying a valid bit, then a dword
 * with a 6-bit buffer length and a 26-bit buffer address.
 */
struct gen4_constant_buffer {
    struct {
        unsigned int length:8;
        unsigned int valid:1;
        unsigned int pad:7;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int buffer_length:6;
        unsigned int buffer_address:26;
    } bits0;
};
 
/*
 * State base address packet: the common header followed by five dwords.
 * Each dword starts with a modify_enable bit; bits0..bits2 carry 27-bit
 * base addresses and bits3..bits4 carry 20-bit upper bounds.
 */
struct gen4_state_base_address {
    struct header header;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int general_state_address:27;
    } bits0;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int surface_state_address:27;
    } bits1;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int indirect_object_state_address:27;
    } bits2;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int general_state_upper_bound:20;
    } bits3;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int indirect_object_state_upper_bound:20;
    } bits4;
};
 
/* State prefetch packet: the common header plus a 3-bit count and 26-bit pointer. */
struct gen4_state_prefetch {
    struct header header;

    struct {
        unsigned int prefetch_count:3;
        unsigned int pad:3;
        unsigned int prefetch_pointer:26;
    } bits0;
};
 
/* System instruction pointer packet: the common header plus a 28-bit pointer. */
struct gen4_system_instruction_pointer {
    struct header header;

    struct {
        unsigned int pad:4;
        unsigned int system_instruction_pointer:28;
    } bits0;
};
 
 
 
 
/* State structs for the various fixed function units:
*/
 
 
/*
 * First thread-state dword used by the unit state structs: GRF register
 * count and a 26-bit kernel start pointer.
 */
struct thread0 {
    unsigned int pad0:1;
    unsigned int grf_reg_count:3;
    unsigned int pad1:2;
    unsigned int kernel_start_pointer:26;
};
 
/*
 * Second thread-state dword: exception enables, floating point mode,
 * thread priority, binding table entry count and single-program-flow bit.
 */
struct thread1 {
    unsigned int ext_halt_exception_enable:1;
    unsigned int sw_exception_enable:1;
    unsigned int mask_stack_exception_enable:1;
    unsigned int timeout_exception_enable:1;
    unsigned int illegal_op_exception_enable:1;
    unsigned int pad0:3;
    unsigned int depth_coef_urb_read_offset:6; /* WM only */
    unsigned int pad1:2;
    unsigned int floating_point_mode:1;
    unsigned int thread_priority:1;
    unsigned int binding_table_entry_count:8;
    unsigned int pad3:5;
    unsigned int single_program_flow:1;
};
 
/* Third thread-state dword: scratch space size and 22-bit scratch base pointer. */
struct thread2 {
    unsigned int per_thread_scratch_space:4;
    unsigned int pad0:6;
    unsigned int scratch_space_base_pointer:22;
};
 
/*
 * Fourth thread-state dword: dispatch GRF start register plus the URB
 * and constant-URB entry read offsets and lengths (6 bits each).
 */
struct thread3 {
    unsigned int dispatch_grf_start_reg:4;
    unsigned int urb_entry_read_offset:6;
    unsigned int pad0:1;
    unsigned int urb_entry_read_length:6;
    unsigned int pad1:1;
    unsigned int const_urb_entry_read_offset:6;
    unsigned int pad2:1;
    unsigned int const_urb_entry_read_length:6;
    unsigned int pad3:1;
};
 
 
 
/*
 * Clip unit state: the four shared thread dwords, a clip-specific thread4
 * dword, two clip control dwords and four float viewport bounds.
 */
struct gen4_clip_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:9;
        unsigned int gs_output_stats:1; /* not always */
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6; /* may be less */
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int pad0:13;
        unsigned int clip_mode:3;
        unsigned int userclip_enable_flags:8;
        unsigned int userclip_must_clip:1;
        unsigned int pad1:1;
        unsigned int guard_band_enable:1;
        unsigned int viewport_z_clip_enable:1;
        unsigned int viewport_xy_clip_enable:1;
        unsigned int vertex_position_space:1;
        unsigned int api_mode:1;
        unsigned int pad2:1;
    } clip5;

    struct {
        unsigned int pad0:5;
        unsigned int clipper_viewport_state_ptr:27;
    } clip6;

    float viewport_xmin;
    float viewport_xmax;
    float viewport_ymin;
    float viewport_ymax;
};
 
 
 
/*
 * Color calculator (cc) unit state: eight dwords, cc0..cc7. cc0..cc6 are
 * bit-field groups that each add up to 32 bits; cc7 holds the alpha
 * reference, readable either as a float or as four bytes.
 */
struct gen4_cc_unit_state {
    /* Stencil operations and functions; bf_* fields mirror the non-bf ones. */
    struct {
        unsigned int pad0:3;
        unsigned int bf_stencil_pass_depth_pass_op:3;
        unsigned int bf_stencil_pass_depth_fail_op:3;
        unsigned int bf_stencil_fail_op:3;
        unsigned int bf_stencil_func:3;
        unsigned int bf_stencil_enable:1;
        unsigned int pad1:2;
        unsigned int stencil_write_enable:1;
        unsigned int stencil_pass_depth_pass_op:3;
        unsigned int stencil_pass_depth_fail_op:3;
        unsigned int stencil_fail_op:3;
        unsigned int stencil_func:3;
        unsigned int stencil_enable:1;
    } cc0;

    /* Stencil reference values and masks, 8 bits each. */
    struct {
        unsigned int bf_stencil_ref:8;
        unsigned int stencil_write_mask:8;
        unsigned int stencil_test_mask:8;
        unsigned int stencil_ref:8;
    } cc1;

    /* Logic-op enable, depth test controls and the bf_* stencil masks. */
    struct {
        unsigned int logicop_enable:1;
        unsigned int pad0:10;
        unsigned int depth_write_enable:1;
        unsigned int depth_test_function:3;
        unsigned int depth_test:1;
        unsigned int bf_stencil_write_mask:8;
        unsigned int bf_stencil_test_mask:8;
    } cc2;

    /* Alpha test and blend enables. */
    struct {
        unsigned int pad0:8;
        unsigned int alpha_test_func:3;
        unsigned int alpha_test:1;
        unsigned int blend_enable:1;
        unsigned int ia_blend_enable:1;
        unsigned int pad1:1;
        unsigned int alpha_test_format:1;
        unsigned int pad2:16;
    } cc3;

    /* Offset of the cc viewport state. */
    struct {
        unsigned int pad0:5;
        unsigned int cc_viewport_state_offset:27;
    } cc4;

    /* ia_* blend factors and function, logic-op function, dither enable. */
    struct {
        unsigned int pad0:2;
        unsigned int ia_dest_blend_factor:5;
        unsigned int ia_src_blend_factor:5;
        unsigned int ia_blend_function:3;
        unsigned int statistics_enable:1;
        unsigned int logicop_func:4;
        unsigned int pad1:11;
        unsigned int dither_enable:1;
    } cc5;

    /* Clamp controls, dither offsets, blend factors and blend function. */
    struct {
        unsigned int clamp_post_alpha_blend:1;
        unsigned int clamp_pre_alpha_blend:1;
        unsigned int clamp_range:2;
        unsigned int pad0:11;
        unsigned int y_dither_offset:2;
        unsigned int x_dither_offset:2;
        unsigned int dest_blend_factor:5;
        unsigned int src_blend_factor:5;
        unsigned int blend_function:3;
    } cc6;

    /* Alpha reference value. */
    struct {
        union {
            float f;
            unsigned char ub[4];
        } alpha_ref;
    } cc7;
};
 
 
 
/*
 * SF unit state: thread0, an SF-specific variant of the second thread
 * dword (sf1), thread2/thread3, a thread4 dword and three SF control
 * dwords (sf5..sf7). Each bit-field group adds up to 32 bits.
 */
struct gen4_sf_unit_state {
    struct thread0 thread0;

    struct {
        unsigned int pad0:7;
        unsigned int sw_exception_enable:1;
        unsigned int pad1:3;
        unsigned int mask_stack_exception_enable:1;
        unsigned int pad2:1;
        unsigned int illegal_op_exception_enable:1;
        unsigned int pad3:2;
        unsigned int floating_point_mode:1;
        unsigned int thread_priority:1;
        unsigned int binding_table_entry_count:8;
        unsigned int pad4:5;
        unsigned int single_program_flow:1;
    } sf1;

    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6;
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int front_winding:1;
        unsigned int viewport_transform:1;
        unsigned int pad0:3;
        unsigned int sf_viewport_state_offset:27;
    } sf5;

    struct {
        unsigned int pad0:9;
        unsigned int dest_org_vbias:4;
        unsigned int dest_org_hbias:4;
        unsigned int scissor:1;
        unsigned int disable_2x2_trifilter:1;
        unsigned int disable_zero_pix_trifilter:1;
        unsigned int point_rast_rule:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int line_width:4;
        unsigned int fast_scissor_disable:1;
        unsigned int cull_mode:2;
        unsigned int aa_enable:1;
    } sf6;

    struct {
        unsigned int point_size:11;
        unsigned int use_point_size_state:1;
        unsigned int subpixel_precision:1;
        unsigned int sprite_point:1;
        unsigned int pad0:11;
        unsigned int trifan_pv:2;
        unsigned int linestrip_pv:2;
        unsigned int tristrip_pv:2;
        unsigned int line_last_pixel_enable:1;
    } sf7;
};
 
 
/*
 * GS unit state: the four shared thread dwords, a thread4 dword (note the
 * 1-bit max_threads field here), a sampler dword and a gs6 control dword.
 */
struct gen4_gs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:1;
        unsigned int pad3:6;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } gs5;

    struct {
        unsigned int max_vp_index:4;
        unsigned int pad0:26;
        unsigned int reorder_enable:1;
        unsigned int pad1:1;
    } gs6;
};
 
 
/*
 * VS unit state: the four shared thread dwords, a thread4 dword (4-bit
 * max_threads), a sampler dword and a vs6 dword with the enable and
 * vertex-cache-disable bits.
 */
struct gen4_vs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:4;
        unsigned int pad3:3;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } vs5;

    struct {
        unsigned int vs_enable:1;
        unsigned int vert_cache_disable:1;
        unsigned int pad0:30;
    } vs6;
};
 
 
/*
 * WM unit state: the four shared thread dwords, a sampler dword (wm4), a
 * dispatch/feature control dword (wm5), two float depth-offset values, and
 * three further dwords (wm8..wm10) shaped like thread0 for kernels 1..3.
 */
struct gen4_wm_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int stats_enable:1;
        unsigned int pad0:1;
        unsigned int sampler_count:3;
        unsigned int sampler_state_pointer:27;
    } wm4;

    struct {
        unsigned int enable_8_pix:1;
        unsigned int enable_16_pix:1;
        unsigned int enable_32_pix:1;
        unsigned int pad0:7;
        unsigned int legacy_global_depth_bias:1;
        unsigned int line_stipple:1;
        unsigned int depth_offset:1;
        unsigned int polygon_stipple:1;
        unsigned int line_aa_region_width:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int early_depth_test:1;
        unsigned int thread_dispatch_enable:1;
        unsigned int program_uses_depth:1;
        unsigned int program_computes_depth:1;
        unsigned int program_uses_killpixel:1;
        unsigned int legacy_line_rast:1;
        unsigned int transposed_urb_read:1;
        unsigned int max_threads:7;
    } wm5;

    float global_depth_offset_constant;
    float global_depth_offset_scale;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_1:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_1:26;
    } wm8;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_2:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_2:26;
    } wm9;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_3:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_3:26;
    } wm10;
};
 
/*
 * WM unit state followed by filler bytes; the pad length is chosen so
 * that state plus pad come to 64 bytes.
 */
struct gen4_wm_unit_state_padded {
    struct gen4_wm_unit_state state;
    char pad[64 - sizeof(struct gen4_wm_unit_state)];
};
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
/* Border color for the default mode: four floating-point channels. */
struct gen4_sampler_default_border_color {
    float color[4];
};
 
/* Border color for the legacy mode: four single-byte channels. */
struct gen4_sampler_legacy_border_color {
    uint8_t color[4];
};
 
/*
 * Sampler state: four dwords. ss0 holds filter and LOD controls, ss1 the
 * wrap modes and LOD range, ss2 the border color pointer, and ss3 the
 * anisotropy, chroma key and monochrome filter settings.
 */
struct gen4_sampler_state {
    struct {
        unsigned int shadow_function:3;
        unsigned int lod_bias:11;
        unsigned int min_filter:3;
        unsigned int mag_filter:3;
        unsigned int mip_filter:2;
        unsigned int base_level:5;
        unsigned int pad:1;
        unsigned int lod_preclamp:1;
        unsigned int border_color_mode:1;
        unsigned int pad0:1;
        unsigned int disable:1;
    } ss0;

    struct {
        unsigned int r_wrap_mode:3;
        unsigned int t_wrap_mode:3;
        unsigned int s_wrap_mode:3;
        unsigned int pad:3;
        unsigned int max_lod:10;
        unsigned int min_lod:10;
    } ss1;

    struct {
        unsigned int pad:5;
        unsigned int border_color_pointer:27;
    } ss2;

    struct {
        unsigned int pad:19;
        unsigned int max_aniso:3;
        unsigned int chroma_key_mode:1;
        unsigned int chroma_key_index:2;
        unsigned int chroma_key_enable:1;
        unsigned int monochrome_filter_width:3;
        unsigned int monochrome_filter_height:3;
    } ss3;
};
 
 
/* Clipper viewport: float minimum and maximum for x and y. */
struct gen4_clipper_viewport {
    float xmin;
    float xmax;
    float ymin;
    float ymax;
};
 
/* Color calculator viewport: float minimum and maximum depth. */
struct gen4_cc_viewport {
    float min_depth;
    float max_depth;
};
 
/*
 * SF viewport: six float viewport matrix entries (m00, m11, m22, m30,
 * m31, m32) followed by a scissor rectangle of four shorts.
 */
struct gen4_sf_viewport {
    struct {
        float m00;
        float m11;
        float m22;
        float m30;
        float m31;
        float m32;
    } viewport;

    struct {
        short xmin;
        short ymin;
        short xmax;
        short ymax;
    } scissor;
};
 
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
/*
 * Surface state: six dwords, ss0..ss5. Each bit-field group adds up to
 * 32 bits; ss1 is the plain base address.
 *
 * NOTE(review): in ss0, render_cache_read_mode comes after 6 cube bits and
 * 3 pad bits, while the GEN4_SURFACE_RC_READ_WRITE macro is (1 << 8).
 * Confirm which position is intended before relying on both together.
 */
struct gen4_surface_state {
    struct {
        unsigned int cube_pos_z:1;
        unsigned int cube_neg_z:1;
        unsigned int cube_pos_y:1;
        unsigned int cube_neg_y:1;
        unsigned int cube_pos_x:1;
        unsigned int cube_neg_x:1;
        unsigned int pad:3;
        unsigned int render_cache_read_mode:1;
        unsigned int mipmap_layout_mode:1;
        unsigned int vert_line_stride_ofs:1;
        unsigned int vert_line_stride:1;
        unsigned int color_blend:1;
        unsigned int writedisable_blue:1;
        unsigned int writedisable_green:1;
        unsigned int writedisable_red:1;
        unsigned int writedisable_alpha:1;
        unsigned int surface_format:9;
        unsigned int data_return_format:1;
        unsigned int pad0:1;
        unsigned int surface_type:3;
    } ss0;

    struct {
        unsigned int base_addr;
    } ss1;

    struct {
        unsigned int render_target_rotation:2;
        unsigned int mip_count:4;
        unsigned int width:13;
        unsigned int height:13;
    } ss2;

    struct {
        unsigned int tile_walk:1;
        unsigned int tiled_surface:1;
        unsigned int pad:1;
        unsigned int pitch:18;
        unsigned int depth:11;
    } ss3;

    struct {
        unsigned int pad:19;
        unsigned int min_array_elt:9;
        unsigned int min_lod:4;
    } ss4;

    struct {
        unsigned int pad:20;
        unsigned int y_offset:4;
        unsigned int pad2:1;
        unsigned int x_offset:7;
    } ss5;
};
 
/*
 * Shift/mask constants for building the surface state dwords directly as
 * integers. _MASK is not defined in this part of the file; the notes below
 * assume _MASK(high, low) covers bits low..high inclusive -- TODO confirm.
 */

/* Surface state DW0 */
/* NOTE(review): the struct places render_cache_read_mode after 9 other bits,
 * not 8 -- confirm which position is intended.
 */
#define GEN4_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN4_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN4_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN4_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN4_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN4_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN4_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN4_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN4_SURFACE_FORMAT_SHIFT 18
#define GEN4_SURFACE_FORMAT_MASK _MASK(26, 18)
 
#define GEN4_SURFACE_TYPE_SHIFT 29
#define GEN4_SURFACE_TYPE_MASK _MASK(31, 29)
/* Surface type codes; these repeat, with identical values, definitions made earlier in the file. */
#define GEN4_SURFACE_1D 0
#define GEN4_SURFACE_2D 1
#define GEN4_SURFACE_3D 2
#define GEN4_SURFACE_CUBE 3
#define GEN4_SURFACE_BUFFER 4
#define GEN4_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN4_SURFACE_HEIGHT_SHIFT 19
#define GEN4_SURFACE_HEIGHT_MASK _MASK(31, 19)
#define GEN4_SURFACE_WIDTH_SHIFT 6
#define GEN4_SURFACE_WIDTH_MASK _MASK(18, 6)
#define GEN4_SURFACE_LOD_SHIFT 2
#define GEN4_SURFACE_LOD_MASK _MASK(5, 2)
 
/* Surface state DW3 */
#define GEN4_SURFACE_DEPTH_SHIFT 21
#define GEN4_SURFACE_DEPTH_MASK _MASK(31, 21)
#define GEN4_SURFACE_PITCH_SHIFT 3
/* NOTE(review): bits 19..3 is 17 bits, while the struct's ss3.pitch field is 18 bits wide -- confirm. */
#define GEN4_SURFACE_PITCH_MASK _MASK(19, 3)
#define GEN4_SURFACE_TILED (1 << 1)
#define GEN4_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN4_SURFACE_MIN_LOD_SHIFT 28
#define GEN4_SURFACE_MIN_LOD_MASK _MASK(31, 28)
 
/* Surface state DW5 */
#define GEN4_SURFACE_X_OFFSET_SHIFT 25
#define GEN4_SURFACE_X_OFFSET_MASK _MASK(31, 25)
#define GEN4_SURFACE_Y_OFFSET_SHIFT 20
#define GEN4_SURFACE_Y_OFFSET_MASK _MASK(23, 20)
 
 
/*
 * Vertex buffer state: a bit-field dword (pitch, access type, buffer
 * index) followed by the start address, max index and, while the
 * "#if 1" below stays enabled, the instance data step rate.
 */
struct gen4_vertex_buffer_state {
    struct {
        unsigned int pitch:11;
        unsigned int pad:15;
        unsigned int access_type:1;
        unsigned int vb_index:5;
    } vb0;

    unsigned int start_addr;
    unsigned int max_index;
#if 1
    unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
#endif
};
 
/* Number of vertex buffer entries carried in one gen4_vb_array_state. */
#define GEN4_VBP_MAX 17

/* Vertex buffer packet: the common header plus GEN4_VBP_MAX buffer states. */
struct gen4_vb_array_state {
    struct header header;
    struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
};
 
 
/*
 * Vertex element state: two dwords. ve0 names the source (offset, format,
 * valid bit, vertex buffer index); ve1 gives the destination offset and
 * four 4-bit component controls, listed from component 3 down to 0.
 */
struct gen4_vertex_element_state {
    struct {
        unsigned int src_offset:11;
        unsigned int pad:5;
        unsigned int src_format:9;
        unsigned int pad0:1;
        unsigned int valid:1;
        unsigned int vertex_buffer_index:5;
    } ve0;

    struct {
        unsigned int dst_offset:8;
        unsigned int pad:8;
        unsigned int vfcomponent3:4;
        unsigned int vfcomponent2:4;
        unsigned int vfcomponent1:4;
        unsigned int vfcomponent0:4;
    } ve1;
};
 
#define GEN4_VEP_MAX 18
 
/* A `struct header` followed by a fixed array of GEN4_VEP_MAX vertex-element entries. */
struct gen4_vertex_element_packet {
struct header header;
struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
};
 
 
/*
 * 32-bit bit-field view that gen4_instruction embeds as bits3.urb.
 * The field widths sum to exactly 32 bits.
 */
struct gen4_urb_immediate {
    unsigned int opcode:4;
    unsigned int offset:6;
    unsigned int swizzle_control:2;
    unsigned int pad:1;
    unsigned int allocate:1;
    unsigned int used:1;
    unsigned int complete:1;
    unsigned int response_length:4;
    unsigned int msg_length:4;
    unsigned int msg_target:4;
    unsigned int pad1:3;
    unsigned int end_of_thread:1;
};

/* Instruction format for the execution units:
 *
 * Four 32-bit words: `header`, then the unions `bits1`, `bits2` and
 * `bits3`.  Every alternative inside each union is a different bit-field
 * interpretation of the same 32 bits (all width lists sum to 32).
 * NOTE(review): the da1/ia1/da16/ia16 names presumably stand for
 * direct/indirect addressing in align1/align16 mode -- confirm against
 * the hardware documentation.
 *
 * The indirect offsets and if_else.jump_count hold negative values, so
 * they are declared `signed int`: whether a plain `int` bit-field is
 * signed is implementation-defined in C.
 */
struct gen4_instruction
{
    struct
    {
        unsigned int opcode:7;
        unsigned int pad:1;
        unsigned int access_mode:1;
        unsigned int mask_control:1;
        unsigned int dependency_control:2;
        unsigned int compression_control:2;
        unsigned int thread_control:2;
        unsigned int predicate_control:4;
        unsigned int predicate_inverse:1;
        unsigned int execution_size:3;
        unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
        unsigned int pad0:2;
        unsigned int debug_control:1;
        unsigned int saturate:1;
    } header;

    /* Word 1: register files/types and the destination operand. */
    union {
        struct
        {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int src1_reg_file:2;
            unsigned int src1_reg_type:3;
            unsigned int pad:1;
            unsigned int dest_subreg_nr:5;
            unsigned int dest_reg_nr:8;
            unsigned int dest_horiz_stride:2;
            unsigned int dest_address_mode:1;
        } da1;

        struct
        {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int pad:6;
            signed int dest_indirect_offset:10; /* offset against the deref'd address reg */
            unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
            unsigned int dest_horiz_stride:2;
            unsigned int dest_address_mode:1;
        } ia1;

        struct
        {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int src1_reg_file:2;
            unsigned int src1_reg_type:3;
            unsigned int pad0:1;
            unsigned int dest_writemask:4;
            unsigned int dest_subreg_nr:1;
            unsigned int dest_reg_nr:8;
            unsigned int pad1:2;
            unsigned int dest_address_mode:1;
        } da16;

        struct
        {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int pad0:6;
            unsigned int dest_writemask:4;
            signed int dest_indirect_offset:6;
            unsigned int dest_subreg_nr:3;
            unsigned int pad1:2;
            unsigned int dest_address_mode:1;
        } ia16;
    } bits1;


    /* Word 2: the src0 operand. */
    union {
        struct
        {
            unsigned int src0_subreg_nr:5;
            unsigned int src0_reg_nr:8;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_horiz_stride:2;
            unsigned int src0_width:3;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad:6;
        } da1;

        struct
        {
            signed int src0_indirect_offset:10;
            unsigned int src0_subreg_nr:3;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_horiz_stride:2;
            unsigned int src0_width:3;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad:6;
        } ia1;

        struct
        {
            unsigned int src0_swz_x:2;
            unsigned int src0_swz_y:2;
            unsigned int src0_subreg_nr:1;
            unsigned int src0_reg_nr:8;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_swz_z:2;
            unsigned int src0_swz_w:2;
            unsigned int pad0:1;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } da16;

        struct
        {
            unsigned int src0_swz_x:2;
            unsigned int src0_swz_y:2;
            signed int src0_indirect_offset:6;
            unsigned int src0_subreg_nr:3;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_swz_z:2;
            unsigned int src0_swz_w:2;
            unsigned int pad0:1;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } ia16;

    } bits2;

    /* Word 3: the src1 operand, a jump descriptor, a message descriptor,
     * or a raw 32-bit value (`ud`). */
    union
    {
        struct
        {
            unsigned int src1_subreg_nr:5;
            unsigned int src1_reg_nr:8;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad:1;
            unsigned int src1_horiz_stride:2;
            unsigned int src1_width:3;
            unsigned int src1_vert_stride:4;
            unsigned int pad0:7;
        } da1;

        struct
        {
            unsigned int src1_swz_x:2;
            unsigned int src1_swz_y:2;
            unsigned int src1_subreg_nr:1;
            unsigned int src1_reg_nr:8;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_swz_z:2;
            unsigned int src1_swz_w:2;
            unsigned int pad1:1;
            unsigned int src1_vert_stride:4;
            unsigned int pad2:7;
        } da16;

        struct
        {
            signed int src1_indirect_offset:10;
            unsigned int src1_subreg_nr:3;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_horiz_stride:2;
            unsigned int src1_width:3;
            unsigned int src1_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } ia1;

        struct
        {
            unsigned int src1_swz_x:2;
            unsigned int src1_swz_y:2;
            signed int src1_indirect_offset:6;
            unsigned int src1_subreg_nr:3;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_swz_z:2;
            unsigned int src1_swz_w:2;
            unsigned int pad1:1;
            unsigned int src1_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad2:6;
        } ia16;


        struct
        {
            signed int jump_count:16; /* note: signed */
            unsigned int pop_count:4;
            unsigned int pad0:12;
        } if_else;

        struct {
            unsigned int function:4;
            unsigned int int_type:1;
            unsigned int precision:1;
            unsigned int saturate:1;
            unsigned int data_type:1;
            unsigned int pad0:8;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } math;

        struct {
            unsigned int binding_table_index:8;
            unsigned int sampler:4;
            unsigned int return_format:2;
            unsigned int msg_type:2;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } sampler;

        struct gen4_urb_immediate urb;

        struct {
            unsigned int binding_table_index:8;
            unsigned int msg_control:4;
            unsigned int msg_type:2;
            unsigned int target_cache:2;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } dp_read;

        struct {
            unsigned int binding_table_index:8;
            unsigned int msg_control:3;
            unsigned int pixel_scoreboard_clear:1;
            unsigned int msg_type:3;
            unsigned int send_commit_msg:1;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } dp_write;

        struct {
            unsigned int pad:16;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } generic;

        unsigned int ud;
    } bits3;
};
 
/* media pipeline */
 
/*
 * Three packed 32-bit words (vfe0, vfe1, vfe2); the field widths of each
 * word sum to 32.
 */
struct gen4_vfe_state
{
    struct
    {
        unsigned per_thread_scratch_space:4;
        unsigned pad3:3;
        unsigned extend_vfe_state_present:1;
        unsigned pad2:2;
        unsigned scratch_base:22;
    } vfe0;

    struct
    {
        unsigned debug_counter_control:2;
        unsigned children_present:1;
        unsigned vfe_mode:4;
        unsigned pad2:2;
        unsigned num_urb_entries:7;
        unsigned urb_entry_alloc_size:9;
        unsigned max_threads:7;
    } vfe1;

    struct
    {
        unsigned pad4:4;
        unsigned interface_descriptor_base:28;
    } vfe2;
};
 
/*
 * Four packed 32-bit words: two picture-parameter words (vld0, vld1) and
 * two remap tables holding sixteen 4-bit indices (index_0 .. index_15).
 */
struct gen4_vld_state
{
    struct
    {
        unsigned pad6:6;
        unsigned scan_order:1;
        unsigned intra_vlc_format:1;
        unsigned quantizer_scale_type:1;
        unsigned concealment_motion_vector:1;
        unsigned frame_predict_frame_dct:1;
        unsigned top_field_first:1;
        unsigned picture_structure:2;
        unsigned intra_dc_precision:2;
        unsigned f_code_0_0:4;
        unsigned f_code_0_1:4;
        unsigned f_code_1_0:4;
        unsigned f_code_1_1:4;
    } vld0;

    struct
    {
        unsigned pad2:9;
        unsigned picture_coding_type:2;
        unsigned pad:21;
    } vld1;

    struct
    {
        unsigned index_0:4;
        unsigned index_1:4;
        unsigned index_2:4;
        unsigned index_3:4;
        unsigned index_4:4;
        unsigned index_5:4;
        unsigned index_6:4;
        unsigned index_7:4;
    } desc_remap_table0;

    struct
    {
        unsigned index_8:4;
        unsigned index_9:4;
        unsigned index_10:4;
        unsigned index_11:4;
        unsigned index_12:4;
        unsigned index_13:4;
        unsigned index_14:4;
        unsigned index_15:4;
    } desc_remap_table1;
};
 
/*
 * Four packed 32-bit words (desc0 .. desc3).  The pointer fields drop
 * their low bits: 26-bit kernel_start_pointer, 27-bit sampler state and
 * binding table pointers.
 */
struct gen4_interface_descriptor
{
    struct
    {
        unsigned grf_reg_blocks:4;
        unsigned pad:2;
        unsigned kernel_start_pointer:26;
    } desc0;

    struct
    {
        unsigned pad:7;
        unsigned software_exception:1;
        unsigned pad2:3;
        unsigned maskstack_exception:1;
        unsigned pad3:1;
        unsigned illegal_opcode_exception:1;
        unsigned pad4:2;
        unsigned floating_point_mode:1;
        unsigned thread_priority:1;
        unsigned single_program_flow:1;
        unsigned pad5:1;
        unsigned const_urb_entry_read_offset:6;
        unsigned const_urb_entry_read_len:6;
    } desc1;

    struct
    {
        unsigned pad:2;
        unsigned sampler_count:3;
        unsigned sampler_state_pointer:27;
    } desc2;

    struct
    {
        unsigned binding_table_entry_count:5;
        unsigned binding_table_pointer:27;
    } desc3;
};
 
/*
 * Two packed 32-bit words:
 *   blend0 - blend factors/functions for the colour channels and, in the
 *            ia_* fields, a second independent set, plus enable bits
 *   blend1 - clamping, dithering, alpha test, logic op, per-channel
 *            write-disable bits and alpha-to-coverage controls
 */
struct gen6_blend_state {
    struct {
        unsigned dest_blend_factor:5;
        unsigned source_blend_factor:5;
        unsigned pad3:1;
        unsigned blend_func:3;
        unsigned pad2:1;
        unsigned ia_dest_blend_factor:5;
        unsigned ia_source_blend_factor:5;
        unsigned pad1:1;
        unsigned ia_blend_func:3;
        unsigned pad0:1;
        unsigned ia_blend_enable:1;
        unsigned blend_enable:1;
    } blend0;

    struct {
        unsigned post_blend_clamp_enable:1;
        unsigned pre_blend_clamp_enable:1;
        unsigned clamp_range:2;
        unsigned pad0:4;
        unsigned x_dither_offset:2;
        unsigned y_dither_offset:2;
        unsigned dither_enable:1;
        unsigned alpha_test_func:3;
        unsigned alpha_test_enable:1;
        unsigned pad1:1;
        unsigned logic_op_func:4;
        unsigned logic_op_enable:1;
        unsigned pad2:1;
        unsigned write_disable_b:1;
        unsigned write_disable_g:1;
        unsigned write_disable_r:1;
        unsigned write_disable_a:1;
        unsigned pad3:1;
        unsigned alpha_to_coverage_dither:1;
        unsigned alpha_to_one:1;
        unsigned alpha_to_coverage:1;
    } blend1;
};
 
/*
 * One packed word (cc0), one word that is either a float alpha reference
 * or an 8-bit integer one (cc1), then four float constants (r, g, b, a).
 */
struct gen6_color_calc_state {
    struct {
        unsigned alpha_test_format:1;
        unsigned pad0:14;
        unsigned round_disable:1;
        unsigned bf_stencil_ref:8;
        unsigned stencil_ref:8;
    } cc0;

    /* Two views of the same 32 bits. */
    union {
        float alpha_ref_f;
        struct {
            unsigned ui:8;
            unsigned pad0:24;
        } alpha_ref_fi;
    } cc1;

    float constant_r;
    float constant_g;
    float constant_b;
    float constant_a;
};
 
/*
 * Three packed 32-bit words:
 *   ds0 - stencil ops/functions/enables, with a second set in the bf_* fields
 *   ds1 - four 8-bit stencil masks
 *   ds2 - depth write/test controls in the top six bits
 */
struct gen6_depth_stencil_state {
    struct {
        unsigned pad0:3;
        unsigned bf_stencil_pass_depth_pass_op:3;
        unsigned bf_stencil_pass_depth_fail_op:3;
        unsigned bf_stencil_fail_op:3;
        unsigned bf_stencil_func:3;
        unsigned bf_stencil_enable:1;
        unsigned pad1:2;
        unsigned stencil_write_enable:1;
        unsigned stencil_pass_depth_pass_op:3;
        unsigned stencil_pass_depth_fail_op:3;
        unsigned stencil_fail_op:3;
        unsigned stencil_func:3;
        unsigned stencil_enable:1;
    } ds0;

    struct {
        unsigned bf_stencil_write_mask:8;
        unsigned bf_stencil_test_mask:8;
        unsigned stencil_write_mask:8;
        unsigned stencil_test_mask:8;
    } ds1;

    struct {
        unsigned pad0:26;
        unsigned depth_write_enable:1;
        unsigned depth_test_func:3;
        unsigned pad1:1;
        unsigned depth_test_enable:1;
    } ds2;
};
 
/* Sampler filter selectors; FILTER_COUNT is the number of real entries. */
typedef enum {
    SAMPLER_FILTER_NEAREST  = 0,
    SAMPLER_FILTER_BILINEAR = 1,
    FILTER_COUNT            = 2
} sampler_filter_t;
 
/* Sampler extend selectors; EXTEND_COUNT is the number of real entries. */
typedef enum {
    SAMPLER_EXTEND_NONE    = 0,
    SAMPLER_EXTEND_REPEAT  = 1,
    SAMPLER_EXTEND_PAD     = 2,
    SAMPLER_EXTEND_REFLECT = 3,
    EXTEND_COUNT           = 4
} sampler_extend_t;
 
/*
 * WM kernel identifiers.  The first ten come in pairs: the base kernel at
 * an even value, immediately followed by its _P variant.  KERNEL_COUNT is
 * the number of real entries.
 */
typedef enum {
    WM_KERNEL              = 0,
    WM_KERNEL_P            = 1,

    WM_KERNEL_MASK         = 2,
    WM_KERNEL_MASK_P       = 3,

    WM_KERNEL_MASKCA       = 4,
    WM_KERNEL_MASKCA_P     = 5,

    WM_KERNEL_MASKSA       = 6,
    WM_KERNEL_MASKSA_P     = 7,

    WM_KERNEL_OPACITY      = 8,
    WM_KERNEL_OPACITY_P    = 9,

    WM_KERNEL_VIDEO_PLANAR = 10,
    WM_KERNEL_VIDEO_PACKED = 11,
    KERNEL_COUNT           = 12
} wm_kernel_t;
 
#endif
/drivers/video/Intel-2D/gen5_render.c
1062,7 → 1062,6
gen5_align_vertex(sna, tmp);
return true;
 
return false;
}
 
 
1407,5 → 1406,7
 
sna->render.max_3d_size = MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
/drivers/video/Intel-2D/gen6_render.c
42,7 → 42,7
 
#include "brw/brw.h"
#include "gen6_render.h"
 
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define NO_COMPOSITE 0
209,12 → 209,12
{
uint32_t src, dst;
 
// src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
 
src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
 
// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
 
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
1836,10 → 1836,6
gen6_magic_ca_pass(sna, op);
}
 
if (op->mask.bo)
kgem_bo_destroy(&sna->kgem, op->mask.bo);
if (op->src.bo)
kgem_bo_destroy(&sna->kgem, op->src.bo);
 
// sna_render_composite_redirect_done(sna, op);
}
2708,8 → 2704,7
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
2718,6 → 2713,12
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
3494,6 → 3495,8
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
 
/drivers/video/Intel-2D/gen7_render.c
255,6 → 255,7
src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
 
 
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
1357,6 → 1358,14
}
}
 
/*
 * Emit a single composite rectangle `r` for operation `op`.
 *
 * Calls gen7_get_rectangles() asking for one rectangle, with
 * gen7_emit_composite_state as the state callback, then hands the
 * rectangle to the operation's prim_emit hook.
 * NOTE(review): the return value of gen7_get_rectangles() is ignored --
 * confirm it cannot report failure.
 */
fastcall static void gen7_render_composite_blt(struct sna *sna,
                                               const struct sna_composite_op *op,
                                               const struct sna_composite_rectangles *r)
{
    gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

    op->prim_emit(sna, op, r);
}
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
1390,14 → 1399,6
}
 
 
/*
 * Emit a single composite rectangle `r` for operation `op`.
 *
 * Calls gen7_get_rectangles() asking for one rectangle, with
 * gen7_emit_composite_state as the state callback, then hands the
 * rectangle to the operation's prim_emit hook.
 * NOTE(review): the return value of gen7_get_rectangles() is ignored --
 * confirm it cannot report failure.
 */
fastcall static void gen7_render_composite_blt(struct sna *sna,
                                               const struct sna_composite_op *op,
                                               const struct sna_composite_rectangles *r)
{
    gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

    op->prim_emit(sna, op, r);
}
 
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
1502,6 → 1503,93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
1510,8 → 1598,6
assert(sna->render.vertex_offset == 0);
}
 
 
 
static void
gen7_render_context_switch(struct kgem *kgem,
int new_mode)
1593,7 → 1679,6
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
 
static bool gen7_render_setup(struct sna *sna)
{
struct gen7_render_state *state = &sna->render_state.gen7;
1680,7 → 1765,6
return state->general_bo != NULL;
}
 
 
bool gen7_render_init(struct sna *sna)
{
if (!gen7_render_setup(sna))
1698,6 → 1782,8
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
 
/drivers/video/Intel-2D/i915_drm.h
944,6 → 944,8
 
struct drm_i915_mask_update {
__u32 handle;
__u32 width;
__u32 height;
__u32 bo_size;
__u32 bo_pitch;
__u32 bo_map;
/drivers/video/Intel-2D/kgem-sna.c
32,7 → 32,19
#include "sna.h"
#include "sna_reg.h"
 
/*
 * Issue software interrupt 0x40 with eax = 68, ebx = 12 and ecx = mem, and
 * return whatever the call leaves in eax.  This file uses it in place of
 * munmap() to release a buffer-object mapping.
 * NOTE(review): per the KolibriOS system-call list this is function 68,
 * subfunction 12 (free a memory block) -- confirm the meaning of the
 * return value there.
 *
 * The "memory" clobber tells the compiler that the call affects memory it
 * cannot see, so accesses to the released region are neither moved across
 * the call nor served from values cached in registers.
 */
static inline
int user_free(void *mem)
{
    int val;

    __asm__ __volatile__(
    "int $0x40"
    :"=a"(val)
    :"a"(68),"b"(12),"c"(mem)
    :"memory");

    return val;
}
 
 
 
unsigned int cpu_cache_size();
 
static struct kgem_bo *
969,7 → 981,7
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
 
kgem->min_alignment = 4;
kgem->min_alignment = 16;
if (gen < 040)
kgem->min_alignment = 64;
 
1315,7 → 1327,7
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
1327,6 → 1339,8
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
printf("%s: handle=%d\n", __FUNCTION__, bo->handle);
 
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
assert(!bo->snoop || bo->rq == NULL);
1587,6 → 1601,8
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
printf("%s: handle=%d\n", __FUNCTION__, bo->handle);
assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged);
4198,7 → 4214,18
return 1;
};
 
/*
 * Drain every kgem->pinned_batches[] list: while a list is non-empty,
 * take its first buffer object and pass it to kgem_bo_destroy().
 * NOTE(review): this terminates only if kgem_bo_destroy() unlinks the
 * object from the list -- confirm.
 */
void kgem_close_batches(struct kgem *kgem)
{
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(kgem->pinned_batches); idx++) {
        for (;;) {
            struct kgem_bo *first;

            if (list_is_empty(&kgem->pinned_batches[idx]))
                break;

            first = list_first_entry(&kgem->pinned_batches[idx],
                                     struct kgem_bo, list);
            kgem_bo_destroy(kgem, first);
        }
    }
}
 
 
 
/drivers/video/Intel-2D/kgem.h
28,7 → 28,7
#ifndef KGEM_H
#define KGEM_H
 
#define HAS_DEBUG_FULL 0
#define HAS_DEBUG_FULL 1
 
#include <stdint.h>
#include <stdbool.h>
/drivers/video/Intel-2D/sna.c
10,6 → 10,16
static struct sna_fb sna_fb;
static struct kgem_bo *mask_bo;
 
static int mask_width, mask_height;
 
/*
 * Issue software interrupt 0x40 with eax = 5 and ebx = time.
 * NOTE(review): per the KolibriOS system-call list function 5 pauses the
 * caller, with `time` in hundredths of a second -- confirm the unit.
 */
static inline void delay(uint32_t time)
{
    __asm__ __volatile__("int $0x40"
                         : /* no outputs */
                         : "a"(5), "b"(time)
                         : "memory");
}
 
typedef struct __attribute__((packed))
{
unsigned handle;
96,7 → 106,7
sna->render.active = 0;
}
 
bool sna_accel_init(struct sna *sna)
int sna_accel_init(struct sna *sna)
{
const char *backend;
 
122,15 → 132,12
} else if (sna->info->gen >= 050) {
if (gen5_render_init(sna))
backend = "Ironlake";
/* } else if (sna->info->gen >= 040) {
} else if (sna->info->gen >= 040) {
if (gen4_render_init(sna))
backend = "Broadwater/Crestline";
} else if (sna->info->gen >= 030) {
/* } else if (sna->info->gen >= 030) {
if (gen3_render_init(sna))
backend = "gen3";
} else if (sna->info->gen >= 020) {
if (gen2_render_init(sna))
backend = "gen2"; */
backend = "gen3"; */
}
 
DBG(("%s(backend=%s, prefer_gpu=%x)\n",
156,10 → 163,12
 
DBG(("%s\n", __FUNCTION__));
 
sna = malloc(sizeof(struct sna));
sna = malloc(sizeof(*sna));
if (sna == NULL)
return false;
return 0;
 
memset(sna, 0, sizeof(*sna));
io.handle = service;
io.io_code = SRV_GET_PCI_INFO;
io.input = &device;
168,7 → 177,10
io.out_size = 0;
 
if (call_service(&io)!=0)
return false;
{
free(sna);
return 0;
};
 
sna->PciInfo = &device;
 
175,6 → 187,8
sna->info = intel_detect_chipset(sna->PciInfo);
 
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
delay(10);
/*
if (!xf86ReturnOptValBool(sna->Options,
OPTION_RELAXED_FENCING,
202,9 → 216,24
 
sna->flags = 0;
 
return sna_accel_init(sna);
sna_accel_init(sna);
 
delay(10);
return sna->render.caps;
}
 
/*
 * Shut the driver down.  Does nothing when no device was initialised;
 * otherwise calls the render backend's fini hook, releases the mask
 * buffer object, and then releases the pinned batches and the kgem cache.
 *
 * mask_bo is only assigned once sna_create_mask() has allocated it, so it
 * may still be NULL here; it is released only when present and then
 * cleared so a second call cannot release it twice.
 */
void sna_fini()
{
    if (sna_device == NULL)
        return;

    sna_device->render.fini(sna_device);

    if (mask_bo != NULL) {
        kgem_bo_destroy(&sna_device->kgem, mask_bo);
        mask_bo = NULL;
    }

    kgem_close_batches(&sna_device->kgem);
    kgem_cleanup_cache(&sna_device->kgem);
}
 
#if 0
 
static bool sna_solid_cache_init(struct sna *sna)
411,6 → 440,16
};
 
/*
 * Release the buffer object behind `bitmap`: bitmap->handle is cast to a
 * struct kgem_bo pointer and passed to kgem_bo_destroy().
 */
void sna_destroy_bitmap(bitmap_t *bitmap)
{
    struct kgem_bo *victim = (struct kgem_bo *)bitmap->handle;

    kgem_bo_destroy(&sna_device->kgem, victim);
}
 
void sna_lock_bitmap(bitmap_t *bitmap)
{
struct kgem_bo *bo;
424,18 → 463,12
int sna_create_mask()
{
struct kgem_bo *bo;
char proc_info[1024];
int width, height;
int i;
 
get_proc_info(proc_info);
printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height);
 
width = *(uint32_t*)(proc_info+42)+1;
height = *(uint32_t*)(proc_info+46)+1;
printf("%s width %d height %d\n", __FUNCTION__, width, height);
bo = kgem_create_2d(&sna_device->kgem, width, height,
bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height,
8,I915_TILING_NONE, CREATE_CPU_MAP);
if(bo == NULL)
448,6 → 481,8
memset(map, 0, bo->pitch * height);
mask_bo = bo;
mask_width = width;
mask_height = height;
 
return 0;
504,6 → 539,14
winw = *(uint32_t*)(proc_info+42)+1;
winh = *(uint32_t*)(proc_info+46)+1;
VG_CLEAR(update);
update.handle = mask_bo->handle;
// update.bo_size = __kgem_bo_size(mask_bo);
// update.bo_pitch = mask_bo->pitch;
update.bo_map = (__u32)MAP(mask_bo->map);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
mask_bo->pitch = update.bo_pitch;
memset(&src, 0, sizeof(src));
memset(&dst, 0, sizeof(dst));
memset(&mask, 0, sizeof(dst));
517,8 → 560,8
dst.drawable.height = sna_fb.height;
mask.drawable.bitsPerPixel = 8;
mask.drawable.width = winw;
mask.drawable.height = winh;
mask.drawable.width = update.width;
mask.drawable.height = update.height;
 
memset(&composite, 0, sizeof(composite));
 
550,13 → 593,6
composite.done(sna_device, &composite);
};
VG_CLEAR(update);
update.handle = mask_bo->handle;
update.bo_size = __kgem_bo_size(mask_bo);
update.bo_pitch = mask_bo->pitch;
update.bo_map = MAP(mask_bo->map);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
 
kgem_submit(&sna_device->kgem);
return 0;
/drivers/video/Intel-2D/sna.h
134,6 → 134,10
/* 4bpp formats */
} PictFormatShort;
 
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
 
#define PICT_FORMAT_A(f) PIXMAN_FORMAT_A(f)
 
#define RepeatNone 0
#define RepeatNormal 1
#define RepeatPad 2
/drivers/video/Intel-2D/sna_reg.h
51,6 → 51,7
#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16))
#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4)
#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
/drivers/video/Intel-2D/sna_render.h
10,6 → 10,10
 
#define GXinvalid 0xff
 
#define HW_BIT_BLIT (1<<0) /* BGRX blitter */
#define HW_TEX_BLIT (1<<1) /* stretch blit */
#define HW_VID_BLIT (1<<2) /* planar and packed video */
 
struct sna;
struct sna_glyph;
struct sna_video;
152,6 → 156,8
struct sna_render {
int active;
 
int caps;
 
int max_3d_size;
int max_3d_pitch;
 
160,7 → 166,6
#define PREFER_GPU_RENDER 0x2
#define PREFER_GPU_SPANS 0x4
 
 
bool (*composite)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src, PicturePtr mask,
int16_t src_x, int16_t src_y,
/drivers/video/Intel-2D/sna_render_inline.h
0,0 → 1,71
#ifndef SNA_RENDER_INLINE_H
#define SNA_RENDER_INLINE_H
 
/*
 * Report whether an area of width x height exceeds the 3D pipeline limit
 * (sna->render.max_3d_size) in either dimension, i.e. whether the
 * operation has to be split up into tiles.
 */
static inline bool need_tiling(struct sna *sna, int16_t width, int16_t height)
{
    const int limit = sna->render.max_3d_size;

    if (width > limit)
        return true;

    return height > limit;
}
 
/*
 * Report whether the pixmap is too large to render to: true when its
 * drawable width or height exceeds sna->render.max_3d_size.
 */
static inline bool need_redirect(struct sna *sna, PixmapPtr dst)
{
    const int limit = sna->render.max_3d_size;

    if (dst->drawable.width > limit)
        return true;

    return dst->drawable.height > limit;
}
 
/*
 * Store x and y in a struct sna_coordinate and return the same storage
 * reinterpreted as a float, using a union for the conversion.
 */
static inline float pack_2s(int16_t x, int16_t y)
{
    union {
        struct sna_coordinate p;
        float f;
    } pun;

    pun.p.y = y;
    pun.p.x = x;

    return pun.f;
}
 
/* Free entries left in the vertex buffer: vertex_size minus vertex_used. */
static inline int vertex_space(struct sna *sna)
{
    return sna->render.vertex_size
         - sna->render.vertex_used;
}
/*
 * Append one float to sna->render.vertices and advance vertex_used.
 * Asserts that the buffer is not already full.
 */
static inline void vertex_emit(struct sna *sna, float v)
{
    assert(sna->render.vertex_used < sna->render.vertex_size);

    sna->render.vertices[sna->render.vertex_used] = v;
    sna->render.vertex_used++;
}
/* Pack (x, y) with pack_2s() and append the result as one vertex entry. */
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
    const float packed = pack_2s(x, y);

    vertex_emit(sna, packed);
}
 
/*
 * Dwords still available in the batch: the distance from nbatch up to
 * kgem.surface, less the KGEM_BATCH_RESERVED tail.  Asserts that nbatch
 * is within the batch size and that the reserved tail still fits below
 * kgem.surface.
 */
static inline int batch_space(struct sna *sna)
{
    assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
    assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);

    return sna->kgem.surface
         - sna->kgem.nbatch
         - KGEM_BATCH_RESERVED;
}
 
/*
 * Append one dword to the batch buffer and advance nbatch.  Asserts that
 * a mode has been selected (kgem.mode != KGEM_NONE) and that the write
 * stays below kgem.surface with the reserved tail accounted for.
 */
static inline void batch_emit(struct sna *sna, uint32_t dword)
{
    assert(sna->kgem.mode != KGEM_NONE);
    assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);

    sna->kgem.batch[sna->kgem.nbatch] = dword;
    sna->kgem.nbatch++;
}
 
/*
 * Append a float to the batch buffer as its raw 32-bit pattern; the
 * float-to-dword reinterpretation goes through a union.
 */
static inline void batch_emit_float(struct sna *sna, float f)
{
    union {
        uint32_t dw;
        float f;
    } bits;

    bits.f = f;

    batch_emit(sna, bits.dw);
}
 
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/sna_stream.c
116,7 → 116,6
return sna_static_stream_offsetof(stream, p.store);
}
 
 
unsigned
sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,