Subversion Repositories Kolibri OS

Compare Revisions

Problem with comparison.

Regard whitespace Rev HEAD → Rev 3769

/drivers/video/Intel-2D/gen3_render.c
0,0 → 1,1971
/*
* Copyright © 2010-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
//#include "sna_video.h"
 
#include "gen3_render.h"
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
 
#define PREFER_BLT_FILL 1
 
enum {
SHADER_NONE = 0,
SHADER_ZERO,
SHADER_BLACK,
SHADER_WHITE,
SHADER_CONSTANT,
SHADER_LINEAR,
SHADER_RADIAL,
SHADER_TEXTURE,
SHADER_OPACITY,
};
 
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH_F(v) batch_emit_float(sna, v)
#define OUT_VERTEX(v) vertex_emit(sna, v)
 
enum gen3_radial_mode {
RADIAL_ONE,
RADIAL_TWO
};
 
static const struct blendinfo {
bool dst_alpha;
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen3_blend_op[] = {
/* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
/* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
/* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
/* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
/* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
/* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
/* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
/* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
/* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
};
 
#define S6_COLOR_WRITE_ONLY \
(S6_COLOR_WRITE_ENABLE | \
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \
BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \
BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT)
 
static const struct formatinfo {
unsigned int fmt, xfmt;
uint32_t card_fmt;
bool rb_reversed;
} gen3_tex_formats[] = {
{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false}
};
 
#define xFixedToDouble(f) pixman_fixed_to_double(f)
 
static inline bool too_large(int width, int height)
{
return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}
 
static inline uint32_t gen3_buf_tiling(uint32_t tiling)
{
uint32_t v = 0;
switch (tiling) {
case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
case I915_TILING_NONE: break;
}
return v;
}
static uint32_t gen3_get_blend_cntl(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t sblend;
uint32_t dblend;
 
sblend = BLENDFACT_ONE;
dblend = BLENDFACT_INV_SRC_ALPHA;
 
#if 0
if (op <= PictOpSrc) /* for clear and src disable blending */
return S6_COLOR_WRITE_ONLY;
 
/* If there's no dst alpha channel, adjust the blend op so that we'll
* treat it as always 1.
*/
if (gen3_blend_op[op].dst_alpha) {
if (PICT_FORMAT_A(dst_format) == 0) {
if (sblend == BLENDFACT_DST_ALPHA)
sblend = BLENDFACT_ONE;
else if (sblend == BLENDFACT_INV_DST_ALPHA)
sblend = BLENDFACT_ZERO;
}
 
/* gen3 engine reads 8bit color buffer into green channel
* in cases like color buffer blending etc., and also writes
* back green channel. So with dst_alpha blend we should use
* color factor. See spec on "8-bit rendering".
*/
if (dst_format == PICT_a8) {
if (sblend == BLENDFACT_DST_ALPHA)
sblend = BLENDFACT_DST_COLR;
else if (sblend == BLENDFACT_INV_DST_ALPHA)
sblend = BLENDFACT_INV_DST_COLR;
}
}
 
/* If the source alpha is being used, then we should only be in a case
* where the source blend factor is 0, and the source blend value is the
* mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen3_blend_op[op].src_alpha) {
if (dblend == BLENDFACT_SRC_ALPHA)
dblend = BLENDFACT_SRC_COLR;
else if (dblend == BLENDFACT_INV_SRC_ALPHA)
dblend = BLENDFACT_INV_SRC_COLR;
}
#endif
 
return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
}
static bool gen3_dst_rb_reversed(uint32_t format)
{
switch (format) {
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
case PICT_a8:
return false;
default:
return true;
}
}
 
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
 
static uint32_t gen3_get_dst_format(uint32_t format)
{
#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
switch (format) {
default:
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return BIAS | COLR_BUF_ARGB8888;
case PICT_a8:
return BIAS | COLR_BUF_8BIT;
}
#undef BIAS
}
 
 
 
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float dst_x, dst_y;
float src_x, src_y;
float msk_x, msk_y;
float w, h;
float *v;
 
dst_x = r->dst.x + op->dst.x;
dst_y = r->dst.y + op->dst.y;
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 18;
 
v[0] = dst_x + w;
v[1] = dst_y + h;
v[2] = (src_x + w) * op->src.scale[0];
v[3] = (src_y + h) * op->src.scale[1];
v[4] = (msk_x + w) * op->mask.scale[0];
v[5] = (msk_y + h) * op->mask.scale[1];
 
v[6] = dst_x;
v[7] = v[1];
v[8] = src_x * op->src.scale[0];
v[9] = v[3];
v[10] = msk_x * op->mask.scale[0];
v[11] =v[5];
 
v[12] = v[6];
v[13] = dst_y;
v[14] = v[8];
v[15] = src_y * op->src.scale[1];
v[16] = v[10];
v[17] = msk_y * op->mask.scale[1];
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static inline void
gen3_2d_perspective(struct sna *sna, int in, int out)
{
gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
gen3_fs_mul(out,
gen3_fs_operand(in, X, Y, ZERO, ONE),
gen3_fs_operand_reg(out));
}
 
static inline void
gen3_linear_coord(struct sna *sna,
const struct sna_composite_channel *channel,
int in, int out)
{
int c = channel->u.gen3.constants;
 
if (!channel->is_affine) {
gen3_2d_perspective(sna, in, FS_U0);
in = FS_U0;
}
 
gen3_fs_mov(out, gen3_fs_operand_zero());
gen3_fs_dp3(out, MASK_X,
gen3_fs_operand(in, X, Y, ONE, ZERO),
gen3_fs_operand_reg(c));
}
 
static void
gen3_radial_coord(struct sna *sna,
const struct sna_composite_channel *channel,
int in, int out)
{
int c = channel->u.gen3.constants;
 
if (!channel->is_affine) {
gen3_2d_perspective(sna, in, FS_U0);
in = FS_U0;
}
 
switch (channel->u.gen3.mode) {
case RADIAL_ONE:
/*
pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
r? = pdx*pdx + pdy*pdy
t = r?/sqrt(r?) - r1/dr;
*/
gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
gen3_fs_operand(in, X, Y, ZERO, ZERO),
gen3_fs_operand(c, Z, Z, ZERO, ZERO),
gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
gen3_fs_dp2add(FS_U0, MASK_X,
gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
gen3_fs_operand_zero());
gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
gen3_fs_mad(out, 0,
gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
break;
 
case RADIAL_TWO:
/*
pdx = x - c1x, pdy = y - c1y;
A = dx? + dy? - dr?
B = -2*(pdx*dx + pdy*dy + r1*dr);
C = pdx? + pdy? - r1?;
det = B*B - 4*A*C;
t = (-B + sqrt (det)) / (2 * A)
*/
 
/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
gen3_fs_add(FS_U0,
gen3_fs_operand(in, X, Y, ZERO, ZERO),
gen3_fs_operand(c, X, Y, Z, ZERO));
/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
gen3_fs_dp3(FS_U0, MASK_W,
gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
gen3_fs_operand(c+1, X, Y, Z, ZERO));
/* u1.x = pdx? + pdy? - r1?; [C] */
gen3_fs_dp3(FS_U1, MASK_X,
gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
/* u1.x = C, u1.y = B, u1.z=-4*A; */
gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
/* u1.x = B? - 4*A*C */
gen3_fs_dp2add(FS_U1, MASK_X,
gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
gen3_fs_operand_zero());
/* out.x = -B + sqrt (B? - 4*A*C), */
gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
gen3_fs_mad(out, MASK_X,
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
/* out.x = (-B + sqrt (B? - 4*A*C)) / (2 * A), */
gen3_fs_mul(out,
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
break;
}
}
 
static void
gen3_composite_emit_shader(struct sna *sna,
const struct sna_composite_op *op,
uint8_t blend)
{
bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
const struct sna_composite_channel *src, *mask;
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t shader_offset, id;
int src_reg, mask_reg;
int t, length;
 
src = &op->src;
mask = &op->mask;
if (mask->u.gen3.type == SHADER_NONE)
mask = NULL;
 
id = (src->u.gen3.type |
src->is_affine << 4 |
src->alpha_fixup << 5 |
src->rb_reversed << 6);
if (mask) {
id |= (mask->u.gen3.type << 8 |
mask->is_affine << 12 |
gen3_blend_op[blend].src_alpha << 13 |
op->has_component_alpha << 14 |
mask->alpha_fixup << 15 |
mask->rb_reversed << 16);
}
id |= dst_is_alpha << 24;
id |= op->rb_reversed << 25;
 
if (id == state->last_shader)
return;
 
state->last_shader = id;
 
shader_offset = sna->kgem.nbatch++;
t = 0;
switch (src->u.gen3.type) {
case SHADER_NONE:
case SHADER_OPACITY:
assert(0);
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
gen3_fs_dcl(FS_T8);
src_reg = FS_T8;
break;
case SHADER_TEXTURE:
case SHADER_RADIAL:
case SHADER_LINEAR:
gen3_fs_dcl(FS_S0);
gen3_fs_dcl(FS_T0);
t++;
break;
}
 
if (mask == NULL) {
switch (src->u.gen3.type) {
case SHADER_ZERO:
gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
goto done;
case SHADER_BLACK:
if (dst_is_alpha)
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
else
gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
goto done;
case SHADER_WHITE:
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
goto done;
}
if (src->alpha_fixup && dst_is_alpha) {
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
goto done;
}
/* No mask, so load directly to output color */
if (src->u.gen3.type != SHADER_CONSTANT) {
if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
src_reg = FS_R0;
else
src_reg = FS_OC;
}
switch (src->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(src_reg, FS_S0, FS_R0);
break;
 
case SHADER_RADIAL:
gen3_radial_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(src_reg, FS_S0, FS_R0);
break;
 
case SHADER_TEXTURE:
if (src->is_affine)
gen3_fs_texld(src_reg, FS_S0, FS_T0);
else
gen3_fs_texldp(src_reg, FS_S0, FS_T0);
break;
 
case SHADER_NONE:
case SHADER_WHITE:
case SHADER_BLACK:
case SHADER_ZERO:
assert(0);
case SHADER_CONSTANT:
break;
}
 
if (src_reg != FS_OC) {
if (src->alpha_fixup)
gen3_fs_mov(FS_OC,
src->rb_reversed ^ op->rb_reversed ?
gen3_fs_operand(src_reg, Z, Y, X, ONE) :
gen3_fs_operand(src_reg, X, Y, Z, ONE));
else if (dst_is_alpha)
gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
else if (src->rb_reversed ^ op->rb_reversed)
gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
else
gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
} else if (src->alpha_fixup)
gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
} else {
int out_reg = FS_OC;
if (op->rb_reversed)
out_reg = FS_U0;
 
switch (mask->u.gen3.type) {
case SHADER_CONSTANT:
gen3_fs_dcl(FS_T9);
mask_reg = FS_T9;
break;
case SHADER_TEXTURE:
case SHADER_LINEAR:
case SHADER_RADIAL:
gen3_fs_dcl(FS_S0 + t);
/* fall through */
case SHADER_OPACITY:
gen3_fs_dcl(FS_T0 + t);
break;
case SHADER_ZERO:
case SHADER_BLACK:
assert(0);
case SHADER_NONE:
case SHADER_WHITE:
break;
}
 
t = 0;
switch (src->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(FS_R0, FS_S0, FS_R0);
src_reg = FS_R0;
t++;
break;
 
case SHADER_RADIAL:
gen3_radial_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(FS_R0, FS_S0, FS_R0);
src_reg = FS_R0;
t++;
break;
 
case SHADER_TEXTURE:
if (src->is_affine)
gen3_fs_texld(FS_R0, FS_S0, FS_T0);
else
gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
src_reg = FS_R0;
t++;
break;
 
case SHADER_CONSTANT:
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
}
if (src->alpha_fixup)
gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
if (src->rb_reversed)
gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
 
switch (mask->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
mask_reg = FS_R1;
break;
 
case SHADER_RADIAL:
gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
mask_reg = FS_R1;
break;
 
case SHADER_TEXTURE:
if (mask->is_affine)
gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
else
gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
mask_reg = FS_R1;
break;
 
case SHADER_OPACITY:
switch (src->u.gen3.type) {
case SHADER_BLACK:
case SHADER_WHITE:
if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
gen3_fs_mov(out_reg,
gen3_fs_operand(FS_T0 + t, X, X, X, X));
} else {
gen3_fs_mov(out_reg,
gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
}
break;
default:
if (dst_is_alpha) {
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand(FS_T0 + t, X, X, X, X));
} else {
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, X, Y, Z, W),
gen3_fs_operand(FS_T0 + t, X, X, X, X));
}
}
goto mask_done;
 
case SHADER_CONSTANT:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_NONE:
break;
}
if (mask->alpha_fixup)
gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
if (mask->rb_reversed)
gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
 
if (dst_is_alpha) {
switch (src->u.gen3.type) {
case SHADER_BLACK:
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, W, W, W, W));
break;
default:
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand(mask_reg, W, W, W, W));
break;
}
} else {
/* If component alpha is active in the mask and the blend
* operation uses the source alpha, then we know we don't
* need the source value (otherwise we would have hit a
* fallback earlier), so we provide the source alpha (src.A *
* mask.X) as output color.
* Conversely, if CA is set and we don't need the source alpha,
* then we produce the source value (src.X * mask.X) and the
* source alpha is unused. Otherwise, we provide the non-CA
* source value (src.X * mask.A).
*/
if (op->has_component_alpha) {
switch (src->u.gen3.type) {
case SHADER_BLACK:
if (gen3_blend_op[blend].src_alpha)
gen3_fs_mov(out_reg,
gen3_fs_operand_reg(mask_reg));
else
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
break;
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand_reg(mask_reg));
break;
default:
if (gen3_blend_op[blend].src_alpha)
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand_reg(mask_reg));
else
gen3_fs_mul(out_reg,
gen3_fs_operand_reg(src_reg),
gen3_fs_operand_reg(mask_reg));
break;
}
} else {
switch (src->u.gen3.type) {
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, W, W, W, W));
break;
case SHADER_BLACK:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
break;
default:
gen3_fs_mul(out_reg,
gen3_fs_operand_reg(src_reg),
gen3_fs_operand(mask_reg, W, W, W, W));
break;
}
}
}
mask_done:
if (op->rb_reversed)
gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
}
 
done:
length = sna->kgem.nbatch - shader_offset;
sna->kgem.batch[shader_offset] =
_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
}
 
static uint32_t gen3_ms_tiling(uint32_t tiling)
{
uint32_t v = 0;
switch (tiling) {
case I915_TILING_Y: v |= MS3_TILE_WALK;
case I915_TILING_X: v |= MS3_TILED_SURFACE;
case I915_TILING_NONE: break;
}
return v;
}
 
static void gen3_emit_invariant(struct sna *sna)
{
/* Disable independent alpha blend */
OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
 
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) |
CSB_TCB(5, 5) |
CSB_TCB(6, 6) |
CSB_TCB(7, 7));
 
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE |
S4_VFMT_XY);
OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
 
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
 
OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
OUT_BATCH(0x00000000);
 
OUT_BATCH(_3DSTATE_STIPPLE);
OUT_BATCH(0x00000000);
 
sna->render_state.gen3.need_invariant = false;
}
 
#define MAX_OBJECTS 3 /* worst case: dst + src + mask */
 
static void
gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch(&sna->kgem, 200)) {
DBG(("%s: flushing batch: size %d > %d\n",
__FUNCTION__, 200,
sna->kgem.surface-sna->kgem.nbatch));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
DBG(("%s: flushing batch: reloc %d >= %d\n",
__FUNCTION__,
sna->kgem.nreloc,
(int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
DBG(("%s: flushing batch: exec %d >= %d\n",
__FUNCTION__,
sna->kgem.nexec,
(int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen3.need_invariant)
gen3_emit_invariant(sna);
#undef MAX_OBJECTS
}
 
static void gen3_emit_target(struct sna *sna,
struct kgem_bo *bo,
int width,
int height,
int format)
{
struct gen3_render_state *state = &sna->render_state.gen3;
 
assert(!too_large(width, height));
 
/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
assert(bo->unique_id != 0);
if (bo->unique_id != state->current_dst) {
uint32_t v;
 
DBG(("%s: setting new target id=%d, handle=%d\n",
__FUNCTION__, bo->unique_id, bo->handle));
 
OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_COLOR_BACK |
gen3_buf_tiling(bo->tiling) |
bo->pitch);
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
bo,
I915_GEM_DOMAIN_RENDER << 16 |
I915_GEM_DOMAIN_RENDER,
0));
 
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(gen3_get_dst_format(format));
 
v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
if (v != state->last_drawrect_limit) {
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(0); /* XXX dither origin? */
OUT_BATCH(0);
OUT_BATCH(v);
OUT_BATCH(0);
state->last_drawrect_limit = v;
}
 
state->current_dst = bo->unique_id;
}
kgem_bo_mark_dirty(bo);
}
 
static void gen3_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t map[4];
uint32_t sampler[4];
struct kgem_bo *bo[2];
unsigned int tex_count, n;
uint32_t ss2;
 
gen3_get_batch(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
else
OUT_BATCH(_3DSTATE_MODES_5_CMD |
PIPELINE_FLUSH_RENDER_CACHE |
PIPELINE_FLUSH_TEXTURE_CACHE);
kgem_clear_dirty(&sna->kgem);
}
 
gen3_emit_target(sna,
op->dst.bo,
op->dst.width,
op->dst.height,
op->dst.format);
 
ss2 = ~0;
tex_count = 0;
switch (op->src.u.gen3.type) {
case SHADER_OPACITY:
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
if (op->src.u.gen3.mode != state->last_diffuse) {
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(op->src.u.gen3.mode);
state->last_diffuse = op->src.u.gen3.mode;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
case SHADER_TEXTURE:
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
map[tex_count * 2 + 0] =
op->src.card_format |
gen3_ms_tiling(op->src.bo->tiling) |
(op->src.height - 1) << MS3_HEIGHT_SHIFT |
(op->src.width - 1) << MS3_WIDTH_SHIFT;
map[tex_count * 2 + 1] =
(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
 
sampler[tex_count * 2 + 0] = op->src.filter;
sampler[tex_count * 2 + 1] =
op->src.repeat |
tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
bo[tex_count] = op->src.bo;
tex_count++;
break;
}
switch (op->mask.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
if (op->mask.u.gen3.mode != state->last_specular) {
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(op->mask.u.gen3.mode);
state->last_specular = op->mask.u.gen3.mode;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
case SHADER_TEXTURE:
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
map[tex_count * 2 + 0] =
op->mask.card_format |
gen3_ms_tiling(op->mask.bo->tiling) |
(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
(op->mask.width - 1) << MS3_WIDTH_SHIFT;
map[tex_count * 2 + 1] =
(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
 
sampler[tex_count * 2 + 0] = op->mask.filter;
sampler[tex_count * 2 + 1] =
op->mask.repeat |
tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
bo[tex_count] = op->mask.bo;
tex_count++;
break;
case SHADER_OPACITY:
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
break;
}
 
{
uint32_t blend_offset = sna->kgem.nbatch;
 
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
OUT_BATCH(ss2);
OUT_BATCH(gen3_get_blend_cntl(op->op,
op->has_component_alpha,
op->dst.format));
 
if (memcmp(sna->kgem.batch + state->last_blend + 1,
sna->kgem.batch + blend_offset + 1,
2 * 4) == 0)
sna->kgem.nbatch = blend_offset;
else
state->last_blend = blend_offset;
}
 
if (op->u.gen3.num_constants) {
int count = op->u.gen3.num_constants;
if (state->last_constants) {
int last = sna->kgem.batch[state->last_constants+1];
if (last == (1 << (count >> 2)) - 1 &&
memcmp(&sna->kgem.batch[state->last_constants+2],
op->u.gen3.constants,
count * sizeof(uint32_t)) == 0)
count = 0;
}
if (count) {
state->last_constants = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
OUT_BATCH((1 << (count >> 2)) - 1);
 
memcpy(sna->kgem.batch + sna->kgem.nbatch,
op->u.gen3.constants,
count * sizeof(uint32_t));
sna->kgem.nbatch += count;
}
}
 
if (tex_count != 0) {
uint32_t rewind;
 
n = 0;
if (tex_count == state->tex_count) {
for (; n < tex_count; n++) {
if (map[2*n+0] != state->tex_map[2*n+0] ||
map[2*n+1] != state->tex_map[2*n+1] ||
state->tex_handle[n] != bo[n]->handle ||
state->tex_delta[n] != bo[n]->delta)
break;
}
}
if (n < tex_count) {
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
OUT_BATCH((1 << tex_count) - 1);
for (n = 0; n < tex_count; n++) {
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
bo[n],
I915_GEM_DOMAIN_SAMPLER<< 16,
0));
OUT_BATCH(map[2*n + 0]);
OUT_BATCH(map[2*n + 1]);
 
state->tex_map[2*n+0] = map[2*n+0];
state->tex_map[2*n+1] = map[2*n+1];
state->tex_handle[n] = bo[n]->handle;
state->tex_delta[n] = bo[n]->delta;
}
state->tex_count = n;
}
 
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
OUT_BATCH((1 << tex_count) - 1);
for (n = 0; n < tex_count; n++) {
OUT_BATCH(sampler[2*n + 0]);
OUT_BATCH(sampler[2*n + 1]);
OUT_BATCH(0);
}
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler+1],
&sna->kgem.batch[rewind + 1],
(3*tex_count + 1)*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
}
 
gen3_composite_emit_shader(sna, op, op->op);
}
 
static bool gen3_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
if (!op->need_magic_ca_pass)
return false;
 
DBG(("%s(%d)\n", __FUNCTION__,
sna->render.vertex_index - sna->render.vertex_start));
 
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
gen3_composite_emit_shader(sna, op, PictOpAdd);
 
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
(sna->render.vertex_index - sna->render.vertex_start));
OUT_BATCH(sna->render.vertex_start);
 
sna->render_state.gen3.last_blend = 0;
return true;
}
 
static void gen3_vertex_flush(struct sna *sna)
{
assert(sna->render.vertex_offset);
 
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
 
sna->kgem.batch[sna->render.vertex_offset] =
PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
(sna->render.vertex_index - sna->render.vertex_start);
sna->kgem.batch[sna->render.vertex_offset + 1] =
sna->render.vertex_start;
 
sna->render.vertex_offset = 0;
}
 
static int gen3_vertex_finish(struct sna *sna)
{
struct kgem_bo *bo;
 
DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
sna_vertex_wait__locked(&sna->render);
 
bo = sna->render.vbo;
if (bo) {
DBG(("%s: reloc = %d\n", __FUNCTION__,
sna->render.vertex_reloc[0]));
 
if (sna->render.vertex_reloc[0]) {
sna->kgem.batch[sna->render.vertex_reloc[0]] =
kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
 
sna->render.vertex_reloc[0] = 0;
}
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
sna->render.vbo = NULL;
 
kgem_bo_destroy(&sna->kgem, bo);
}
 
sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem,
256*1024, CREATE_GTT_MAP);
if (sna->render.vbo)
sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
if (sna->render.vbo)
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
return 0;
}
assert(sna->render.vbo->snoop == false);
 
if (sna->render.vertex_used) {
memcpy(sna->render.vertices,
sna->render.vertex_data,
sizeof(float)*sna->render.vertex_used);
}
sna->render.vertex_size = 64 * 1024 - 1;
return sna->render.vertex_size - sna->render.vertex_used;
}
 
static void gen3_vertex_close(struct sna *sna)
{
struct kgem_bo *bo, *free_bo = NULL;
unsigned int delta = 0;
 
assert(sna->render.vertex_offset == 0);
if (sna->render.vertex_reloc[0] == 0)
return;
 
DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));
 
bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
DBG(("%s: discarding full vbo\n", __FUNCTION__));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map)) {
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
if (sna->render.vertices == NULL) {
DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
}
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
memcpy(sna->kgem.batch + sna->kgem.nbatch,
sna->render.vertex_data,
sna->render.vertex_used * 4);
delta = sna->kgem.nbatch * 4;
bo = NULL;
sna->kgem.nbatch += sna->render.vertex_used;
} else {
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
bo = kgem_create_linear(&sna->kgem,
4*sna->render.vertex_used,
CREATE_NO_THROTTLE);
if (bo) {
assert(bo->snoop == false);
kgem_bo_write(&sna->kgem, bo,
sna->render.vertex_data,
4*sna->render.vertex_used);
}
free_bo = bo;
}
}
 
DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
sna->kgem.batch[sna->render.vertex_reloc[0]] =
kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
sna->render.vertex_reloc[0] = 0;
 
if (sna->render.vbo == NULL) {
DBG(("%s: resetting vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
}
 
if (free_bo)
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
static bool gen3_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen3_render_state *state = &sna->render_state.gen3;
int ndwords, i1_cmd = 0, i1_len = 0;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;
 
ndwords = 2;
if (op->need_magic_ca_pass)
ndwords += 100;
if (sna->render.vertex_reloc[0] == 0)
i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
if (state->floats_per_vertex != op->floats_per_vertex)
i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
 
if (!kgem_check_batch(&sna->kgem, ndwords+1))
return false;
 
if (i1_cmd) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
if (sna->render.vertex_reloc[0] == 0)
sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
if (state->floats_per_vertex != op->floats_per_vertex) {
state->floats_per_vertex = op->floats_per_vertex;
OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
}
}
 
if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
!op->need_magic_ca_pass) {
sna->render.vertex_offset = state->last_vertex_offset;
} else {
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(MI_NOOP); /* to be filled later */
OUT_BATCH(MI_NOOP);
sna->render.vertex_start = sna->render.vertex_index;
state->last_vertex_offset = sna->render.vertex_offset;
}
 
return true;
}
 
static int gen3_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
return 0;
 
if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
if (gen3_magic_ca_pass(sna, op)) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(gen3_get_blend_cntl(op->op,
op->has_component_alpha,
op->dst.format));
gen3_composite_emit_shader(sna, op, op->op);
}
}
 
return gen3_vertex_finish(sna);
}
 
inline static int gen3_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want)
{
int rem;
 
DBG(("%s: want=%d, rem=%d\n",
__FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
 
assert(want);
assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
 
start:
rem = vertex_space(sna);
if (unlikely(op->floats_per_rect > rem)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen3_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen3_rectangle_begin(sna, op))
goto flush;
else
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;
 
assert(want);
assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
return want;
 
flush:
DBG(("%s: flushing batch\n", __FUNCTION__));
if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
gen3_emit_composite_state(sna, op);
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_reloc[0] == 0);
goto start;
}
 
fastcall static void
gen3_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));
 
gen3_get_rectangles(sna, op, 1);
 
op->prim_emit(sna, op, r);
}
 
static void
gen3_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
 
}
 
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
static void
gen3_render_reset(struct sna *sna)
{
struct gen3_render_state *state = &sna->render_state.gen3;
 
state->need_invariant = true;
state->current_dst = 0;
state->tex_count = 0;
state->last_drawrect_limit = ~0U;
state->last_target = 0;
state->last_blend = 0;
state->last_constants = 0;
state->last_sampler = 0;
state->last_shader = 0x7fffffff;
state->last_diffuse = 0xcc00ffee;
state->last_specular = 0xcc00ffee;
 
state->floats_per_vertex = 0;
state->last_floats_per_vertex = 0;
state->last_vertex_offset = 0;
 
if (sna->render.vbo != NULL &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding vbo as next access will stall: %d\n",
__FUNCTION__, sna->render.vbo->presumed_offset));
discard_vbo(sna);
}
 
sna->render.vertex_reloc[0] = 0;
sna->render.vertex_offset = 0;
}
 
static void
gen3_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vertex_reloc[0] == 0 &&
sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen3_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
 
static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
CARD32 format)
{
unsigned int i;
 
for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
if (gen3_tex_formats[i].fmt == format) {
channel->card_format = gen3_tex_formats[i].card_fmt;
channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
return true;
}
}
return false;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static inline bool is_constant_ps(uint32_t type)
{
switch (type) {
case SHADER_NONE: /* be warned! */
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
return true;
default:
return false;
}
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
 
tmp->u.gen3.num_constants = 0;
tmp->src.u.gen3.type = SHADER_TEXTURE;
tmp->src.is_affine = true;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
gen3_composite_channel_set_format(&tmp->src, tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->mask.u.gen3.type = SHADER_TEXTURE;
tmp->mask.is_affine = true;
tmp->need_magic_ca_pass = false;
tmp->has_component_alpha = false;
 
 
tmp->mask.repeat = RepeatNone;
tmp->mask.filter = PictFilterNearest;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
gen3_composite_channel_set_format(&tmp->mask, tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
 
 
tmp->floats_per_vertex = 2;
if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex,
tmp->prim_emit != gen3_emit_composite_primitive));
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
tmp->blt = gen3_render_composite_blt;
 
tmp->done = gen3_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}
 
gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;
}
 
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);
 
assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen3_render_fini(struct sna *sna)
{
}
 
bool gen3_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
 
// render->video = gen3_render_video;
 
render->blit_tex = gen3_blit_tex;
 
render->reset = gen3_render_reset;
render->flush = gen3_render_flush;
render->fini = gen3_render_fini;
 
render->max_3d_size = MAX_3D_SIZE;
render->max_3d_pitch = MAX_3D_PITCH;
 
render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->kgem.retire = gen3_render_retire;
sna->kgem.expire = gen3_render_expire;
return true;
}
/drivers/video/Intel-2D/gen4_render.c
0,0 → 1,1448
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/* gen4 has a serious issue with its shaders that we need to flush
* after every rectangle... So until that is resolved, prefer
* the BLT engine.
*/
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0
 
#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0
 
#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 32
 
#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0
 
#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0
 
#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
 
/*
* this program computes dA/dx and dA/dy for the texture coordinates along
* with the base texture coordinate. It was extracted from the Mesa driver
*/
 
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32
 
#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50
 
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
#define NOKERNEL(kernel_enum, func, masked) \
[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
const void *data;
unsigned int size;
bool has_mask;
} wm_kernels[] = {
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
 
NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
 
NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
 
NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
 
NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
 
KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
 
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen4_blend_op[] = {
/* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
/* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
/* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
/* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};
 
/**
* Highest-valued BLENDFACTOR used in gen4_blend_op.
*
* This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
* GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
 
#define BLEND_OFFSET(s, d) \
(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)
 
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel);
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
#define GEN4_MAX_3D_SIZE 8192
 
static inline bool too_large(int width, int height)
{
return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
}
 
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
int base;
 
if (has_mask) {
if (is_ca) {
if (gen4_blend_op[op].src_alpha)
base = WM_KERNEL_MASKSA;
else
base = WM_KERNEL_MASKCA;
} else
base = WM_KERNEL_MASK;
} else
base = WM_KERNEL;
 
return base + !is_affine;
}
 
static bool gen4_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen4_render_state *state = &sna->render_state.gen4;
 
if (!op->need_magic_ca_pass)
return false;
 
assert(sna->render.vertex_index > sna->render.vertex_start);
 
DBG(("%s: CA fixup\n", __FUNCTION__));
assert(op->mask.bo != NULL);
assert(op->has_component_alpha);
 
gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
gen4_choose_composite_kernel(PictOpAdd,
true, true, op->is_affine));
 
OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
 
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
static uint32_t gen4_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
 
src = GEN4_BLENDFACTOR_ONE; //gen4_blend_op[op].src_blend;
dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
* it as always 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN4_BLENDFACTOR_DST_ALPHA)
src = GEN4_BLENDFACTOR_ONE;
else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
src = GEN4_BLENDFACTOR_ZERO;
}
 
/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen4_blend_op[op].src_alpha) {
if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
}
#endif
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, BLEND_OFFSET(src, dst)));
return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen4_get_card_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
}
}
 
static uint32_t gen4_get_dest_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8:
return GEN4_SURFACEFORMAT_A8_UNORM;
}
}
 
typedef struct gen4_surface_state_padded {
struct gen4_surface_state state;
char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
 
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
sampler_filter_t filter,
sampler_extend_t extend)
{
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
 
/* We use the legacy mode to get the semantics specified by
* the Render extension. */
sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;
 
switch (filter) {
default:
case SAMPLER_FILTER_NEAREST:
sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST;
sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST;
break;
case SAMPLER_FILTER_BILINEAR:
sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR;
sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR;
break;
}
 
switch (extend) {
default:
case SAMPLER_EXTEND_NONE:
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
break;
case SAMPLER_EXTEND_REPEAT:
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP;
break;
case SAMPLER_EXTEND_PAD:
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
break;
case SAMPLER_EXTEND_REFLECT:
sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
break;
}
}
 
static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
case I915_TILING_X: return GEN4_SURFACE_TILED;
case I915_TILING_Y: return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
}
}
 
/**
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static uint32_t
gen4_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t domains;
uint16_t offset;
uint32_t *ss;
 
assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}
 
offset = sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
 
ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
GEN4_SURFACE_BLEND_ENABLED |
format << GEN4_SURFACE_FORMAT_SHIFT);
 
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
ss[2] = ((width - 1) << GEN4_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
ss[3] = (gen4_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
 
return offset * sizeof(uint32_t);
}
 
static void gen4_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;
 
assert((sna->render.vb_id & (1 << id)) == 0);
 
OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
sna->render.vb_id |= 1 << id;
}
 
static void gen4_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
 
sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;
 
/* 7xpipelined pointers + 6xprimitive + 1xflush */
ndwords = op->need_magic_ca_pass? 20 : 6;
if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
 
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & (1 << id)) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);
 
return true;
}
 
static int gen4_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 25 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (op->need_magic_ca_pass && sna->render.vbo)
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}
 
return gen4_vertex_finish(sna);
}
 
inline static int gen4_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen4_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen4_rectangle_begin(sna, op))
goto flush;
else
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
sna->render.vertex_index += 3*want;
return want;
 
flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
 
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
/* Clear all surplus entries to zero in case of prefetch */
*offset = sna->kgem.surface;
return memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen4_surface_state_padded));
}
 
static void
gen4_emit_urb(struct sna *sna)
{
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
int urb_clip_start, urb_clip_size;
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;
 
if (!sna->render_state.gen4.needs_urb)
return;
 
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
urb_clip_start = urb_gs_start + urb_gs_size;
urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
urb_sf_start = urb_clip_start + urb_clip_size;
urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
while ((sna->kgem.nbatch & 15) > 12)
OUT_BATCH(MI_NOOP);
 
OUT_BATCH(GEN4_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |
UF0_CLIP_REALLOC |
UF0_GS_REALLOC |
UF0_VS_REALLOC |
1);
OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
 
/* Constant buffer state */
OUT_BATCH(GEN4_CS_URB_STATE | 0);
OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
 
sna->render_state.gen4.needs_urb = false;
}
 
static void
gen4_emit_state_base_address(struct sna *sna)
{
assert(sna->render_state.gen4.general_bo->proxy == NULL);
OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
sna->kgem.nbatch,
sna->render_state.gen4.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* media */
 
/* upper bounds, all disabled */
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
}
 
static void
gen4_emit_invariant(struct sna *sna)
{
assert(sna->kgem.surface == sna->kgem.batch_size);
 
if (sna->kgem.gen >= 045)
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
else
OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
gen4_emit_state_base_address(sna);
 
sna->render_state.gen4.needs_invariant = false;
}
 
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen4.needs_invariant)
gen4_emit_invariant(sna);
}
 
static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen4.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
}
}
 
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen4.surface_table == offset)
return;
 
sna->render_state.gen4.surface_table = offset;
 
/* Binding table pointers */
OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
}
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
uint16_t sp, bp;
uint32_t key;
 
DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
__FUNCTION__, op->u.gen4.ve_id & 2,
op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel, blend, op->has_component_alpha, (int)op->dst.format));
 
sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel);
bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);
 
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
key = sp | (uint32_t)bp << 16;
if (key == sna->render_state.gen4.last_pipelined_pointers)
return;
 
OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen4.vs);
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
OUT_BATCH(sna->render_state.gen4.sf);
OUT_BATCH(sna->render_state.gen4.wm + sp);
OUT_BATCH(sna->render_state.gen4.cc + bp);
 
sna->render_state.gen4.last_pipelined_pointers = key;
gen4_emit_urb(sna);
}
 
static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (sna->render_state.gen4.drawrect_limit == limit &&
sna->render_state.gen4.drawrect_offset == offset)
return true;
 
sna->render_state.gen4.drawrect_offset = offset;
sna->render_state.gen4.drawrect_limit = limit;
 
OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
return false;
}
 
static void
gen4_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen4_render_state *render = &sna->render_state.gen4;
uint32_t src_format, dw;
int id = op->u.gen4.ve_id;
 
if (render->ve_id == id)
return;
render->ve_id = id;
 
/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
*/
OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));
 
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
(1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
 
/* u0, v0, w0 */
/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
case 0:
src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
 
/* u1, v1, w1 */
if (id >> 2) {
unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
id >> 2, src_offset));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
src_offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
} else {
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
}
}
 
static void
gen4_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool flush;
 
flush = wm_binding_table & 1;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo),
flush));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
flush &= gen4_emit_drawing_rectangle(sna, op);
if (flush && op->op > PictOpSrc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
 
gen4_emit_binding_table(sna, wm_binding_table & ~1);
gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
gen4_emit_vertex_elements(sna, op);
}
 
static void
gen4_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
uint32_t *binding_table;
uint16_t offset;
 
gen4_get_batch(sna, op);
 
binding_table = gen4_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen4_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
assert(op->u.gen4.ve_id >> 2);
binding_table[2] =
gen4_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen4.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen4.surface_table;
}
 
gen4_emit_state(sna, op, offset | dirty);
}
 
fastcall static void
gen4_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));
 
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, r);
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void
gen4_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
 
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen4_render_composite_blt;
tmp->done = gen4_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
}
 
gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;
}
 
static void
gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
static void
gen4_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen4_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
 
static void gen4_render_reset(struct sna *sna)
{
sna->render_state.gen4.needs_invariant = true;
sna->render_state.gen4.needs_urb = true;
sna->render_state.gen4.ve_id = -1;
sna->render_state.gen4.last_primitive = -1;
sna->render_state.gen4.last_pipelined_pointers = -1;
 
sna->render_state.gen4.drawrect_offset = -1;
sna->render_state.gen4.drawrect_limit = -1;
sna->render_state.gen4.surface_table = -1;
 
if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen4_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}
 
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
 
/* Set up the vertex shader to be disabled (passthrough) */
vs->thread4.nr_urb_entries = URB_VS_ENTRIES;
vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
vs->vs6.vs_enable = 0;
vs->vs6.vert_cache_disable = 1;
 
return sna_static_stream_offsetof(stream, vs);
}
 
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
int gen, uint32_t kernel)
{
struct gen4_sf_unit_state *sf;
 
sf = sna_static_stream_map(stream, sizeof(*sf), 32);
 
sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf->thread0.kernel_start_pointer = kernel >> 6;
sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */
sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
/* don't smash vertex header, read start from dw8 */
sf->thread3.urb_entry_read_offset = 1;
sf->thread3.dispatch_grf_start_reg = 3;
sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
sf->sf5.viewport_transform = false; /* skip viewport */
sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
sf->sf6.scissor = 0;
sf->sf7.trifan_pv = 2;
sf->sf6.dest_org_vbias = 0x8;
sf->sf6.dest_org_hbias = 0x8;
 
return sna_static_stream_offsetof(stream, sf);
}
 
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
sampler_filter_t src_filter,
sampler_extend_t src_extend,
sampler_filter_t mask_filter,
sampler_extend_t mask_extend)
{
struct gen4_sampler_state *sampler_state;
 
sampler_state = sna_static_stream_map(stream,
sizeof(struct gen4_sampler_state) * 2,
32);
sampler_state_init(&sampler_state[0], src_filter, src_extend);
sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
 
return sna_static_stream_offsetof(stream, sampler_state);
}
 
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
int gen,
bool has_mask,
uint32_t kernel,
uint32_t sampler)
{
assert((kernel & 63) == 0);
wm->thread0.kernel_start_pointer = kernel >> 6;
wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
 
wm->thread1.single_program_flow = 0;
 
wm->thread3.const_urb_entry_read_length = 0;
wm->thread3.const_urb_entry_read_offset = 0;
 
wm->thread3.urb_entry_read_offset = 0;
wm->thread3.dispatch_grf_start_reg = 3;
 
assert((sampler & 31) == 0);
wm->wm4.sampler_state_pointer = sampler >> 5;
wm->wm4.sampler_count = 1;
 
wm->wm5.max_threads = gen >= 045 ? G4X_MAX_WM_THREADS - 1 : GEN4_MAX_WM_THREADS - 1;
wm->wm5.transposed_urb_read = 0;
wm->wm5.thread_dispatch_enable = 1;
/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
* start point
*/
wm->wm5.enable_16_pix = 1;
wm->wm5.enable_8_pix = 0;
wm->wm5.early_depth_test = 1;
 
/* Each pair of attributes (src/mask coords) is two URB entries */
if (has_mask) {
wm->thread1.binding_table_entry_count = 3;
wm->thread3.urb_entry_read_length = 4;
} else {
wm->thread1.binding_table_entry_count = 2;
wm->thread3.urb_entry_read_length = 2;
}
}
 
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
uint8_t *ptr, *base;
int i, j;
 
base = ptr =
sna_static_stream_map(stream,
GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
64);
 
for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) {
for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) {
struct gen4_cc_unit_state *state =
(struct gen4_cc_unit_state *)ptr;
 
state->cc3.blend_enable =
!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
 
state->cc5.logicop_func = 0xc; /* COPY */
state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;
 
/* Fill in alpha blend factors same as color, for the future. */
state->cc5.ia_src_blend_factor = i;
state->cc5.ia_dest_blend_factor = j;
 
state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
state->cc6.clamp_post_alpha_blend = 1;
state->cc6.clamp_pre_alpha_blend = 1;
state->cc6.src_blend_factor = i;
state->cc6.dest_blend_factor = j;
 
ptr += 64;
}
}
 
return sna_static_stream_offsetof(stream, base);
}
 
static bool gen4_render_setup(struct sna *sna)
{
struct gen4_render_state *state = &sna->render_state.gen4;
struct sna_static_stream general;
struct gen4_wm_unit_state_padded *wm_state;
uint32_t sf, wm[KERNEL_COUNT];
int i, j, k, l, m;
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
 
sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
for (m = 0; m < KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
wm[m] = sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
wm[m] = sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data,
16);
}
}
 
state->vs = gen4_create_vs_unit_state(&general);
state->sf = gen4_create_sf_state(&general, sna->kgem.gen, sf);
 
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT,
64);
state->wm = sna_static_stream_offsetof(&general, wm_state);
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
uint32_t sampler_state;
 
sampler_state =
gen4_create_sampler_state(&general,
i, j,
k, l);
 
for (m = 0; m < KERNEL_COUNT; m++) {
gen4_init_wm_state(&wm_state->state,
sna->kgem.gen,
wm_kernels[m].has_mask,
wm[m], sampler_state);
wm_state++;
}
}
}
}
}
 
state->cc = gen4_create_cc_unit_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
 
bool gen4_render_init(struct sna *sna)
{
if (!gen4_render_setup(sna))
return false;
 
sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;
 
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
 
sna->render.blit_tex = gen4_blit_tex;
 
 
sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
sna->render.fini = gen4_render_fini;
 
sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
 
/drivers/video/Intel-2D/gen5_render.c
0,0 → 1,1420
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen5_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
 
#define PREFER_BLT_FILL 1
 
#define DBG_NO_STATE_CACHE 0
#define DBG_NO_SURFACE_CACHE 0
 
#define MAX_3D_SIZE 8192
 
#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*/
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0
 
#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 256 /* minimum of 8 */
 
#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0
 
#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0
 
#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
 
/*
* this program computes dA/dx and dA/dy for the texture coordinates along
* with the base texture coordinate. It was extracted from the Mesa driver
*/
 
#define SF_KERNEL_NUM_GRF 16
#define SF_MAX_THREADS 48
 
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 72
 
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb.g5b"
#include "exa_wm_write.g5b"
};
 
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb.g5b"
#include "exa_wm_write.g5b"
};
 
#define NOKERNEL(kernel_enum, func, masked) \
[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
[kernel_enum] = {&kernel, sizeof(kernel), masked}
static const struct wm_kernel_info {
const void *data;
unsigned int size;
bool has_mask;
} wm_kernels[] = {
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),
 
NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),
 
NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),
 
NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),
 
NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),
 
KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
 
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen5_blend_op[] = {
/* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO},
/* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE},
/* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE},
/* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE},
};
 
/**
* Highest-valued BLENDFACTOR used in gen5_blend_op.
*
* This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
* GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)
 
#define BLEND_OFFSET(s, d) \
(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)
 
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
static bool
gen5_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel);
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
static inline bool too_large(int width, int height)
{
return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}
 
static int
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
int base;
 
if (has_mask) {
if (is_ca) {
if (gen5_blend_op[op].src_alpha)
base = WM_KERNEL_MASKSA;
else
base = WM_KERNEL_MASKCA;
} else
base = WM_KERNEL_MASK;
} else
base = WM_KERNEL;
 
return base + !is_affine;
}
 
static bool gen5_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen5_render_state *state = &sna->render_state.gen5;
 
if (!op->need_magic_ca_pass)
return false;
 
assert(sna->render.vertex_index > sna->render.vertex_start);
 
DBG(("%s: CA fixup\n", __FUNCTION__));
assert(op->mask.bo != NULL);
assert(op->has_component_alpha);
 
gen5_emit_pipelined_pointers
(sna, op, PictOpAdd,
gen5_choose_composite_kernel(PictOpAdd,
true, true, op->is_affine));
 
OUT_BATCH(GEN5_3DPRIMITIVE |
GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
 
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
static uint32_t gen5_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
 
src = GEN5_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN5_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
* it as always 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN5_BLENDFACTOR_DST_ALPHA)
src = GEN5_BLENDFACTOR_ONE;
else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
src = GEN5_BLENDFACTOR_ZERO;
}
 
/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen5_blend_op[op].src_alpha) {
if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
dst = GEN5_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
}
#endif
 
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, BLEND_OFFSET(src, dst)));
return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen5_get_card_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
}
}
 
static uint32_t gen5_get_dest_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8:
return GEN5_SURFACEFORMAT_A8_UNORM;
}
}
typedef struct gen5_surface_state_padded {
struct gen5_surface_state state;
char pad[32 - sizeof(struct gen5_surface_state)];
} gen5_surface_state_padded;
 
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
static void
sampler_state_init(struct gen5_sampler_state *sampler_state,
sampler_filter_t filter,
sampler_extend_t extend)
{
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
 
/* We use the legacy mode to get the semantics specified by
* the Render extension. */
sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;
 
switch (filter) {
default:
case SAMPLER_FILTER_NEAREST:
sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST;
sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST;
break;
case SAMPLER_FILTER_BILINEAR:
sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR;
sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR;
break;
}
 
switch (extend) {
default:
case SAMPLER_EXTEND_NONE:
sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
break;
case SAMPLER_EXTEND_REPEAT:
sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP;
break;
case SAMPLER_EXTEND_PAD:
sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
break;
case SAMPLER_EXTEND_REFLECT:
sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
break;
}
}
 
static uint32_t
gen5_tiling_bits(uint32_t tiling)
{
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
case I915_TILING_X: return GEN5_SURFACE_TILED;
case I915_TILING_Y: return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;
}
}
 
/**
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static uint32_t
gen5_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t domains;
uint16_t offset;
uint32_t *ss;
 
/* After the first bind, we manage the cache domains within the batch */
if (!DBG_NO_SURFACE_CACHE) {
offset = kgem_bo_get_binding(bo, format);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}
}
 
offset = sna->kgem.surface -=
sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
 
ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
GEN5_SURFACE_BLEND_ENABLED |
format << GEN5_SURFACE_FORMAT_SHIFT);
 
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
 
ss[2] = ((width - 1) << GEN5_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
ss[3] = (gen5_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = 0;
 
kgem_bo_set_binding(bo, format, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
 
return offset * sizeof(uint32_t);
}
 
static void gen5_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen5.ve_id;
 
assert((sna->render.vb_id & (1 << id)) == 0);
 
OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
 
sna->render.vb_id |= 1 << id;
}
 
static void gen5_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) {
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN5_3DPRIMITIVE |
GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
 
sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
}
 
static bool gen5_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen5.ve_id;
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;
 
ndwords = op->need_magic_ca_pass ? 20 : 6;
if ((sna->render.vb_id & (1 << id)) == 0)
ndwords += 5;
 
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & (1 << id)) == 0)
gen5_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen5_emit_primitive(sna);
 
return true;
}
 
static int gen5_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen5_magic_ca_pass(sna, op))
gen5_emit_pipelined_pointers(sna, op, op->op,
op->u.gen5.wm_kernel);
}
 
return gen4_vertex_finish(sna);
}
 
inline static int gen5_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna,
const struct sna_composite_op *op))
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen5_get_rectangles__flush(sna, op);
if (unlikely (rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen5_rectangle_begin(sna, op))
goto flush;
else
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
sna->render.vertex_index += 3*want;
return want;
 
flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
static uint32_t *
gen5_composite_get_binding_table(struct sna *sna,
uint16_t *offset)
{
sna->kgem.surface -=
sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
 
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
/* Clear all surplus entries to zero in case of prefetch */
*offset = sna->kgem.surface;
return memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen5_surface_state_padded));
}
 
static void
gen5_emit_urb(struct sna *sna)
{
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
int urb_clip_start, urb_clip_size;
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;
 
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
urb_clip_start = urb_gs_start + urb_gs_size;
urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
urb_sf_start = urb_clip_start + urb_clip_size;
urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
OUT_BATCH(GEN5_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |
UF0_CLIP_REALLOC |
UF0_GS_REALLOC |
UF0_VS_REALLOC |
1);
OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
 
/* Constant buffer state */
OUT_BATCH(GEN5_CS_URB_STATE | 0);
OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
}
 
static void
gen5_emit_state_base_address(struct sna *sna)
{
assert(sna->render_state.gen5.general_bo->proxy == NULL);
OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
sna->kgem.nbatch,
sna->render_state.gen5.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* media */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen5.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
 
/* upper bounds, all disabled */
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
static void
gen5_emit_invariant(struct sna *sna)
{
/* Ironlake errata workaround: Before disabling the clipper,
* you have to MI_FLUSH to get the pipeline idle.
*
* However, the kernel flushes the pipeline between batches,
* so we should be safe....
* OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
*/
OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
gen5_emit_state_base_address(sna);
 
sna->render_state.gen5.needs_invariant = false;
}
 
static void
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen5.needs_invariant)
gen5_emit_invariant(sna);
}
 
static void
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen5.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
}
}
 
static void
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.surface_table == offset)
return;
 
sna->render_state.gen5.surface_table = offset;
 
/* Binding table pointers */
OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
}
 
static bool
gen5_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
uint16_t sp, bp;
uint32_t key;
 
DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
__FUNCTION__, op->u.gen5.ve_id & 2,
op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel, blend, op->has_component_alpha, (int)op->dst.format));
 
sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel);
bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);
 
DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
key = sp | (uint32_t)bp << 16 | (op->mask.bo != NULL) << 31;
if (key == sna->render_state.gen5.last_pipelined_pointers)
return false;
 
 
OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen5.vs);
OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
OUT_BATCH(sna->render_state.gen5.wm + sp);
OUT_BATCH(sna->render_state.gen5.cc + bp);
 
sna->render_state.gen5.last_pipelined_pointers = key;
return true;
}
 
static void
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.drawrect_limit == limit &&
sna->render_state.gen5.drawrect_offset == offset)
return;
 
sna->render_state.gen5.drawrect_offset = offset;
sna->render_state.gen5.drawrect_limit = limit;
 
OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0x00000000);
OUT_BATCH(limit);
OUT_BATCH(offset);
}
 
static void
gen5_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen5_render_state *render = &sna->render_state.gen5;
int id = op->u.gen5.ve_id;
bool has_mask = id >> 2;
uint32_t format, dw;
 
if (!DBG_NO_STATE_CACHE && render->ve_id == id)
return;
 
DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
render->ve_id = id;
 
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
((2 * (has_mask ? 4 : 3)) + 1 - 2));
 
OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
(GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
(0 << VE0_OFFSET_SHIFT));
OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
 
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
 
/* u0, v0, w0 */
DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
id, id & 3));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
case 0:
format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
format | 4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
 
/* u1, v1, w1 */
if (has_mask) {
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
id, id >> 2, offset));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
case 2:
format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
format | offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
static void
gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo)));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
}
 
/* drawrect must be first for Ironlake BLT workaround */
gen5_emit_drawing_rectangle(sna, op);
gen5_emit_binding_table(sna, offset);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel))
gen5_emit_urb(sna);
gen5_emit_vertex_elements(sna, op);
}
 
static void gen5_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
 
gen5_get_batch(sna, op);
 
binding_table = gen5_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen5_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen5_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
assert(op->u.gen5.ve_id >> 2);
binding_table[2] =
gen5_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen5.surface_table;
}
 
gen5_emit_state(sna, op, offset);
}
 
fastcall static void
gen5_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));
 
gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, r);
}
 
 
static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna,op);
}
 
DBG(("%s()\n", __FUNCTION__));
 
}
 
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
tmp->mask.is_affine = true;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(tmp);
 
tmp->blt = gen5_render_composite_blt;
// tmp->box = gen5_render_composite_box;
tmp->done = gen5_render_composite_done;
 
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
}
 
gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;
 
}
 
 
 
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen5_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (!kgem->nbatch)
return;
 
/* WaNonPipelinedStateCommandFlush
*
* Ironlake has a limitation that a 3D or Media command can't
* be the first command after a BLT, unless it's
* non-pipelined.
*
* We do this by ensuring that the non-pipelined drawrect
* is always emitted first following a switch from BLT.
*/
if (kgem->mode == KGEM_BLT) {
struct sna *sna = to_sna_from_kgem(kgem);
DBG(("%s: forcing drawrect on next state emission\n",
__FUNCTION__));
sna->render_state.gen5.drawrect_limit = -1;
}
 
if (kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
_kgem_submit(kgem);
}
}
 
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
static void
gen5_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen5_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
discard_vbo(sna);
}
}
 
static void gen5_render_reset(struct sna *sna)
{
sna->render_state.gen5.needs_invariant = true;
sna->render_state.gen5.ve_id = -1;
sna->render_state.gen5.last_primitive = -1;
sna->render_state.gen5.last_pipelined_pointers = 0;
 
sna->render_state.gen5.drawrect_offset = -1;
sna->render_state.gen5.drawrect_limit = -1;
sna->render_state.gen5.surface_table = -1;
 
if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen5_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
}
 
static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
{
struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32);
 
/* Set up the vertex shader to be disabled (passthrough) */
vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
vs->vs6.vs_enable = 0;
vs->vs6.vert_cache_disable = 1;
 
return sna_static_stream_offsetof(stream, vs);
}
 
static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
uint32_t kernel)
{
struct gen5_sf_unit_state *sf_state;
 
sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32);
 
sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf_state->thread0.kernel_start_pointer = kernel >> 6;
 
sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
/* don't smash vertex header, read start from dw8 */
sf_state->thread3.urb_entry_read_offset = 1;
sf_state->thread3.dispatch_grf_start_reg = 3;
sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
sf_state->sf5.viewport_transform = false; /* skip viewport */
sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
sf_state->sf6.scissor = 0;
sf_state->sf7.trifan_pv = 2;
sf_state->sf6.dest_org_vbias = 0x8;
sf_state->sf6.dest_org_hbias = 0x8;
 
return sna_static_stream_offsetof(stream, sf_state);
}
 
static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
sampler_filter_t src_filter,
sampler_extend_t src_extend,
sampler_filter_t mask_filter,
sampler_extend_t mask_extend)
{
struct gen5_sampler_state *sampler_state;
 
sampler_state = sna_static_stream_map(stream,
sizeof(struct gen5_sampler_state) * 2,
32);
sampler_state_init(&sampler_state[0], src_filter, src_extend);
sampler_state_init(&sampler_state[1], mask_filter, mask_extend);
 
return sna_static_stream_offsetof(stream, sampler_state);
}
 
static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
bool has_mask,
uint32_t kernel,
uint32_t sampler)
{
state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
state->thread0.kernel_start_pointer = kernel >> 6;
 
state->thread1.single_program_flow = 0;
 
/* scratch space is not used in our kernel */
state->thread2.scratch_space_base_pointer = 0;
state->thread2.per_thread_scratch_space = 0;
 
state->thread3.const_urb_entry_read_length = 0;
state->thread3.const_urb_entry_read_offset = 0;
 
state->thread3.urb_entry_read_offset = 0;
/* wm kernel use urb from 3, see wm_program in compiler module */
state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
 
state->wm4.sampler_count = 0; /* hardware requirement */
 
state->wm4.sampler_state_pointer = sampler >> 5;
state->wm5.max_threads = PS_MAX_THREADS - 1;
state->wm5.transposed_urb_read = 0;
state->wm5.thread_dispatch_enable = 1;
/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
* start point
*/
state->wm5.enable_16_pix = 1;
state->wm5.enable_8_pix = 0;
state->wm5.early_depth_test = 1;
 
/* Each pair of attributes (src/mask coords) is two URB entries */
if (has_mask) {
state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
state->thread3.urb_entry_read_length = 4;
} else {
state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
state->thread3.urb_entry_read_length = 2;
}
 
/* binding table entry count is only used for prefetching,
* and it has to be set 0 for Ironlake
*/
state->thread1.binding_table_entry_count = 0;
}
 
static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
{
uint8_t *ptr, *base;
int i, j;
 
base = ptr =
sna_static_stream_map(stream,
GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
64);
 
for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) {
for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) {
struct gen5_cc_unit_state *state =
(struct gen5_cc_unit_state *)ptr;
 
state->cc3.blend_enable =
!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
 
state->cc5.logicop_func = 0xc; /* COPY */
state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;
 
/* Fill in alpha blend factors same as color, for the future. */
state->cc5.ia_src_blend_factor = i;
state->cc5.ia_dest_blend_factor = j;
 
state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
state->cc6.clamp_post_alpha_blend = 1;
state->cc6.clamp_pre_alpha_blend = 1;
state->cc6.src_blend_factor = i;
state->cc6.dest_blend_factor = j;
 
ptr += 64;
}
}
 
return sna_static_stream_offsetof(stream, base);
}
 
static bool gen5_render_setup(struct sna *sna)
{
struct gen5_render_state *state = &sna->render_state.gen5;
struct sna_static_stream general;
struct gen5_wm_unit_state_padded *wm_state;
uint32_t sf[2], wm[KERNEL_COUNT];
int i, j, k, l, m;
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
 
/* Set up the two SF states (one for blending with a mask, one without) */
sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
 
for (m = 0; m < KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
wm[m] = sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
wm[m] = sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data,
16);
}
assert(wm[m]);
}
 
state->vs = gen5_create_vs_unit_state(&general);
 
state->sf[0] = gen5_create_sf_state(&general, sf[0]);
state->sf[1] = gen5_create_sf_state(&general, sf[1]);
 
 
/* Set up the WM states: each filter/extend type for source and mask, per
* kernel.
*/
wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT,
64);
state->wm = sna_static_stream_offsetof(&general, wm_state);
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
uint32_t sampler_state;
 
sampler_state =
gen5_create_sampler_state(&general,
i, j,
k, l);
 
for (m = 0; m < KERNEL_COUNT; m++) {
gen5_init_wm_state(&wm_state->state,
wm_kernels[m].has_mask,
wm[m], sampler_state);
wm_state++;
}
}
}
}
}
 
state->cc = gen5_create_cc_unit_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
bool gen5_render_init(struct sna *sna)
{
if (!gen5_render_setup(sna))
return false;
 
sna->kgem.context_switch = gen5_render_context_switch;
sna->kgem.retire = gen5_render_retire;
sna->kgem.expire = gen5_render_expire;
 
sna->render.blit_tex = gen5_blit_tex;
 
sna->render.flush = gen5_render_flush;
sna->render.reset = gen5_render_reset;
sna->render.fini = gen5_render_fini;
 
sna->render.max_3d_size = MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
/drivers/video/Intel-2D/gen6_render.c
0,0 → 1,3508
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen6_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 1
#define PREFER_RENDER 0
 
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0
 
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif
 
#define GEN6_MAX_SIZE 8192
 
struct gt_info {
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
struct {
int size;
int max_vs_entries;
int max_gs_entries;
} urb;
};
 
static const struct gt_info gt1_info = {
.max_vs_threads = 24,
.max_gs_threads = 21,
.max_wm_threads = 40,
.urb = { 32, 256, 256 },
};
 
static const struct gt_info gt2_info = {
.max_vs_threads = 60,
.max_gs_threads = 60,
.max_wm_threads = 80,
.urb = { 64, 256, 256 },
};
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
#define NOKERNEL(kernel_enum, func, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
 
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
unsigned int num_surfaces;
} wm_kernels[] = {
NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
 
NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
 
NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
 
NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
 
NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
 
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
 
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen6_blend_op[] = {
/* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
/* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
/* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
/* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
 
/**
* Highest-valued BLENDFACTOR used in gen6_blend_op.
*
* This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
* GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
 
#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)
 
#define BLEND_OFFSET(s, d) \
(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)
 
#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)
 
#define SAMPLER_OFFSET(sf, se, mf, me) \
(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))
 
#define VERTEX_2s2s 0
 
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
 
#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
 
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
static inline bool too_large(int width, int height)
{
return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
}
 
static uint32_t gen6_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
 
 
src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
 
// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
 
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN6_BLENDFACTOR_DST_ALPHA)
src = GEN6_BLENDFACTOR_ONE;
else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
src = GEN6_BLENDFACTOR_ZERO;
}
 
/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen6_blend_op[op].src_alpha) {
if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
}
 
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, (int)BLEND_OFFSET(src, dst)));
#endif
 
return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen6_get_card_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
};
 
/*
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
*/
}
 
static uint32_t gen6_get_dest_format(PictFormat format)
{
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
 
#if 0
 
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
#endif
 
}
 
#if 0
 
static bool gen6_check_dst_format(PictFormat format)
{
if (gen6_get_dest_format(format) != -1)
return true;
 
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
return false;
}
 
static bool gen6_check_format(uint32_t format)
{
if (gen6_get_card_format(format) != -1)
return true;
 
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
return false;
}
 
static uint32_t gen6_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
case PictFilterNearest:
return SAMPLER_FILTER_NEAREST;
case PictFilterBilinear:
return SAMPLER_FILTER_BILINEAR;
}
}
 
static uint32_t gen6_check_filter(PicturePtr picture)
{
switch (picture->filter) {
case PictFilterNearest:
case PictFilterBilinear:
return true;
default:
return false;
}
}
 
static uint32_t gen6_repeat(uint32_t repeat)
{
switch (repeat) {
default:
assert(0);
case RepeatNone:
return SAMPLER_EXTEND_NONE;
case RepeatNormal:
return SAMPLER_EXTEND_REPEAT;
case RepeatPad:
return SAMPLER_EXTEND_PAD;
case RepeatReflect:
return SAMPLER_EXTEND_REFLECT;
}
}
 
static bool gen6_check_repeat(PicturePtr picture)
{
if (!picture->repeat)
return true;
 
switch (picture->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
return true;
default:
return false;
}
}
#endif
 
static int
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
int base;
 
if (has_mask) {
 
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
base = GEN6_WM_KERNEL_MASKSA;
else
base = GEN6_WM_KERNEL_MASKCA;
} else
base = GEN6_WM_KERNEL_MASK;
 
} else
base = GEN6_WM_KERNEL_NOMASK;
 
return base + !is_affine;
}
 
static void
gen6_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
(sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
 
static void
gen6_emit_state_base_address(struct sna *sna)
{
OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
static void
gen6_emit_viewports(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
(4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_vs(struct sna *sna)
{
/* disable VS constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
OUT_BATCH(0); /* no VS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
}
 
static void
gen6_emit_gs(struct sna *sna)
{
/* disable GS constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
OUT_BATCH(0); /* no GS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
}
 
static void
gen6_emit_clip(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
OUT_BATCH(0);
}
 
static void
gen6_emit_wm_constants(struct sna *sna)
{
/* disable WM constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
OUT_BATCH(0);
}
 
static void
gen6_emit_invariant(struct sna *sna)
{
OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
OUT_BATCH(1);
 
gen6_emit_urb(sna);
 
gen6_emit_state_base_address(sna);
 
gen6_emit_viewports(sna);
gen6_emit_vs(sna);
gen6_emit_gs(sna);
gen6_emit_clip(sna);
gen6_emit_wm_constants(sna);
gen6_emit_null_depth_buffer(sna);
 
sna->render_state.gen6.needs_invariant = false;
}
 
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
struct gen6_render_state *render = &sna->render_state.gen6;
 
if (render->blend == blend)
return blend != NO_BLEND;
 
DBG(("%s: blend = %x\n", __FUNCTION__, blend));
 
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
OUT_BATCH((render->cc_blend + blend) | 1);
if (render->blend == (unsigned)-1) {
OUT_BATCH(1);
OUT_BATCH(1);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
}
 
render->blend = blend;
return blend != NO_BLEND;
}
 
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
if (sna->render_state.gen6.samplers == state)
return;
 
sna->render_state.gen6.samplers = state;
 
DBG(("%s: sampler = %x\n", __FUNCTION__, state));
 
OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
 
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
int num_sf_outputs = has_mask ? 2 : 1;
 
if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
return;
 
DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
__FUNCTION__, num_sf_outputs, 1, 0));
 
sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
 
OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0);
OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW9 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW14 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW19 */
}
 
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
const uint32_t *kernels;
 
if (sna->render_state.gen6.kernel == kernel)
return;
 
sna->render_state.gen6.kernel = kernel;
kernels = sna->render_state.gen6.wm_kernel[kernel];
 
DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
__FUNCTION__,
wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
kernels[0], kernels[1], kernels[2]));
 
OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0); /* scratch space */
OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
(kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
(kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
(kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
GEN6_3DSTATE_WM_DISPATCH_ENABLE);
OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
OUT_BATCH(kernels[2]);
OUT_BATCH(kernels[1]);
}
 
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen6.surface_table == offset)
return false;
 
/* Binding table pointers */
OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
 
sna->render_state.gen6.surface_table = offset;
return true;
}
 
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
return false;
 
/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*/
if (!sna->render_state.gen6.first_state_packet) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16 |
I915_GEM_DOMAIN_INSTRUCTION,
64));
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
 
sna->render_state.gen6.drawrect_offset = offset;
sna->render_state.gen6.drawrect_limit = limit;
return true;
}
 
static void
gen6_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen6_render_state *render = &sna->render_state.gen6;
uint32_t src_format, dw;
int id = GEN6_VERTEX(op->u.gen6.flags);
bool has_mask;
 
DBG(("%s: setup id=%d\n", __FUNCTION__, id));
 
if (render->ve_id == id)
return;
render->ve_id = id;
 
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
has_mask = (id >> 2) != 0;
OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
((2 * (3 + has_mask)) + 1 - 2));
 
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
 
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
 
/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
case 0:
src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
 
/* u1, v1, w1 */
if (has_mask) {
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
static void
gen6_emit_flush(struct sna *sna)
{
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
GEN6_PIPE_CONTROL_TC_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall = wm_binding_table & 1;
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_vertex_elements(sna, op);
 
need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
if (gen6_emit_drawing_rectangle(sna, op))
need_stall = false;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
if (need_stall) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
sna->render_state.gen6.first_state_packet = false;
}
 
static bool gen6_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen6_render_state *state = &sna->render_state.gen6;
 
if (!op->need_magic_ca_pass)
return false;
 
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
 
gen6_emit_flush(sna);
 
gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
gen6_emit_wm(sna,
gen6_choose_composite_kernel(PictOpAdd,
true, true,
op->is_affine),
true);
 
OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
 
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
typedef struct gen6_surface_state_padded {
struct gen6_surface_state state;
char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
 
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
static void scratch_create(struct sna_static_stream *stream)
{
/* 64 bytes of scratch space for random writes, such as
* the pipe-control w/a.
*/
sna_static_stream_map(stream, 64, 64);
}
 
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
sampler_filter_t filter,
sampler_extend_t extend)
{
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
 
/* We use the legacy mode to get the semantics specified by
* the Render extension. */
sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
 
switch (filter) {
default:
case SAMPLER_FILTER_NEAREST:
sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
break;
case SAMPLER_FILTER_BILINEAR:
sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
break;
}
 
switch (extend) {
default:
case SAMPLER_EXTEND_NONE:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
break;
case SAMPLER_EXTEND_REPEAT:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
break;
case SAMPLER_EXTEND_PAD:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
break;
case SAMPLER_EXTEND_REFLECT:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
break;
}
}
 
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
return 0;
/*
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
case I915_TILING_X: return GEN6_SURFACE_TILED;
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
}
*/
}
 
/**
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static int
gen6_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t *ss;
uint32_t domains;
uint16_t offset;
uint32_t is_scanout = is_dst && bo->scanout;
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
is_dst ? "render" : "sampler"));
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}
 
offset = sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
assert(bo->pitch <= (1 << 18));
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = is_scanout ? 0 : 3 << 16;
 
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
 
return offset * sizeof(uint32_t);
}
 
static void gen6_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = GEN6_VERTEX(op->u.gen6.flags);
 
OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
 
sna->render.vb_id |= 1 << id;
}
 
static void gen6_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
__FUNCTION__,
sna->render.vertex_start,
sna->render.vertex_index));
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
DBG(("%s: started new primitive: index=%d\n",
__FUNCTION__, sna->render.vertex_start));
 
sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
 
static bool gen6_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;
 
ndwords = op->need_magic_ca_pass ? 60 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
gen6_emit_vertex_buffer(sna, op);
 
gen6_emit_primitive(sna);
return true;
}
 
static int gen6_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen6_magic_ca_pass(sna, op)) {
gen6_emit_flush(sna);
gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
gen6_emit_wm(sna,
GEN6_KERNEL(op->u.gen6.flags),
GEN6_VERTEX(op->u.gen6.flags) >> 2);
}
}
 
return gen4_vertex_finish(sna);
}
 
inline static int gen6_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
int rem;
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen6_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
goto flush;
 
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
assert(want > 0);
sna->render.vertex_index += 3*want;
return want;
 
flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
uint16_t *offset)
{
uint32_t *table;
 
sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
/* Clear all surplus entries to zero in case of prefetch */
table = memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen6_surface_state_padded));
 
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
*offset = sna->kgem.surface;
return table;
}
 
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen6.needs_invariant)
gen6_emit_invariant(sna);
 
return kgem_bo_is_dirty(op->dst.bo);
}
 
static void gen6_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
binding_table[2] =
gen6_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert (sna->render.vertex_offset == 0);
if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen6.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
 
fastcall static void
gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
op->prim_emit(sna, op, r);
}
 
#if 0
 
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen6_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
 
do {
int nbox_this_time;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen6_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen6_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
char *base, *ptr;
int src, dst;
 
base = sna_static_stream_map(stream,
GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
64);
 
ptr = base;
for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
struct gen6_blend_state *blend =
(struct gen6_blend_state *)ptr;
 
blend->blend0.dest_blend_factor = dst;
blend->blend0.source_blend_factor = src;
blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
blend->blend0.blend_enable =
!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
 
blend->blend1.post_blend_clamp_enable = 1;
blend->blend1.pre_blend_clamp_enable = 1;
 
ptr += GEN6_BLEND_STATE_PADDED_SIZE;
}
}
 
return sna_static_stream_offsetof(stream, base);
}
 
#if 0
 
static uint32_t gen6_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen6_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN6_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen6_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
bool dirty;
int n_src, n;
 
dirty = gen6_get_batch(sna, op);
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen6_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static bool
gen6_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__, src_w, src_h, drw_w, drw_h,
REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_w == drw_w && src_h == drw_h)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
 
tmp.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN6_WM_KERNEL_VIDEO_PLANAR :
GEN6_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_video_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
priv->clear = false;
 
gen4_vertex_flush(sna);
return true;
}
 
static int
gen6_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->is_solid = false;
channel->card_format = -1;
 
if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);
 
if (picture->pDrawable == NULL) {
int ret;
 
if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);
 
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen6_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
if (!gen6_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;
 
channel->pict_format = picture->format;
channel->card_format = gen6_get_card_format(picture->format);
if (channel->card_format == (unsigned)-1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
channel->repeat = gen6_repeat(channel->repeat);
channel->filter = gen6_filter(channel->filter);
if (channel->card_format == (unsigned)-1)
channel->card_format = gen6_get_card_format(channel->pict_format);
assert(channel->card_format != (unsigned)-1);
}
 
#endif
 
static void gen6_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
 
 
// sna_render_composite_redirect_done(sna, op);
}
 
#if 0
 
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.format = dst->format;
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
 
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
// op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
// PREFER_GPU | FORCE_GPU | RENDER_GPU,
// &box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h))
return false;
 
return true;
}
 
 
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
/* Did we just switch rings to prepare the source? */
if (mask == NULL &&
prefer_blt_composite(sna, tmp) &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen6_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.bo = NULL;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen6_blend_op[op].src_alpha &&
(gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
if (op != PictOpOver)
goto cleanup_src;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen6_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->mask);
break;
}
}
 
tmp->is_affine &= tmp->mask.is_affine;
}
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(tmp));
 
// tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
// tmp->boxes = gen6_render_composite_boxes__blt;
// if (tmp->emit_boxes) {
// tmp->boxes = gen6_render_composite_boxes;
// tmp->thread_boxes = gen6_render_composite_boxes__thread;
// }
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen6_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_composite_state);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
DBG(("%s()\n", __FUNCTION__));
assert(!sna->render.active);
 
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
 
if (op->base.src.bo)
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
 
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen6_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height,
unsigned flags)
{
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
__FUNCTION__, op, width, height, flags));
 
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;
 
if (gen6_composite_fallback(sna, src, NULL, dst)) {
DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
}
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
assert(priv);
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
 
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
return true;
}
 
static bool
gen6_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));
 
assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->base.src);
break;
}
tmp->base.mask.bo = NULL;
 
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.need_magic_ca_pass = false;
 
tmp->base.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
tmp->base.src.repeat,
SAMPLER_FILTER_NEAREST,
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(tmp));
 
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, &tmp->base);
gen6_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
 
static void
gen6_emit_copy_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
 
static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *src_bo,
struct kgem_bo *dst_bo,
unsigned flags)
{
if (flags & COPY_SYNC)
return false;
 
if (PREFER_RENDER)
return PREFER_RENDER > 0;
 
if (sna->kgem.ring == KGEM_BLT)
return true;
 
if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
return true;
 
if (untiled_tlb_miss(src_bo) ||
untiled_tlb_miss(dst_bo))
return true;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
return (prefer_blt_bo(sna, src_bo) >= 0 &&
prefer_blt_bo(sna, dst_bo) > 0);
}
 
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
*extents = box[0];
while (--n) {
box++;
 
if (box->x1 < extents->x1)
extents->x1 = box->x1;
if (box->x2 > extents->x2)
extents->x2 = box->x2;
 
if (box->y1 < extents->y1)
extents->y1 = box->y1;
if (box->y2 > extents->y2)
extents->y2 = box->y2;
}
}
 
static inline bool
overlaps(struct sna *sna,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, BoxRec *extents)
{
if (src_bo != dst_bo)
return false;
 
boxes_extents(box, n, extents);
return (extents->x2 + src_dx > extents->x1 + dst_dx &&
extents->x1 + src_dx < extents->x2 + dst_dx &&
extents->y2 + src_dy > extents->y1 + dst_dy &&
extents->y1 + src_dy < extents->y2 + dst_dy);
}
 
static bool
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
BoxRec extents;
 
DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
src_bo == dst_bo,
overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)));
 
if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
if (overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)) {
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
goto fallback_blt;
 
if (can_switch_to_blt(sna, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
return sna_render_copy_boxes__overlap(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n, &extents);
}
 
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(tmp.src.pict_format))
goto fallback_blt;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
int i;
 
extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled;
 
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
 
tmp.dst.x = tmp.dst.y = 0;
}
 
tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
int i;
 
extents = box[0];
for (i = 1; i < n; i++) {
if (extents.x1 < box[i].x1)
extents.x1 = box[i].x1;
if (extents.y1 < box[i].y1)
extents.y1 = box[i].y1;
 
if (extents.x2 > box[i].x2)
extents.x2 = box[i].x2;
if (extents.y2 > box[i].y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1)) {
DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
goto fallback_tiled_dst;
}
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
} else {
tmp.src.bo = src_bo;
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
}
 
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = 0;
 
tmp.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
DBG(("%s: too large for a single operation\n",
__FUNCTION__));
goto fallback_tiled_src;
}
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_copy_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int16_t *v;
int n_this_time;
 
n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_copy_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
 
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
v[0] = box->x2 + dst_dx;
v[2] = box->x2 + src_dx;
v[1] = v[5] = box->y2 + dst_dy;
v[3] = v[7] = box->y2 + src_dy;
v[8] = v[4] = box->x1 + dst_dx;
v[10] = v[6] = box->x1 + src_dx;
v[9] = box->y1 + dst_dy;
v[11] = box->y1 + src_dy;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
static void
gen6_render_copy_blt(struct sna *sna,
const struct sna_copy_op *op,
int16_t sx, int16_t sy,
int16_t w, int16_t h,
int16_t dx, int16_t dy)
{
int16_t *v;
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = dx+w; v[1] = dy+h;
v[2] = sx+w; v[3] = sy+h;
v[4] = dx; v[5] = dy+h;
v[6] = sx; v[7] = sy+h;
v[8] = dx; v[9] = dy;
v[10] = sx; v[11] = sy;
}
 
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
}
 
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
struct sna_copy_op *op)
{
DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
__FUNCTION__, alu,
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
 
fallback:
 
op->base.dst.format = PIXMAN_a8r8g8b8;
op->base.src.pict_format = op->base.dst.format;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
op->base.dst.height = dst->drawable.height;
op->base.dst.bo = dst_bo;
 
op->base.src.bo = src_bo;
op->base.src.card_format =
gen6_get_card_format(op->base.src.pict_format);
op->base.src.width = src->drawable.width;
op->base.src.height = src->drawable.height;
 
op->base.mask.bo = NULL;
 
op->base.floats_per_vertex = 2;
op->base.floats_per_rect = 6;
 
op->base.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
goto fallback;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_copy_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_copy_blt;
op->done = gen6_render_copy_done;
return true;
}
#endif
 
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = SAMPLER_EXTEND_NONE;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
if ( (tmp->src.width == width) &&
(tmp->src.height == height) )
tmp->src.filter = SAMPLER_FILTER_NEAREST;
else
tmp->src.filter = SAMPLER_FILTER_BILINEAR;
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
// tmp->src.offset[0] = -dst_x;
// tmp->src.offset[1] = -dst_y;
 
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
// tmp->mask.offset[0] = -dst_x;
// tmp->mask.offset[1] = -dst_y;
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
*/
GEN6_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
// tmp->box = gen6_render_composite_box;
tmp->done = gen6_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
}
 
 
 
#if 0
 
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, 1, 1,
GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
false);
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
sna->kgem.surface +=
sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo)
{
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (untiled_tlb_miss(bo))
return true;
 
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
 
static bool
gen6_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;
 
DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));
 
if (op >= ARRAY_SIZE(gen6_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}
 
if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;
 
if (!gen6_check_dst_format(format))
return false;
}
 
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;
 
DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;
 
boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
 
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS(op, format);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int n_this_time;
int16_t *v;
 
n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_fill_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
 
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
 
static void
gen6_render_op_fill_blt(struct sna *sna,
const struct sna_fill_op *op,
int16_t x, int16_t y, int16_t w, int16_t h)
{
int16_t *v;
 
DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = x+w;
v[4] = v[8] = x;
v[1] = v[5] = y+h;
v[9] = y;
 
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
}
 
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
const struct sna_fill_op *op,
const BoxRec *box)
{
int16_t *v;
 
DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = box->x2;
v[8] = v[4] = box->x1;
v[5] = v[1] = box->y2;
v[9] = box->y1;
 
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
}
 
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
const struct sna_fill_op *op,
const BoxRec *box,
int nbox)
{
DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2, nbox));
 
do {
int nbox_this_time;
int16_t *v;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_fill_state);
nbox -= nbox_this_time;
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6 * nbox_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
do {
v[0] = box->x2;
v[8] = v[4] = box->x1;
v[5] = v[1] = box->y2;
v[9] = box->y1;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
box++; v += 12;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
 
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
struct sna_fill_op *op)
{
DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
 
if (prefer_blt_fill(sna, dst_bo) &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
op))
return true;
 
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height))
return sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
op);
 
if (alu == GXclear)
color = 0;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
op->base.dst.height = dst->drawable.height;
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.dst.bo = dst_bo;
op->base.dst.x = op->base.dst.y = 0;
 
op->base.src.bo =
sna_render_get_solid(sna,
sna_rgba_for_color(color,
dst->drawable.depth));
op->base.mask.bo = NULL;
 
op->base.need_magic_ca_pass = false;
op->base.floats_per_vertex = 2;
op->base.floats_per_rect = 6;
 
op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_emit_fill_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_op_fill_blt;
op->box = gen6_render_op_fill_box;
op->boxes = gen6_render_op_fill_boxes;
op->done = gen6_render_op_fill_done;
return true;
}
 
static bool
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint8_t alu)
{
BoxRec box;
 
box.x1 = x1;
box.y1 = y1;
box.x2 = x2;
box.y2 = y2;
 
return sna_blt_fill_boxes(sna, alu,
bo, dst->drawable.bitsPerPixel,
color, &box, 1);
}
 
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1,
int16_t x2, int16_t y2,
uint8_t alu)
{
struct sna_composite_op tmp;
int16_t *v;
 
/* Prefer to use the BLT if already engaged */
if (prefer_blt_fill(sna, bo) &&
gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
 
/* Must use the BLT if we can't RENDER... */
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
 
if (alu == GXclear)
color = 0;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
 
tmp.src.bo =
sna_render_get_solid(sna,
sna_rgba_for_color(color,
dst->drawable.depth));
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return false;
}
}
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = x2;
v[8] = v[4] = x1;
v[5] = v[1] = y2;
v[9] = y1;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
return true;
}
 
static bool
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
BoxRec box;
 
box.x1 = 0;
box.y1 = 0;
box.x2 = dst->drawable.width;
box.y2 = dst->drawable.height;
 
return sna_blt_fill_boxes(sna, GXclear,
bo, dst->drawable.bitsPerPixel,
0, &box, 1);
}
 
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
struct sna_composite_op tmp;
int16_t *v;
 
DBG(("%s: %dx%d\n",
__FUNCTION__,
dst->drawable.width,
dst->drawable.height));
 
/* Prefer to use the BLT if, and only if, already engaged */
if (sna->kgem.ring == KGEM_BLT &&
gen6_render_clear_try_blt(sna, dst, bo))
return true;
 
/* Must use the BLT if we can't RENDER... */
if (too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_clear_try_blt(sna, dst, bo);
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
 
tmp.src.bo = sna_render_get_solid(sna, 0);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return false;
}
}
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = dst->drawable.width;
v[5] = v[1] = dst->drawable.height;
v[8] = v[4] = 0;
v[9] = 0;
 
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
return true;
}
#endif
 
static void gen6_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen6_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (kgem->nbatch) {
DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
_kgem_submit(kgem);
}
 
kgem->ring = new_mode;
}
 
static void
gen6_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen6_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
kgem_bo_destroy(kgem, sna->render.vbo);
assert(!sna->render.active);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
sna->render_state.gen6.first_state_packet = true;
sna->render_state.gen6.ve_id = 3 << 2;
sna->render_state.gen6.last_primitive = -1;
 
sna->render_state.gen6.num_sf_outputs = 0;
sna->render_state.gen6.samplers = -1;
sna->render_state.gen6.blend = -1;
sna->render_state.gen6.kernel = -1;
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen6_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
 
static bool is_gt2(struct sna *sna)
{
return DEVICE_ID(sna->PciInfo) & 0x30;
}
 
static bool is_mobile(struct sna *sna)
{
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
{
struct gen6_render_state *state = &sna->render_state.gen6;
struct sna_static_stream general;
struct gen6_sampler_state *ss;
int i, j, k, l, m;
 
state->info = &gt1_info;
if (is_gt2(sna))
state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
scratch_create(&general);
 
for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
if (USE_8_PIXEL_DISPATCH) {
state->wm_kernel[m][0] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 8);
}
 
if (USE_16_PIXEL_DISPATCH) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
 
if (USE_32_PIXEL_DISPATCH) {
state->wm_kernel[m][2] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 32);
}
}
if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
}
 
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
(2 +
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT),
32);
state->wm_state = sna_static_stream_offsetof(&general, ss);
sampler_copy_init(ss); ss += 2;
sampler_fill_init(ss); ss += 2;
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
sampler_state_init(ss++, i, j);
sampler_state_init(ss++, k, l);
}
}
}
}
 
state->cc_blend = gen6_composite_create_blend_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
bool gen6_render_init(struct sna *sna)
{
if (!gen6_render_setup(sna))
return false;
 
sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;
 
// sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
 
// sna->render.copy_boxes = gen6_render_copy_boxes;
 
sna->render.blit_tex = gen6_blit_tex;
 
// sna->render.copy = gen6_render_copy;
 
// sna->render.fill_boxes = gen6_render_fill_boxes;
// sna->render.fill = gen6_render_fill;
// sna->render.fill_one = gen6_render_fill_one;
// sna->render.clear = gen6_render_clear;
 
sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
sna->render.fini = gen6_render_fini;
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
 
 
/drivers/video/Intel-2D/gen7_render.c
0,0 → 1,1797
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen7_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 0
 
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0
 
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif
 
#define GEN7_MAX_SIZE 16384
 
/* XXX Todo
*
* STR (software tiled rendering) mode. No, really.
* 64x32 pixel blocks align with the rendering cache. Worth considering.
*/
 
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
struct gt_info {
uint32_t max_vs_threads;
uint32_t max_gs_threads;
uint32_t max_wm_threads;
struct {
int size;
int max_vs_entries;
int max_gs_entries;
} urb;
};
 
static const struct gt_info ivb_gt_info = {
.max_vs_threads = 16,
.max_gs_threads = 16,
.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info ivb_gt1_info = {
.max_vs_threads = 36,
.max_gs_threads = 36,
.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 128, 512, 192 },
};
 
static const struct gt_info ivb_gt2_info = {
.max_vs_threads = 128,
.max_gs_threads = 128,
.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
.urb = { 256, 704, 320 },
};
 
static const struct gt_info hsw_gt_info = {
.max_vs_threads = 8,
.max_gs_threads = 8,
.max_wm_threads =
(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 64, 64 },
};
 
static const struct gt_info hsw_gt1_info = {
.max_vs_threads = 70,
.max_gs_threads = 70,
.max_wm_threads =
(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 128, 640, 256 },
};
 
static const struct gt_info hsw_gt2_info = {
.max_vs_threads = 280,
.max_gs_threads = 280,
.max_wm_threads =
(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
1 << HSW_PS_SAMPLE_MASK_SHIFT,
.urb = { 256, 1664, 640 },
};
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
 
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
 
#define KERNEL(kernel_enum, kernel, num_surfaces) \
[GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
[GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
int num_surfaces;
} wm_kernels[] = {
NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
 
NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
 
NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
 
NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
 
NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
 
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
 
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen7_blend_op[] = {
/* Clear */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
/* Src */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
/* Over */ {1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
/* In */ {0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
};
 
/**
* Highest-valued BLENDFACTOR used in gen7_blend_op.
*
* This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
* GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)
 
#define GEN7_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen7_blend_state), 64)
 
#define BLEND_OFFSET(s, d) \
((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
(((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))
 
#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
 
#define SAMPLER_OFFSET(sf, se, mf, me) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))
 
#define VERTEX_2s2s 0
 
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
 
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
 
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
static inline bool too_large(int width, int height)
{
return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
}
 
static uint32_t gen7_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
 
 
src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
 
 
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN7_BLENDFACTOR_DST_ALPHA)
src = GEN7_BLENDFACTOR_ONE;
else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
src = GEN7_BLENDFACTOR_ZERO;
}
 
/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen7_blend_op[op].src_alpha) {
if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
dst = GEN7_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
}
#endif
 
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, (int)BLEND_OFFSET(src, dst)));
return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen7_get_card_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
}
}
 
static uint32_t gen7_get_dest_format(PictFormat format)
{
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8:
return GEN7_SURFACEFORMAT_A8_UNORM;
}
}
 
static int
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
int base;
 
if (has_mask) {
if (is_ca) {
if (gen7_blend_op[op].src_alpha)
base = GEN7_WM_KERNEL_MASKSA;
else
base = GEN7_WM_KERNEL_MASKCA;
} else
base = GEN7_WM_KERNEL_MASK;
} else
base = GEN7_WM_KERNEL_NOMASK;
 
return base + !is_affine;
}
 
static void
gen7_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
OUT_BATCH(8); /* in 1KBs */
 
/* num of VS entries must be divisible by 8 if size < 9 */
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
}
 
static void
gen7_emit_state_base_address(struct sna *sna)
{
OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen7.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
static void
gen7_disable_vs(struct sna *sna)
{
/* For future reference:
* A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
* to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
* 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
* 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
*
* Here we saved by the full-flush incurred when emitting
* the batchbuffer.
*/
OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
OUT_BATCH(0); /* no VS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
 
#if 0
OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
OUT_BATCH(0);
#endif
}
 
static void
gen7_disable_hs(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
OUT_BATCH(0); /* no HS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
 
#if 0
OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
OUT_BATCH(0);
#endif
}
 
static void
gen7_disable_te(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen7_disable_ds(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
#if 0
OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
OUT_BATCH(0);
#endif
}
 
static void
gen7_disable_gs(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
OUT_BATCH(0); /* no GS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
 
#if 0
OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
OUT_BATCH(0);
#endif
}
 
static void
gen7_disable_streamout(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen7_emit_sf_invariant(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen7_emit_cc_invariant(struct sna *sna)
{
#if 0 /* unused, no change */
OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
OUT_BATCH(0);
#endif
 
/* XXX clear to be safe */
OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
OUT_BATCH(0);
}
 
static void
gen7_disable_clip(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
OUT_BATCH(0);
}
 
static void
gen7_emit_wm_invariant(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
OUT_BATCH(0);
 
#if 0
/* XXX length bias of 7 in old spec? */
OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
#endif
}
 
static void
gen7_emit_null_depth_buffer(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
OUT_BATCH(0); /* disable depth, stencil and hiz */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
#if 0
OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
#endif
}
 
static void
gen7_emit_invariant(struct sna *sna)
{
OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
OUT_BATCH(1);
 
gen7_emit_urb(sna);
 
gen7_emit_state_base_address(sna);
 
gen7_disable_vs(sna);
gen7_disable_hs(sna);
gen7_disable_te(sna);
gen7_disable_ds(sna);
gen7_disable_gs(sna);
gen7_disable_clip(sna);
gen7_emit_sf_invariant(sna);
gen7_emit_wm_invariant(sna);
gen7_emit_cc_invariant(sna);
gen7_disable_streamout(sna);
gen7_emit_null_depth_buffer(sna);
 
sna->render_state.gen7.needs_invariant = false;
}
 
static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
struct gen7_render_state *render = &sna->render_state.gen7;
 
if (render->blend == blend_offset)
return;
 
DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
 
/* XXX can have upto 8 blend states preload, selectable via
* Render Target Index. What other side-effects of Render Target Index?
*/
 
assert (is_aligned(render->cc_blend + blend_offset, 64));
OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
OUT_BATCH((render->cc_blend + blend_offset) | 1);
 
render->blend = blend_offset;
}
 
static void
gen7_emit_sampler(struct sna *sna, uint32_t state)
{
if (sna->render_state.gen7.samplers == state)
return;
 
sna->render_state.gen7.samplers = state;
 
DBG(("%s: sampler = %x\n", __FUNCTION__, state));
 
assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
OUT_BATCH(sna->render_state.gen7.wm_state + state);
}
 
static void
gen7_emit_sf(struct sna *sna, bool has_mask)
{
int num_sf_outputs = has_mask ? 2 : 1;
 
if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
return;
 
DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
__FUNCTION__, num_sf_outputs, 1, 0));
 
sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
 
OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0); /* dw4 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* dw8 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* dw12 */
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
const uint32_t *kernels;
 
if (sna->render_state.gen7.kernel == kernel)
return;
 
sna->render_state.gen7.kernel = kernel;
kernels = sna->render_state.gen7.wm_kernel[kernel];
 
DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
__FUNCTION__,
wm_kernels[kernel].name,
wm_kernels[kernel].num_surfaces,
kernels[0], kernels[1], kernels[2]));
 
OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0); /* scratch address */
OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
(kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
(kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
(kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
GEN7_PS_ATTRIBUTE_ENABLE);
OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
OUT_BATCH(kernels[2]);
OUT_BATCH(kernels[1]);
}
 
static bool
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen7.surface_table == offset)
return false;
 
/* Binding table pointers */
assert(is_aligned(4*offset, 32));
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
OUT_BATCH(offset*4);
 
sna->render_state.gen7.surface_table = offset;
return true;
}
 
static bool
gen7_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (sna->render_state.gen7.drawrect_limit == limit &&
sna->render_state.gen7.drawrect_offset == offset)
return true;
 
sna->render_state.gen7.drawrect_offset = offset;
sna->render_state.gen7.drawrect_limit = limit;
 
OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
return false;
}
 
static void
gen7_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen7_render_state *render = &sna->render_state.gen7;
uint32_t src_format, dw;
int id = GEN7_VERTEX(op->u.gen7.flags);
bool has_mask;
 
DBG(("%s: setup id=%d\n", __FUNCTION__, id));
 
if (render->ve_id == id)
return;
render->ve_id = id;
 
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
has_mask = (id >> 2) != 0;
OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
((2 * (3 + has_mask)) + 1 - 2));
 
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
0 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
 
/* x,y */
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
0 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
 
/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
case 0:
src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
src_format << GEN7_VE0_FORMAT_SHIFT |
4 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
 
/* u1, v1, w1 */
if (has_mask) {
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
case 2:
src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
src_format << GEN7_VE0_FORMAT_SHIFT |
offset << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
inline static void
gen7_emit_pipe_invalidate(struct sna *sna)
{
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
GEN7_PIPE_CONTROL_TC_FLUSH |
GEN7_PIPE_CONTROL_CS_STALL);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
inline static void
gen7_emit_pipe_flush(struct sna *sna)
{
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
inline static void
gen7_emit_pipe_stall(struct sna *sna)
{
OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen7_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall;
 
if (sna->render_state.gen7.emit_flush)
gen7_emit_pipe_flush(sna);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
gen7_emit_vertex_elements(sna, op);
 
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_stall &= gen7_emit_drawing_rectangle(sna, op);
 
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
if (need_stall)
gen7_emit_pipe_stall(sna);
 
sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
 
static bool gen7_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen7_render_state *state = &sna->render_state.gen7;
 
if (!op->need_magic_ca_pass)
return false;
 
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
 
gen7_emit_pipe_stall(sna);
 
gen7_emit_cc(sna,
GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
op->dst.format)));
gen7_emit_wm(sna,
gen7_choose_composite_kernel(PictOpAdd,
true, true,
op->is_affine));
 
OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
 
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
static void
sampler_state_init(struct gen7_sampler_state *sampler_state,
sampler_filter_t filter,
sampler_extend_t extend)
{
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
 
/* We use the legacy mode to get the semantics specified by
* the Render extension. */
sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
 
switch (filter) {
default:
case SAMPLER_FILTER_NEAREST:
sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
break;
case SAMPLER_FILTER_BILINEAR:
sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
break;
}
 
switch (extend) {
default:
case SAMPLER_EXTEND_NONE:
sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
break;
case SAMPLER_EXTEND_REPEAT:
sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
break;
case SAMPLER_EXTEND_PAD:
sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
break;
case SAMPLER_EXTEND_REFLECT:
sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
break;
}
}
 
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
case I915_TILING_X: return GEN7_SURFACE_TILED;
case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
}
}
 
/**
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static uint32_t
gen7_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t *ss;
uint32_t domains;
int offset;
uint32_t is_scanout = is_dst && bo->scanout;
 
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}
 
offset = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
gen7_tiling_bits(bo->tiling) |
format << GEN7_SURFACE_FORMAT_SHIFT);
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
ss[4] = 0;
ss[5] = is_scanout ? 0 : 3 << 16;
ss[6] = 0;
ss[7] = 0;
if (sna->kgem.gen == 075)
ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
 
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
 
return offset * sizeof(uint32_t);
}
 
static void gen7_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = GEN7_VERTEX(op->u.gen7.flags);
 
OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
GEN7_VB0_VERTEXDATA |
GEN7_VB0_ADDRESS_MODIFY_ENABLE |
4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
 
sna->render.vb_id |= 1 << id;
}
 
static void gen7_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
 
sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
}
 
static bool gen7_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
int ndwords;
 
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;
 
ndwords = op->need_magic_ca_pass ? 60 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
gen7_emit_vertex_buffer(sna, op);
 
gen7_emit_primitive(sna);
return true;
}
 
static int gen7_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen7_magic_ca_pass(sna, op)) {
gen7_emit_pipe_stall(sna);
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
}
}
 
return gen4_vertex_finish(sna);
}
 
inline static int gen7_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
int rem;
 
assert(want);
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen7_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen7_rectangle_begin(sna, op))
goto flush;
else
goto start;
}
 
assert(op->floats_per_rect >= vertex_space(sna));
assert(rem <= vertex_space(sna));
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
assert(want > 0);
sna->render.vertex_index += 3*want;
return want;
 
flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
uint16_t *offset)
{
uint32_t *table;
 
sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
/* Clear all surplus entries to zero in case of prefetch */
table = memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen7_surface_state));
 
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
*offset = sna->kgem.surface;
return table;
}
 
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
_kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
assert(sna->kgem.mode == KGEM_RENDER);
assert(sna->kgem.ring == KGEM_RENDER);
 
if (sna->render_state.gen7.needs_invariant)
gen7_emit_invariant(sna);
}
 
static void gen7_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
 
gen7_get_batch(sna, op);
 
binding_table = gen7_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen7_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
binding_table[2] =
gen7_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
offset = sna->render_state.gen7.surface_table;
}
 
gen7_emit_state(sna, op, offset);
}
 
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen7.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
}
}
 
fastcall static void
gen7_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
op->prim_emit(sna, op, r);
}
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
char *base, *ptr;
int src, dst;
 
base = sna_static_stream_map(stream,
GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
64);
 
ptr = base;
for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
struct gen7_blend_state *blend =
(struct gen7_blend_state *)ptr;
 
blend->blend0.dest_blend_factor = dst;
blend->blend0.source_blend_factor = src;
blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
blend->blend0.blend_enable =
!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
 
blend->blend1.post_blend_clamp_enable = 1;
blend->blend1.pre_blend_clamp_enable = 1;
 
ptr += GEN7_BLEND_STATE_PADDED_SIZE;
}
}
 
return sna_static_stream_offsetof(stream, base);
}
 
 
 
static void gen7_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
}
 
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp)
{
 
 
tmp->op = PictOpSrc;
 
tmp->dst.pixmap = dst;
tmp->dst.bo = dst_bo;
tmp->dst.width = dst->drawable.width;
tmp->dst.height = dst->drawable.height;
tmp->dst.format = PICT_x8r8g8b8;
 
 
tmp->src.repeat = RepeatNone;
tmp->src.filter = PictFilterNearest;
tmp->src.is_affine = true;
 
tmp->src.bo = src_bo;
tmp->src.pict_format = PICT_x8r8g8b8;
tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
tmp->src.width = src->drawable.width;
tmp->src.height = src->drawable.height;
 
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.is_affine = true;
 
tmp->mask.bo = mask_bo;
tmp->mask.pict_format = PIXMAN_a8;
tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
tmp->mask.width = mask->drawable.width;
tmp->mask.height = mask->drawable.height;
 
if( scale )
{
tmp->src.scale[0] = 1.f/width;
tmp->src.scale[1] = 1.f/height;
}
else
{
tmp->src.scale[0] = 1.f/src->drawable.width;
tmp->src.scale[1] = 1.f/src->drawable.height;
}
 
tmp->mask.scale[0] = 1.f/mask->drawable.width;
tmp->mask.scale[1] = 1.f/mask->drawable.height;
 
 
 
tmp->u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen7_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
/* gen7_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine), */
GEN7_WM_KERNEL_MASK,
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen7_render_composite_blt;
// tmp->box = gen7_render_composite_box;
tmp->done = gen7_render_composite_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen7_emit_composite_state(sna, tmp);
gen7_align_vertex(sna, tmp);
return true;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static void gen7_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
static void
gen7_render_context_switch(struct kgem *kgem,
int new_mode)
{
if (kgem->nbatch) {
DBG(("%s: switch rings %d -> %d\n",
__FUNCTION__, kgem->mode, new_mode));
_kgem_submit(kgem);
}
 
kgem->ring = new_mode;
}
 
static void
gen7_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void
gen7_render_expire(struct kgem *kgem)
{
struct sna *sna;
 
sna = container_of(kgem, struct sna, kgem);
if (sna->render.vbo && !sna->render.vertex_used) {
DBG(("%s: discarding vbo\n", __FUNCTION__));
kgem_bo_destroy(kgem, sna->render.vbo);
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
static void gen7_render_reset(struct sna *sna)
{
sna->render_state.gen7.emit_flush = false;
sna->render_state.gen7.needs_invariant = true;
sna->render_state.gen7.ve_id = 3 << 2;
sna->render_state.gen7.last_primitive = -1;
 
sna->render_state.gen7.num_sf_outputs = 0;
sna->render_state.gen7.samplers = -1;
sna->render_state.gen7.blend = -1;
sna->render_state.gen7.kernel = -1;
sna->render_state.gen7.drawrect_offset = -1;
sna->render_state.gen7.drawrect_limit = -1;
sna->render_state.gen7.surface_table = -1;
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen7_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
static bool is_gt2(struct sna *sna)
{
return DEVICE_ID(sna->PciInfo) & 0x20;
}
 
static bool is_mobile(struct sna *sna)
{
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
{
struct gen7_render_state *state = &sna->render_state.gen7;
struct sna_static_stream general;
struct gen7_sampler_state *ss;
int i, j, k, l, m;
 
if (sna->kgem.gen == 070) {
state->info = &ivb_gt_info;
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (sna->kgem.gen == 075) {
state->info = &hsw_gt_info;
if (DEVICE_ID(sna->PciInfo) & 0xf) {
state->info = &hsw_gt1_info;
if (is_gt2(sna))
state->info = &hsw_gt2_info;
}
} else
return false;
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
 
for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
if (USE_8_PIXEL_DISPATCH) {
state->wm_kernel[m][0] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 8);
}
 
if (USE_16_PIXEL_DISPATCH) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
 
if (USE_32_PIXEL_DISPATCH) {
state->wm_kernel[m][2] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 32);
}
}
assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
 
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
(2 +
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT),
32);
state->wm_state = sna_static_stream_offsetof(&general, ss);
sampler_copy_init(ss); ss += 2;
sampler_fill_init(ss); ss += 2;
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
sampler_state_init(ss++, i, j);
sampler_state_init(ss++, k, l);
}
}
}
}
 
state->cc_blend = gen7_composite_create_blend_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
bool gen7_render_init(struct sna *sna)
{
if (!gen7_render_setup(sna))
return false;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
 
sna->render.blit_tex = gen7_blit_tex;
sna->render.flush = gen7_render_flush;
sna->render.reset = gen7_render_reset;
sna->render.fini = gen7_render_fini;
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
return true;
}
 
 
/drivers/video/Intel-2D/kgem-sna.c
0,0 → 1,4257
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
static inline
int user_free(void *mem)
{
int val;
__asm__ __volatile__(
"int $0x40"
:"=a"(val)
:"a"(68),"b"(12),"c"(mem));
return val;
}
 
 
unsigned int cpu_cache_size();
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
#define DBG_NO_HW 0
#define DBG_NO_TILING 1
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 1
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
 
#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif
 
#define SHOW_BATCH 0
 
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif
 
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is being simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
* offsets be aligned to a cacheline (64 bytes).
*/
#define UPLOAD_ALIGNMENT 128
 
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
 
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
struct local_i915_gem_userptr {
uint64_t user_ptr;
uint32_t user_size;
uint32_t flags;
#define I915_USERPTR_READ_ONLY (1<<0)
#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
uint32_t handle;
};
 
#define UNCACHED 0
#define SNOOPED 1
 
struct local_i915_gem_cacheing {
uint32_t handle;
uint32_t cacheing;
};
 
#define LOCAL_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING
 
struct local_fbinfo {
int width;
int height;
int pitch;
int tiling;
};
 
struct kgem_buffer {
struct kgem_bo base;
void *mem;
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
 
static inline int bytes(struct kgem_bo *bo)
{
return __kgem_bo_size(bo);
}
 
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
 
#ifdef DEBUG_MEMORY
static void debug_alloc(struct kgem *kgem, size_t size)
{
kgem->debug_memory.bo_allocs++;
kgem->debug_memory.bo_bytes += size;
}
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
debug_alloc(kgem, bytes(bo));
}
#else
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
 
static void kgem_sna_reset(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.reset(sna);
sna->blt_state.fill_bo = 0;
}
 
static void kgem_sna_flush(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
 
sna->render.flush(sna);
 
// if (sna->render.solid_cache.dirty)
// sna_render_flush_solid(sna);
}
 
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
struct drm_i915_gem_set_tiling set_tiling;
int ret;
 
if (DBG_NO_TILING)
return false;
/*
VG_CLEAR(set_tiling);
do {
set_tiling.handle = handle;
set_tiling.tiling_mode = tiling;
set_tiling.stride = stride;
 
ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
*/
return false;//ret == 0;
}
 
static bool gem_set_cacheing(int fd, uint32_t handle, int cacheing)
{
struct local_i915_gem_cacheing arg;
 
VG_CLEAR(arg);
arg.handle = handle;
arg.cacheing = cacheing;
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHEING, &arg) == 0;
}
 
 
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: not retiring per-request\n", __FUNCTION__));
return false;
}
 
if (!kgem->need_retire) {
DBG(("%s: nothing to retire\n", __FUNCTION__));
return false;
}
 
if (kgem_retire(kgem))
return true;
 
if (flags & CREATE_NO_THROTTLE || !kgem->need_throttle) {
DBG(("%s: not throttling\n", __FUNCTION__));
return false;
}
 
kgem_throttle(kgem);
return kgem_retire(kgem);
}
 
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
void *ptr;
 
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
 
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
printf("%s: failed to retrieve GTT offset for handle=%d: %d\n",
__FUNCTION__, bo->handle, 0);
(void)__kgem_throttle_retire(kgem, 0);
if (kgem_expire_cache(kgem))
goto retry_gtt;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry_gtt;
}
 
return NULL;
}
 
retry_mmap:
ptr = (void*)(int)mmap_arg.offset;
if (ptr == NULL) {
printf("%s: failed to mmap %d, %d bytes, into GTT domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
 
}
 
return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
/* align the transfer to cachelines; fortuitously this is safe! */
if ((offset | length) & 63) {
pwrite.offset = offset & ~63;
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
} else {
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
}
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
VG_CLEAR(busy);
busy.handle = handle;
busy.busy = !kgem->wedged;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
__FUNCTION__, handle, busy.busy, kgem->wedged));
 
return busy.busy;
}
 
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
__FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
__kgem_busy(kgem, bo->handle)));
assert(bo->exec == NULL);
assert(list_is_empty(&bo->vma));
 
if (bo->rq) {
if (!__kgem_busy(kgem, bo->handle)) {
__kgem_bo_clear_busy(bo);
kgem_retire(kgem);
}
} else {
assert(!bo->needs_flush);
ASSERT_IDLE(kgem, bo->handle);
}
}
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
assert(bo->refcnt);
assert(!bo->purged);
assert(bo->proxy == NULL);
ASSERT_IDLE(kgem, bo->handle);
 
assert(length <= bytes(bo));
if (gem_write(kgem->fd, bo->handle, 0, length, data))
return false;
 
DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
if (bo->exec == NULL) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_NONE;
}
return true;
}
 
static uint32_t gem_create(int fd, int num_pages)
{
struct drm_i915_gem_create create;
 
VG_CLEAR(create);
create.handle = 0;
create.size = PAGE_SIZE * num_pages;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 
return create.handle;
}
 
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->exec == NULL);
assert(!bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = 1;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return true;
#endif
}
 
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
if (!bo->purged)
return true;
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
return madv.retained;
 
return false;
#endif
}
 
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = !madv.retained;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return false;
#endif
}
 
static void gem_close(int fd, uint32_t handle)
{
struct drm_gem_close close;
 
VG_CLEAR(close);
close.handle = handle;
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
 
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
#else
unsigned int v = 0;
 
while (word >>= 1)
v++;
 
return v;
#endif
}
 
constant inline static int cache_bucket(int num_pages)
{
return __fls(num_pages);
}
 
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
int handle, int num_pages)
{
assert(num_pages);
memset(bo, 0, sizeof(*bo));
 
bo->refcnt = 1;
bo->handle = handle;
bo->target_handle = -1;
num_pages(bo) = num_pages;
bucket(bo) = cache_bucket(num_pages);
bo->reusable = true;
bo->domain = DOMAIN_CPU;
list_init(&bo->request);
list_init(&bo->list);
list_init(&bo->vma);
 
return bo;
}
 
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
struct kgem_bo *bo;
 
if (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
} else {
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
}
 
return __kgem_bo_init(bo, handle, num_pages);
}
 
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
struct kgem_request *rq;
 
rq = __kgem_freed_request;
if (rq) {
__kgem_freed_request = *(struct kgem_request **)rq;
} else {
rq = malloc(sizeof(*rq));
if (rq == NULL)
rq = &kgem->static_request;
}
 
list_init(&rq->buffers);
rq->bo = NULL;
rq->ring = 0;
 
return rq;
}
 
static void __kgem_request_free(struct kgem_request *rq)
{
_list_del(&rq->list);
*(struct kgem_request **)rq = __kgem_freed_request;
__kgem_freed_request = rq;
}
 
static struct list *inactive(struct kgem *kgem, int num_pages)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->inactive[cache_bucket(num_pages)];
}
 
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
assert(cache_bucket(num_pages) < NUM_CACHE_BUCKETS);
return &kgem->active[cache_bucket(num_pages)][tiling];
}
 
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
/* XXX assume that only future chipsets are unknown and follow
* the post gen2 PCI layout.
*/
// return dev->regions[gen < 030 ? 0 : 2].size;
 
return 0;
}
 
static size_t
total_ram_size(void)
{
uint32_t data[9];
size_t size = 0;
asm volatile("int $0x40"
: "=a" (size)
: "a" (18),"b"(20), "c" (data)
: "memory");
return size != -1 ? size : 0;
}
 
static int gem_param(struct kgem *kgem, int name)
{
drm_i915_getparam_t gp;
int v = -1; /* No param uses the sign bit, reserve it for errors */
 
VG_CLEAR(gp);
gp.param = name;
gp.value = &v;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp))
return -1;
 
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
return v;
}
 
static bool test_has_execbuffer2(struct kgem *kgem)
{
return 1;
}
 
static bool test_has_no_reloc(struct kgem *kgem)
{
if (DBG_NO_FAST_RELOC)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
 
static bool test_has_handle_lut(struct kgem *kgem)
{
if (DBG_NO_HANDLE_LUT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
 
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
bool detected = false;
int ret;
 
if (DBG_NO_SEMAPHORES)
return false;
 
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
if (ret != -1)
return ret > 0;
 
return detected;
}
 
static bool __kgem_throttle(struct kgem *kgem)
{
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL) == 0)
return false;
 
return errno == EIO;
}
 
static bool is_hw_supported(struct kgem *kgem,
struct pci_device *dev)
{
if (DBG_NO_HW)
return false;
 
if (!test_has_execbuffer2(kgem))
return false;
 
if (kgem->gen == (unsigned)-1) /* unknown chipset, assume future gen */
return kgem->has_blt;
 
/* Although pre-855gm the GMCH is fubar, it works mostly. So
* let the user decide through "NoAccel" whether or not to risk
* hw acceleration.
*/
 
if (kgem->gen == 060 && dev->revision < 8) {
/* pre-production SNB with dysfunctional BLT */
return false;
}
 
if (kgem->gen >= 060) /* Only if the kernel supports the BLT ring */
return kgem->has_blt;
 
return true;
}
 
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
if (kgem->gen < 040) {
if (DBG_NO_RELAXED_FENCING)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
} else
return true;
}
 
static bool test_has_llc(struct kgem *kgem)
{
int has_llc = -1;
 
if (DBG_NO_LLC)
return false;
 
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
}
 
return has_llc;
}
 
static bool test_has_cacheing(struct kgem *kgem)
{
uint32_t handle;
bool ret;
 
if (DBG_NO_CACHE_LEVEL)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
handle = gem_create(kgem->fd, 1);
if (handle == 0)
return false;
 
ret = gem_set_cacheing(kgem->fd, handle, UNCACHED);
gem_close(kgem->fd, handle);
return ret;
}
 
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
uint32_t handle;
void *ptr;
 
if (DBG_NO_USERPTR)
return false;
 
/* Incoherent blt and sampler hangs the GPU */
if (kgem->gen == 040)
return false;
 
ptr = malloc(PAGE_SIZE);
handle = gem_userptr(kgem->fd, ptr, PAGE_SIZE, false);
gem_close(kgem->fd, handle);
free(ptr);
 
return handle != 0;
#else
return false;
#endif
}
 
static bool test_has_secure_batches(struct kgem *kgem)
{
if (DBG_NO_SECURE_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}
 
static bool test_has_pinned_batches(struct kgem *kgem)
{
if (DBG_NO_PINNED_BATCHES)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
}
 
 
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
int count[2] = { 2, 1 };
int size[2] = { 1, 2 };
int n, i;
 
if (kgem->wedged)
return true;
 
for (n = 0; n < ARRAY_SIZE(count); n++) {
for (i = 0; i < count[n]; i++) {
struct drm_i915_gem_pin pin;
struct kgem_bo *bo;
 
VG_CLEAR(pin);
 
pin.handle = gem_create(kgem->fd, size[n]);
if (pin.handle == 0)
goto err;
 
DBG(("%s: new handle=%d, num_pages=%d\n",
__FUNCTION__, pin.handle, size[n]));
 
bo = __kgem_bo_alloc(pin.handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, pin.handle);
goto err;
}
 
pin.alignment = 0;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
gem_close(kgem->fd, pin.handle);
goto err;
}
bo->presumed_offset = pin.offset;
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
}
 
return true;
 
err:
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
 
/* For simplicity populate the lists with a single unpinned bo */
for (n = 0; n < ARRAY_SIZE(count); n++) {
struct kgem_bo *bo;
uint32_t handle;
 
handle = gem_create(kgem->fd, size[n]);
if (handle == 0)
break;
 
bo = __kgem_bo_alloc(handle, size[n]);
if (bo == NULL) {
gem_close(kgem->fd, handle);
break;
}
 
debug_alloc__bo(kgem, bo);
list_add(&bo->list, &kgem->pinned_batches[n]);
}
return false;
}
 
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
unsigned half_gpu_max;
unsigned int i, j;
 
DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));
 
memset(kgem, 0, sizeof(*kgem));
 
kgem->fd = fd;
kgem->gen = gen;
 
list_init(&kgem->requests[0]);
list_init(&kgem->requests[1]);
list_init(&kgem->batch_buffers);
list_init(&kgem->active_buffers);
list_init(&kgem->flushing);
list_init(&kgem->large);
list_init(&kgem->large_inactive);
list_init(&kgem->snoop);
list_init(&kgem->scanout);
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
list_init(&kgem->pinned_batches[i]);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
list_init(&kgem->active[i][j]);
}
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
list_init(&kgem->vma[i].inactive[j]);
}
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
 
kgem->has_relaxed_delta =
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));
 
kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));
 
kgem->has_llc = test_has_llc(kgem);
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
kgem->has_llc));
 
kgem->has_cacheing = test_has_cacheing(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_cacheing));
 
kgem->has_userptr = test_has_userptr(kgem);
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
 
kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
 
kgem->has_handle_lut = test_has_handle_lut(kgem);
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
 
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));
 
kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));
 
kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
 
kgem->has_pinned_batches = test_has_pinned_batches(kgem);
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
kgem->has_pinned_batches));
 
if (!is_hw_supported(kgem, dev)) {
printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
kgem->wedged = 1;
} else if (__kgem_throttle(kgem)) {
printf("Detected a hung GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;
 
if (!kgem_init_pinned_batches(kgem) && gen == 020) {
printf("Unable to reserve memory for GPU, disabling acceleration.\n");
kgem->wedged = 1;
}
 
DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));
 
kgem->min_alignment = 16;
if (gen < 040)
kgem->min_alignment = 64;
 
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: half cpu cache %d pages\n", __FUNCTION__,
kgem->half_cpu_cache_pages));
 
kgem->next_request = __kgem_request_alloc(kgem);
 
DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_cacheing),
kgem->has_llc, kgem->has_cacheing, kgem->has_userptr));
 
VG_CLEAR(aperture);
aperture.aper_size = 0;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;
 
DBG(("%s: aperture size %lld, available now %lld\n",
__FUNCTION__,
(long long)aperture.aper_size,
(long long)aperture.aper_available_size));
 
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
}
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));
 
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
kgem->aperture_mappable = aperture.aper_size;
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));
 
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
 
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc)
kgem->max_gpu_size = MAX_CACHE_SIZE;
 
totalram = total_ram_size();
if (totalram == 0) {
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
__FUNCTION__));
totalram = kgem->aperture_total;
}
DBG(("%s: total ram=%u\n", __FUNCTION__, totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
 
kgem->max_cpu_size = kgem->max_object_size;
 
half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;
 
if (kgem->has_llc)
kgem->max_upload_tile_size = kgem->max_copy_tile_size;
else
kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
if (kgem->max_upload_tile_size > half_gpu_max)
kgem->max_upload_tile_size = half_gpu_max;
 
kgem->large_object_size = MAX_CACHE_SIZE;
if (kgem->large_object_size > kgem->max_gpu_size)
kgem->large_object_size = kgem->max_gpu_size;
 
if (kgem->has_llc | kgem->has_cacheing | kgem->has_userptr) {
if (kgem->large_object_size > kgem->max_cpu_size)
kgem->large_object_size = kgem->max_cpu_size;
} else
kgem->max_cpu_size = 0;
if (DBG_NO_CPU)
kgem->max_cpu_size = 0;
 
DBG(("%s: maximum object size=%d\n",
__FUNCTION__, kgem->max_object_size));
DBG(("%s: large object thresold=%d\n",
__FUNCTION__, kgem->large_object_size));
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
__FUNCTION__,
kgem->max_gpu_size, kgem->max_cpu_size,
kgem->max_upload_tile_size, kgem->max_copy_tile_size));
 
/* Convert the aperture thresholds to pages */
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;
 
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
 
kgem->batch_flags_base = 0;
if (kgem->has_no_reloc)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
 
/* XXX hopefully a good approximation */
static uint32_t kgem_get_unique_id(struct kgem *kgem)
{
uint32_t id;
id = ++kgem->unique_id;
if (id == 0)
id = ++kgem->unique_id;
return id;
}
 
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
if (flags & CREATE_PRIME)
return 256;
if (flags & CREATE_SCANOUT)
return 64;
return kgem->min_alignment;
}
 
static uint32_t kgem_untiled_pitch(struct kgem *kgem,
uint32_t width, uint32_t bpp,
unsigned flags)
{
width = ALIGN(width, 2) * bpp >> 3;
return ALIGN(width, kgem_pitch_alignment(kgem, flags));
}
 
uint32_t kgem_surface_size(struct kgem *kgem,
bool relaxed_fencing,
unsigned flags,
uint32_t width,
uint32_t height,
uint32_t bpp,
uint32_t tiling,
uint32_t *pitch)
{
uint32_t tile_width, tile_height;
uint32_t size;
 
assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
 
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
tile_width = 128;
tile_height = 32;
} else {
tile_width = 512;
tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;
 
/* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
tile_height = 64;
break;
}
 
*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);
 
/* If it is too wide for the blitter, don't even bother. */
if (tiling != I915_TILING_NONE) {
if (*pitch > 8192)
return 0;
 
for (size = tile_width; size < *pitch; size <<= 1)
;
*pitch = size;
} else {
if (*pitch >= 32768)
return 0;
}
 
size = *pitch * height;
if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);
 
/* We need to allocate a pot fence region for a tiled buffer. */
if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
while (tile_width < size)
tile_width *= 2;
return tile_width;
}
 
static uint32_t kgem_aligned_height(struct kgem *kgem,
uint32_t height, uint32_t tiling)
{
uint32_t tile_height;
 
if (kgem->gen <= 030) {
tile_height = tiling ? kgem->gen < 030 ? 32 : 16 : 1;
} else switch (tiling) {
/* XXX align to an even tile row */
default:
case I915_TILING_NONE:
tile_height = 1;
break;
case I915_TILING_X:
tile_height = 16;
break;
case I915_TILING_Y:
tile_height = 64;
break;
}
 
return ALIGN(height, tile_height);
}
 
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_exec_object2 *exec;
 
DBG(("%s: handle=%d, index=%d\n",
__FUNCTION__, bo->handle, kgem->nexec));
 
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
 
kgem->aperture += num_pages(bo);
 
return exec;
}
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
}
 
static uint32_t kgem_end_batch(struct kgem *kgem)
{
kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;
if (kgem->nbatch & 1)
kgem->batch[kgem->nbatch++] = MI_NOOP;
 
return kgem->nbatch;
}
 
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;
 
if (kgem->nreloc__self == 0)
return;
 
for (n = 0; n < kgem->nreloc__self; n++) {
int i = kgem->reloc__self[n];
assert(kgem->reloc[i].target_handle == ~0U);
kgem->reloc[i].target_handle = bo->target_handle;
kgem->reloc[i].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
kgem->reloc[i].delta + bo->presumed_offset;
}
 
if (n == 256) {
for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
}
}
 
}
 
}
 
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_bo_binding *b;
 
b = bo->binding.next;
while (b) {
struct kgem_bo_binding *next = b->next;
free (b);
b = next;
}
}
 
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
int type = IS_CPU_MAP(bo->map);
 
assert(!IS_USER_MAP(bo->map));
 
DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
__FUNCTION__, type ? "CPU" : "GTT",
bo->handle, kgem->vma[type].count));
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
user_free(MAP(bo->map));
bo->map = NULL;
 
if (!list_is_empty(&bo->vma)) {
list_del(&bo->vma);
kgem->vma[type].count--;
}
}
 
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
assert(!bo->snoop || bo->rq == NULL);
 
#ifdef DEBUG_MEMORY
kgem->debug_memory.bo_allocs--;
kgem->debug_memory.bo_bytes -= bytes(bo);
#endif
 
kgem_bo_binding_free(kgem, bo);
 
if (IS_USER_MAP(bo->map)) {
assert(bo->rq == NULL);
assert(MAP(bo->map) != bo || bo->io);
if (bo != MAP(bo->map)) {
DBG(("%s: freeing snooped base\n", __FUNCTION__));
free(MAP(bo->map));
}
bo->map = NULL;
}
if (bo->map)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
 
_list_del(&bo->list);
_list_del(&bo->request);
gem_close(kgem->fd, bo->handle);
 
if (!bo->io) {
*(struct kgem_bo **)bo = __kgem_freed_bo;
__kgem_freed_bo = bo;
} else
free(bo);
}
 
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));
 
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(bo->domain != DOMAIN_GPU);
assert(!bo->proxy);
assert(!bo->io);
assert(!bo->scanout);
assert(!bo->needs_flush);
assert(list_is_empty(&bo->vma));
ASSERT_IDLE(kgem, bo->handle);
 
kgem->need_expire = true;
 
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
list_move(&bo->list, &kgem->large_inactive);
return;
}
 
assert(bo->flush == false);
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map) {
int type = IS_CPU_MAP(bo->map);
if (bucket(bo) >= NUM_CACHE_BUCKETS ||
(!type && !__kgem_bo_is_mappable(kgem, bo))) {
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
}
if (bo->map) {
list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
kgem->vma[type].count++;
}
}
}
 
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
struct kgem_bo *base;
 
if (!bo->io)
return bo;
 
assert(!bo->snoop);
base = malloc(sizeof(*base));
if (base) {
DBG(("%s: transferring io handle=%d to bo\n",
__FUNCTION__, bo->handle));
/* transfer the handle to a minimum bo */
memcpy(base, bo, sizeof(*base));
base->io = false;
list_init(&base->list);
list_replace(&bo->request, &base->request);
list_replace(&bo->vma, &base->vma);
free(bo);
bo = base;
} else
bo->reusable = false;
 
return bo;
}
 
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (bo->map) {
assert(!list_is_empty(&bo->vma));
list_del(&bo->vma);
kgem->vma[IS_CPU_MAP(bo->map)].count--;
}
}
 
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));
 
list_del(&bo->list);
assert(bo->rq != NULL);
if (bo->rq == (void *)kgem)
list_del(&bo->request);
assert(list_is_empty(&bo->vma));
}
 
static void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->scanout);
assert(!bo->refcnt);
assert(bo->exec == NULL);
assert(bo->proxy == NULL);
 
DBG(("%s: handle=%d, fb=%d (reusable=%d)\n",
__FUNCTION__, bo->handle, bo->delta, bo->reusable));
if (bo->delta) {
/* XXX will leak if we are not DRM_MASTER. *shrug* */
// drmModeRmFB(kgem->fd, bo->delta);
bo->delta = 0;
}
 
bo->scanout = false;
bo->flush = false;
bo->reusable = true;
 
if (kgem->has_llc &&
!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED))
bo->reusable = false;
}
 
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
 
DBG(("%s: size=%d, offset=%d, parent used=%d\n",
__FUNCTION__, bo->size.bytes, bo->delta, io->used));
 
if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) == io->used)
io->used = bo->delta;
}
 
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->scanout);
assert(bo->delta);
assert(!bo->snoop);
assert(!bo->io);
 
DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
__FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));
if (bo->rq)
list_move_tail(&bo->list, &kgem->scanout);
else
list_move(&bo->list, &kgem->scanout);
}
 
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt == 0);
assert(bo->exec == NULL);
 
if (num_pages(bo) > kgem->max_cpu_size >> 13) {
DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
__FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
kgem_bo_free(kgem, bo);
return;
}
 
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
 
DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
list_add(&bo->list, &kgem->snoop);
}
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
 
DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
 
if ((kgem->has_cacheing | kgem->has_userptr) == 0)
return NULL;
 
if (list_is_empty(&kgem->snoop)) {
DBG(("%s: inactive and cache empty\n", __FUNCTION__));
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
}
 
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
assert(!bo->scanout);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
 
if (num_pages > num_pages(bo))
continue;
 
if (num_pages(bo) > 2*num_pages) {
if (first == NULL)
first = bo;
continue;
}
 
list_del(&bo->list);
bo->pitch = 0;
bo->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
return bo;
}
 
if (first) {
list_del(&first->list);
first->pitch = 0;
first->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, first->handle, num_pages(first)));
return first;
}
 
return NULL;
}
 
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
 
assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged);
assert(bo->proxy == NULL);
 
bo->binding.offset = 0;
 
if (DBG_NO_CACHE)
goto destroy;
 
if (bo->snoop && !bo->flush) {
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(!bo->flush);
assert(list_is_empty(&bo->list));
if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
if (bo->rq == NULL) {
assert(!bo->needs_flush);
kgem_bo_move_to_snoop(kgem, bo);
}
return;
}
 
if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
return;
}
 
if (bo->io)
bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
goto destroy;
}
 
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
 
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->snoop == false);
assert(bo->io == false);
assert(bo->scanout == false);
 
if (bo->exec && kgem->nexec == 1) {
DBG(("%s: only handle in batch, discarding last operations\n",
__FUNCTION__));
assert(bo->exec == &kgem->exec[0]);
assert(kgem->exec[0].handle == bo->handle);
assert(RQ(bo->rq) == kgem->next_request);
bo->refcnt = 1;
kgem_reset(kgem);
bo->refcnt = 0;
}
 
if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
if (bo->rq) {
struct list *cache;
 
DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
if (bucket(bo) < NUM_CACHE_BUCKETS)
cache = &kgem->active[bucket(bo)][bo->tiling];
else
cache = &kgem->large;
list_add(&bo->list, cache);
return;
}
 
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
 
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
 
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;
 
DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}
 
kgem_bo_move_to_inactive(kgem, bo);
return;
 
destroy:
if (!bo->exec)
kgem_bo_free(kgem, bo);
}
 
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
__kgem_bo_destroy(kgem, bo);
}
 
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
while (!list_is_empty(&bo->base.vma)) {
struct kgem_bo *cached;
 
cached = list_first_entry(&bo->base.vma, struct kgem_bo, vma);
assert(cached->proxy == &bo->base);
list_del(&cached->vma);
 
assert(*(struct kgem_bo **)cached->map == cached);
*(struct kgem_bo **)cached->map = NULL;
cached->map = NULL;
 
kgem_bo_destroy(kgem, cached);
}
}
 
static bool kgem_retire__buffers(struct kgem *kgem)
{
bool retired = false;
 
while (!list_is_empty(&kgem->active_buffers)) {
struct kgem_buffer *bo =
list_last_entry(&kgem->active_buffers,
struct kgem_buffer,
base.list);
 
if (bo->base.rq)
break;
 
DBG(("%s: releasing upload cache for handle=%d? %d\n",
__FUNCTION__, bo->base.handle, !list_is_empty(&bo->base.vma)));
list_del(&bo->base.list);
kgem_buffer_release(kgem, bo);
kgem_bo_unref(kgem, &bo->base);
retired = true;
}
 
return retired;
}
 
static bool kgem_retire__flushing(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
bool retired = false;
 
list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
assert(bo->rq == (void *)kgem);
assert(bo->exec == NULL);
 
if (__kgem_busy(kgem, bo->handle))
break;
 
__kgem_bo_clear_busy(bo);
 
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else
kgem_bo_free(kgem, bo);
}
#if HAS_DEBUG_FULL
{
int count = 0;
list_for_each_entry(bo, &kgem->flushing, request)
count++;
printf("%s: %d bo on flushing list\n", __FUNCTION__, count);
}
#endif
 
kgem->need_retire |= !list_is_empty(&kgem->flushing);
 
return retired;
}
 
 
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
bool retired = false;
 
DBG(("%s: request %d complete\n",
__FUNCTION__, rq->bo->handle));
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
assert(RQ(bo->rq) == rq);
assert(bo->exec == NULL);
assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
 
list_del(&bo->request);
 
if (bo->needs_flush)
bo->needs_flush = __kgem_busy(kgem, bo->handle);
if (bo->needs_flush) {
DBG(("%s: moving %d to flushing\n",
__FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
continue;
}
 
bo->domain = DOMAIN_NONE;
bo->rq = NULL;
if (bo->refcnt)
continue;
 
if (bo->snoop) {
kgem_bo_move_to_snoop(kgem, bo);
} else if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
} else if ((bo = kgem_bo_replace_io(bo))->reusable &&
kgem_bo_set_purgeable(kgem, bo)) {
kgem_bo_move_to_inactive(kgem, bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
assert(rq->bo->rq == NULL);
assert(list_is_empty(&rq->bo->request));
 
if (--rq->bo->refcnt == 0) {
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, rq->bo->handle));
kgem_bo_free(kgem, rq->bo);
}
}
 
__kgem_request_free(rq);
return retired;
}
 
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
bool retired = false;
 
while (!list_is_empty(&kgem->requests[ring])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list);
if (__kgem_busy(kgem, rq->bo->handle))
break;
 
retired |= __kgem_retire_rq(kgem, rq);
}
 
#if HAS_DEBUG_FULL
{
struct kgem_bo *bo;
int count = 0;
 
list_for_each_entry(bo, &kgem->requests[ring], request)
count++;
 
bo = NULL;
if (!list_is_empty(&kgem->requests[ring]))
bo = list_first_entry(&kgem->requests[ring],
struct kgem_request,
list)->bo;
 
printf("%s: ring=%d, %d outstanding requests, oldest=%d\n",
__FUNCTION__, ring, count, bo ? bo->handle : 0);
}
#endif
 
return retired;
}
 
static bool kgem_retire__requests(struct kgem *kgem)
{
bool retired = false;
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
retired |= kgem_retire__requests_ring(kgem, n);
kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
}
 
return retired;
}
 
bool kgem_retire(struct kgem *kgem)
{
bool retired = false;
 
DBG(("%s\n", __FUNCTION__));
 
kgem->need_retire = false;
 
retired |= kgem_retire__flushing(kgem);
retired |= kgem_retire__requests(kgem);
retired |= kgem_retire__buffers(kgem);
 
DBG(("%s -- retired=%d, need_retire=%d\n",
__FUNCTION__, retired, kgem->need_retire));
 
kgem->retire(kgem);
 
return retired;
}
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
struct kgem_request *rq;
 
assert(!list_is_empty(&kgem->requests[ring]));
 
rq = list_last_entry(&kgem->requests[ring],
struct kgem_request, list);
if (__kgem_busy(kgem, rq->bo->handle)) {
DBG(("%s: last requests handle=%d still busy\n",
__FUNCTION__, rq->bo->handle));
return false;
}
 
DBG(("%s: ring=%d idle (handle=%d)\n",
__FUNCTION__, ring, rq->bo->handle));
 
kgem_retire__requests_ring(kgem, ring);
assert(list_is_empty(&kgem->requests[ring]));
return true;
}
 
static void kgem_commit(struct kgem *kgem)
{
struct kgem_request *rq = kgem->next_request;
struct kgem_bo *bo, *next;
 
list_for_each_entry_safe(bo, next, &rq->buffers, request) {
assert(next->request.prev == &bo->request);
 
DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
__FUNCTION__, bo->handle, bo->proxy != NULL,
bo->dirty, bo->needs_flush, bo->snoop,
(unsigned)bo->exec->offset));
 
assert(!bo->purged);
assert(bo->exec);
assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));
 
bo->presumed_offset = bo->exec->offset;
bo->exec = NULL;
bo->target_handle = -1;
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
kgem_bo_free(kgem, bo);
continue;
}
 
bo->binding.offset = 0;
bo->domain = DOMAIN_GPU;
bo->dirty = false;
 
if (bo->proxy) {
/* proxies are not used for domain tracking */
bo->exec = NULL;
__kgem_bo_clear_busy(bo);
}
 
kgem->scanout_busy |= bo->scanout;
}
 
if (rq == &kgem->static_request) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
}
 
kgem_retire(kgem);
assert(list_is_empty(&rq->buffers));
 
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
} else {
list_add_tail(&rq->list, &kgem->requests[rq->ring]);
kgem->need_throttle = kgem->need_retire = 1;
}
 
kgem->next_request = NULL;
}
 
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
while (!list_is_empty(head))
kgem_bo_free(kgem, list_first_entry(head, struct kgem_bo, list));
}
 
static void kgem_close_inactive(struct kgem *kgem)
{
unsigned int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
kgem_close_list(kgem, &kgem->inactive[i]);
}
 
static void kgem_finish_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo, *next;
 
list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%d\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped));
 
assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);
 
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
continue;
}
 
if (!bo->write) {
assert(bo->base.exec || bo->base.refcnt > 1);
goto decouple;
}
 
if (bo->mmapped) {
int used;
 
assert(!bo->need_io);
 
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
goto decouple;
}
 
if (!bo->used) {
/* Unless we replace the handle in the execbuffer,
* then this bo will become active. So decouple it
* from the buffer list and track it in the normal
* manner.
*/
goto decouple;
}
 
assert(bo->need_io);
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);
 
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
unsigned alloc = NUM_PAGES(bo->used);
 
shrink = search_snoop_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
void *map;
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
map = kgem_bo_map__cpu(kgem, shrink);
if (map) {
kgem_bo_sync__cpu(kgem, shrink);
memcpy(map, bo->mem, bo->used);
 
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
 
shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;
 
DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));
 
assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}
 
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.dirty;
 
bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.dirty = false;
bo->base.needs_flush = false;
bo->used = 0;
 
goto decouple;
}
 
__kgem_bo_destroy(kgem, shrink);
}
}
 
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;
 
decouple:
DBG(("%s: releasing handle=%d\n",
__FUNCTION__, bo->base.handle));
list_del(&bo->base.list);
kgem_bo_unref(kgem, &bo->base);
}
}
 
static void kgem_cleanup(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
while (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo;
 
bo = list_first_entry(&rq->buffers,
struct kgem_bo,
request);
 
bo->exec = NULL;
bo->dirty = false;
__kgem_bo_clear_busy(bo);
if (bo->refcnt == 0)
kgem_bo_free(kgem, bo);
}
 
__kgem_request_free(rq);
}
}
 
kgem_close_inactive(kgem);
}
 
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
int ret;
 
ASSERT_IDLE(kgem, handle);
 
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
 
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
}
 
/* Disjoint surface/batch, upload separately */
ret = gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
if (ret)
return ret;
 
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
}
 
void kgem_reset(struct kgem *kgem)
{
if (kgem->next_request) {
struct kgem_request *rq = kgem->next_request;
 
while (!list_is_empty(&rq->buffers)) {
struct kgem_bo *bo =
list_first_entry(&rq->buffers,
struct kgem_bo,
request);
list_del(&bo->request);
 
assert(RQ(bo->rq) == rq);
 
bo->binding.offset = 0;
bo->exec = NULL;
bo->target_handle = -1;
bo->dirty = false;
 
if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
list_add(&bo->request, &kgem->flushing);
bo->rq = (void *)kgem;
} else
__kgem_bo_clear_busy(bo);
 
if (!bo->refcnt && !bo->reusable) {
assert(!bo->snoop);
DBG(("%s: discarding handle=%d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
 
if (rq != &kgem->static_request) {
list_init(&rq->list);
__kgem_request_free(rq);
}
}
 
kgem->nfence = 0;
kgem->nexec = 0;
kgem->nreloc = 0;
kgem->nreloc__self = 0;
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
kgem->batch_flags = kgem->batch_flags_base;
 
kgem->next_request = __kgem_request_alloc(kgem);
 
kgem_sna_reset(kgem);
}
 
static int compact_batch_surface(struct kgem *kgem)
{
int size, shrink, n;
 
if (!kgem->has_relaxed_delta)
return kgem->batch_size;
 
/* See if we can pack the contents into one or two pages */
n = ALIGN(kgem->batch_size, 1024);
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
 
shrink = n - size;
if (shrink) {
DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
 
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
 
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
kgem->reloc[n].offset -= shrink;
}
}
 
return size * sizeof(uint32_t);
}
 
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;
 
if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
struct kgem_bo,
list);
if (!bo->rq) {
out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_4096;
}
}
 
if (size <= 16384) {
bo = list_first_entry(&kgem->pinned_batches[1],
struct kgem_bo,
list);
if (!bo->rq) {
out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}
 
if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_16384;
}
}
 
if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);
 
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
struct kgem_bo,
list);
list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);
 
DBG(("%s: syncing due to busy batches\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
return NULL;
}
 
kgem_retire(kgem);
assert(bo->rq == NULL);
return kgem_bo_reference(bo);
}
 
return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
 
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
int size;
 
assert(!DBG_NO_HW);
assert(!kgem->wedged);
 
assert(kgem->nbatch);
assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
assert(kgem->nbatch <= kgem->surface);
 
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
 
DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
 
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
assert(kgem->nfence <= kgem->fence_max);
 
kgem_finish_buffers(kgem);
 
#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif
 
rq = kgem->next_request;
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;
 
assert(!rq->bo->needs_flush);
 
i = kgem->nexec++;
kgem->exec[i].handle = handle;
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
 
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
rq->ring = kgem->ring == KGEM_BLT;
 
kgem_fixup_self_relocs(kgem, rq->bo);
 
if (kgem_batch_write(kgem, handle, size) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret, retry = 3;
 
VG_CLEAR(execbuf);
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_start_offset = 0;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
execbuf.DR4 = 0;
execbuf.flags = kgem->ring | kgem->batch_flags;
execbuf.rsvd1 = 0;
execbuf.rsvd2 = 0;
 
 
 
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
while (ret == -1 && errno == EBUSY && retry--) {
__kgem_throttle(kgem);
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
if (DEBUG_SYNC && ret == 0) {
struct drm_i915_gem_set_domain set_domain;
 
VG_CLEAR(set_domain);
set_domain.handle = handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 
ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
if (ret == -1) {
// DBG(("%s: GPU hang detected [%d]\n",
// __FUNCTION__, errno));
kgem_throttle(kgem);
kgem->wedged = true;
 
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);
 
for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request) {
if (bo->handle == kgem->exec[i].handle) {
found = bo;
break;
}
}
ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
i,
kgem->exec[i].handle,
(int)kgem->exec[i].offset,
found ? kgem_bo_size(found) : -1,
found ? found->tiling : -1,
(int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
found ? found->snoop : -1,
found ? found->purged : -1);
}
for (i = 0; i < kgem->nreloc; i++) {
ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
i,
(int)kgem->reloc[i].offset,
kgem->reloc[i].target_handle,
kgem->reloc[i].delta,
kgem->reloc[i].read_domains,
kgem->reloc[i].write_domain,
(int)kgem->reloc[i].presumed_offset);
}
 
if (DEBUG_SYNC) {
int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd != -1) {
write(fd, kgem->batch, batch_end*sizeof(uint32_t));
close(fd);
}
 
FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
#endif
}
}
 
kgem_commit(kgem);
}
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem_reset(kgem);
 
assert(kgem->next_request != NULL);
}
 
void kgem_throttle(struct kgem *kgem)
{
kgem->need_throttle = 0;
if (kgem->wedged)
return;
 
kgem->wedged = __kgem_throttle(kgem);
if (kgem->wedged) {
printf("Detected a hung GPU, disabling acceleration.\n");
printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
}
}
 
void kgem_purge_cache(struct kgem *kgem)
{
struct kgem_bo *bo, *next;
int i;
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
if (!kgem_bo_is_retained(kgem, bo)) {
DBG(("%s: purging %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
}
 
kgem->need_purge = false;
}
 
bool kgem_expire_cache(struct kgem *kgem)
{
time_t now, expire;
struct kgem_bo *bo;
unsigned int size = 0, count = 0;
bool idle;
unsigned int i;
 
time(&now);
 
while (__kgem_freed_bo) {
bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
while (__kgem_freed_request) {
struct kgem_request *rq = __kgem_freed_request;
__kgem_freed_request = *(struct kgem_request **)rq;
free(rq);
}
 
while (!list_is_empty(&kgem->large_inactive)) {
kgem_bo_free(kgem,
list_first_entry(&kgem->large_inactive,
struct kgem_bo, list));
 
}
 
while (!list_is_empty(&kgem->scanout)) {
bo = list_first_entry(&kgem->scanout, struct kgem_bo, list);
if (__kgem_busy(kgem, bo->handle))
break;
 
list_del(&bo->list);
kgem_bo_clear_scanout(kgem, bo);
__kgem_bo_destroy(kgem, bo);
}
 
expire = 0;
list_for_each_entry(bo, &kgem->snoop, list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME/2;
break;
}
 
bo->delta = now;
}
if (expire) {
while (!list_is_empty(&kgem->snoop)) {
bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);
 
if (bo->delta > expire)
break;
 
kgem_bo_free(kgem, bo);
}
}
#ifdef DEBUG_MEMORY
{
long snoop_size = 0;
int snoop_count = 0;
list_for_each_entry(bo, &kgem->snoop, list)
snoop_count++, snoop_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
__FUNCTION__, snoop_count, snoop_size);
}
#endif
 
kgem_retire(kgem);
if (kgem->wedged)
kgem_cleanup(kgem);
 
kgem->expire(kgem);
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
expire = 0;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
idle &= list_is_empty(&kgem->inactive[i]);
list_for_each_entry(bo, &kgem->inactive[i], list) {
if (bo->delta) {
expire = now - MAX_INACTIVE_TIME;
break;
}
 
bo->delta = now;
}
}
if (idle) {
DBG(("%s: idle\n", __FUNCTION__));
kgem->need_expire = false;
return false;
}
if (expire == 0)
return true;
 
idle = !kgem->need_retire;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
struct list preserve;
 
list_init(&preserve);
while (!list_is_empty(&kgem->inactive[i])) {
bo = list_last_entry(&kgem->inactive[i],
struct kgem_bo, list);
 
if (bo->delta > expire) {
idle = false;
break;
}
 
if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
idle = false;
list_move_tail(&bo->list, &preserve);
} else {
count++;
size += bytes(bo);
kgem_bo_free(kgem, bo);
DBG(("%s: expiring %d\n",
__FUNCTION__, bo->handle));
}
}
if (!list_is_empty(&preserve)) {
preserve.prev->next = kgem->inactive[i].next;
kgem->inactive[i].next->prev = preserve.prev;
kgem->inactive[i].next = preserve.next;
preserve.next->prev = &kgem->inactive[i];
}
}
 
#ifdef DEBUG_MEMORY
{
long inactive_size = 0;
int inactive_count = 0;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_for_each_entry(bo, &kgem->inactive[i], list)
inactive_count++, inactive_size += bytes(bo);
ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
__FUNCTION__, inactive_count, inactive_size);
}
#endif
 
DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
__FUNCTION__, count, size, idle));
 
kgem->need_expire = !idle;
return !idle;
(void)count;
(void)size;
}
 
void kgem_cleanup_cache(struct kgem *kgem)
{
unsigned int i;
int n;
 
/* sync to the most recent request */
for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
if (!list_is_empty(&kgem->requests[n])) {
struct kgem_request *rq;
struct drm_i915_gem_set_domain set_domain;
 
rq = list_first_entry(&kgem->requests[n],
struct kgem_request,
list);
 
DBG(("%s: sync on cleanup\n", __FUNCTION__));
 
VG_CLEAR(set_domain);
set_domain.handle = rq->bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
(void)drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN,
&set_domain);
}
}
 
kgem_retire(kgem);
kgem_cleanup(kgem);
 
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
while (!list_is_empty(&kgem->inactive[i]))
kgem_bo_free(kgem,
list_last_entry(&kgem->inactive[i],
struct kgem_bo, list));
}
 
while (!list_is_empty(&kgem->snoop))
kgem_bo_free(kgem,
list_last_entry(&kgem->snoop,
struct kgem_bo, list));
 
while (__kgem_freed_bo) {
struct kgem_bo *bo = __kgem_freed_bo;
__kgem_freed_bo = *(struct kgem_bo **)bo;
free(bo);
}
 
kgem->need_purge = false;
kgem->need_expire = false;
}
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;
 
DBG(("%s: num_pages=%d, flags=%x, use_active? %d\n",
__FUNCTION__, num_pages, flags, use_active));
 
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE)
return NULL;
 
if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: inactive and cache bucket empty\n",
__FUNCTION__));
 
if (flags & CREATE_NO_RETIRE) {
DBG(("%s: can not retire\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
DBG(("%s: active cache bucket empty\n", __FUNCTION__));
return NULL;
}
 
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
 
if (list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: active cache bucket still empty after retire\n",
__FUNCTION__));
return NULL;
}
}
 
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
DBG(("%s: searching for inactive %s map\n",
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), num_pages));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling &&
!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (flags & CREATE_EXACT)
return NULL;
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
return NULL;
}
 
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
list_for_each_entry(bo, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
assert(!bo->scanout);
 
if (num_pages > num_pages(bo))
continue;
 
if (use_active &&
kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
if (I915_TILING_NONE != bo->tiling) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
continue;
 
if (first)
continue;
 
if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;
 
bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}
 
if (bo->map) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (first != NULL)
break;
 
first = bo;
continue;
}
} else {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;
 
first = bo;
continue;
}
}
 
if (use_active)
kgem_bo_remove_from_active(kgem, bo);
else
kgem_bo_remove_from_inactive(kgem, bo);
 
assert(bo->tiling == I915_TILING_NONE);
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}
 
if (first) {
assert(first->tiling == I915_TILING_NONE);
 
if (use_active)
kgem_bo_remove_from_active(kgem, first);
else
kgem_bo_remove_from_inactive(kgem, first);
 
first->pitch = 0;
first->delta = 0;
DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}
 
return NULL;
}
 
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
struct kgem_bo *bo;
uint32_t handle;
 
DBG(("%s(%d)\n", __FUNCTION__, size));
 
if (flags & CREATE_GTT_MAP && kgem->has_llc) {
flags &= ~CREATE_GTT_MAP;
flags |= CREATE_CPU_MAP;
}
 
size = (size + PAGE_SIZE - 1) / PAGE_SIZE;
bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
if (bo) {
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_CACHED)
return NULL;
 
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));
bo = __kgem_bo_alloc(handle, size);
if (bo == NULL) {
gem_close(kgem->fd, handle);
return NULL;
}
 
debug_alloc__bo(kgem, bo);
return bo;
}
 
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
unsigned int size;
 
assert(bo->tiling);
assert(kgem->gen < 040);
 
if (kgem->gen < 030)
size = 512 * 1024;
else
size = 1024 * 1024;
while (size < bytes(bo))
size *= 2;
 
return size;
}
 
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags)
{
struct list *cache;
struct kgem_bo *bo;
uint32_t pitch, untiled_pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
 
if (tiling < 0)
tiling = -tiling, flags |= CREATE_EXACT;
 
DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling,
!!(flags & CREATE_EXACT),
!!(flags & CREATE_INACTIVE),
!!(flags & CREATE_CPU_MAP),
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT),
!!(flags & CREATE_PRIME),
!!(flags & CREATE_TEMPORARY)));
 
size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
size /= PAGE_SIZE;
bucket = cache_bucket(size);
 
if (flags & CREATE_SCANOUT) {
assert((flags & CREATE_INACTIVE) == 0);
list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->purged);
 
if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));
 
if (flags & CREATE_INACTIVE)
goto large_inactive;
 
tiled_height = kgem_aligned_height(kgem, height, tiling);
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
 
list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(bo->flush == true);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch || bo->tiling != tiling) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
bo->tiling = tiling;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
large_inactive:
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
 
if (size > num_pages(bo))
continue;
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
bo->tiling = tiling;
bo->pitch = pitch;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
list_del(&bo->list);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->delta = 0;
DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
goto create;
}
 
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (kgem->has_llc && tiling == I915_TILING_NONE)
for_cpu = 1;
/* We presume that we will need to upload to this bo,
* and so would prefer to have an active VMA.
*/
cache = &kgem->vma[for_cpu].inactive[bucket];
do {
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
DBG(("inactive vma with wrong tiling: %d < %d\n",
bo->tiling, tiling));
continue;
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
 
kgem_bo_remove_from_inactive(kgem, bo);
 
DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));
 
if (flags & CREATE_CPU_MAP && !kgem->has_llc)
goto create;
}
 
if (flags & CREATE_INACTIVE)
goto skip_active_search;
 
/* Best active match */
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
retry = 3;
search_again:
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->active[bucket][tiling];
if (tiling) {
tiled_height = kgem_aligned_height(kgem, height, tiling);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert(!bo->scanout);
 
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;
 
if (bo->pitch != pitch) {
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
bo->pitch = pitch;
}
}
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
} else {
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->tiling == tiling);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->pitch = pitch;
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
 
if (--retry && flags & CREATE_EXACT) {
if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;
 
cache = &kgem->active[bucket][i];
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (num_pages(bo) < size)
continue;
 
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = tiling;
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
bucket++;
goto search_again;
}
 
if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */
untiled_pitch = kgem_untiled_pitch(kgem, width, bpp, flags);
i = tiling;
while (--i >= 0) {
tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
cache = active(kgem, tiled_height / PAGE_SIZE, i);
tiled_height = kgem_aligned_height(kgem, height, i);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
} else
bo->pitch = untiled_pitch;
 
if (bo->pitch * tiled_height > bytes(bo))
continue;
 
kgem_bo_remove_from_active(kgem, bo);
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
}
}
 
skip_active_search:
bucket = cache_bucket(size);
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3)
retry = 3;
search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
 
if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}
 
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
}
 
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}
 
kgem_bo_remove_from_inactive(kgem, bo);
 
bo->pitch = pitch;
bo->tiling = tiling;
 
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
assert(bo->pitch);
DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
bo->refcnt = 1;
return bo;
}
 
if (flags & CREATE_INACTIVE &&
!list_is_empty(&kgem->active[bucket][tiling]) &&
__kgem_throttle_retire(kgem, flags)) {
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
if (--retry) {
bucket++;
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}
 
create:
if (bucket >= NUM_CACHE_BUCKETS)
size = ALIGN(size, 1024);
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;
 
bo = __kgem_bo_alloc(handle, size);
if (!bo) {
gem_close(kgem->fd, handle);
return NULL;
}
 
bo->domain = DOMAIN_CPU;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
if (tiling != I915_TILING_NONE &&
gem_set_tiling(kgem->fd, handle, tiling, pitch))
bo->tiling = tiling;
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}
 
assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
 
debug_alloc__bo(kgem, bo);
 
DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id,
size, num_pages(bo), bucket(bo)));
return bo;
}
 
#if 0
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags)
{
struct kgem_bo *bo;
int stride, size;
 
if (DBG_NO_CPU)
return NULL;
 
DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));
 
if (kgem->has_llc) {
bo = kgem_create_2d(kgem, width, height, bpp,
I915_TILING_NONE, flags);
if (bo == NULL)
return bo;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
return bo;
}
 
assert(width > 0 && height > 0);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
size = stride * ALIGN(height, 2);
assert(size >= PAGE_SIZE);
 
DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));
 
bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
if (bo) {
assert(bo->tiling == I915_TILING_NONE);
assert(bo->snoop);
bo->refcnt = 1;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_cacheing) {
bo = kgem_create_linear(kgem, size, flags);
if (bo == NULL)
return NULL;
 
assert(bo->tiling == I915_TILING_NONE);
 
if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
bo->snoop = true;
 
if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
 
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
if (kgem->has_userptr) {
void *ptr;
 
/* XXX */
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
return NULL;
 
bo = kgem_create_map(kgem, ptr, size, false);
if (bo == NULL) {
free(ptr);
return NULL;
}
 
bo->map = MAKE_USER_MAP(ptr);
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
return NULL;
}
 
#endif
 
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, proxy? %d\n",
__FUNCTION__, bo->handle, bo->proxy != NULL));
 
if (bo->proxy) {
_list_del(&bo->vma);
_list_del(&bo->request);
if (bo->io && bo->exec == NULL)
_kgem_bo_delete_buffer(kgem, bo);
kgem_bo_unref(kgem, bo->proxy);
kgem_bo_binding_free(kgem, bo);
free(bo);
return;
}
 
__kgem_bo_destroy(kgem, bo);
}
 
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->rq);
assert(bo->exec == NULL);
assert(bo->needs_flush);
 
/* The kernel will emit a flush *and* update its own flushing lists. */
if (!__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
DBG(("%s: handle=%d, busy?=%d\n",
__FUNCTION__, bo->handle, bo->rq != NULL));
}
 
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem->nreloc && bo->rq && RQ_RING(bo->rq) != kgem->ring;
}
 
bool kgem_check_bo(struct kgem *kgem, ...)
{
va_list ap;
struct kgem_bo *bo;
int num_exec = 0;
int num_pages = 0;
bool flush = false;
 
va_start(ap, kgem);
while ((bo = va_arg(ap, struct kgem_bo *))) {
while (bo->proxy)
bo = bo->proxy;
if (bo->exec)
continue;
 
if (needs_semaphore(kgem, bo))
return false;
 
num_pages += num_pages(bo);
num_exec++;
 
flush |= bo->flush;
}
va_end(ap);
 
DBG(("%s: num_pages=+%d, num_exec=+%d\n",
__FUNCTION__, num_pages, num_exec));
 
if (!num_pages)
return true;
 
if (kgem_flush(kgem, flush))
return false;
 
if (kgem->aperture > kgem->aperture_low &&
kgem_ring_is_idle(kgem, kgem->ring)) {
DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
__FUNCTION__, kgem->aperture, kgem->aperture_low));
return false;
}
 
if (num_pages + kgem->aperture > kgem->aperture_high) {
DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
__FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
return false;
}
 
if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
return false;
}
 
return true;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint32_t delta)
{
int index;
 
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
 
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
if( bo != NULL && bo->handle == -2)
{
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
return 0;
};
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(bo->refcnt);
assert(!bo->purged);
 
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
__FUNCTION__, bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
}
 
if (read_write_domain & 0x7fff && !bo->dirty)
__kgem_bo_mark_dirty(bo);
 
bo = bo->proxy;
assert(bo->refcnt);
assert(!bo->purged);
}
 
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);
 
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
kgem->nfence++;
}
bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
}
 
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
 
if (read_write_domain & 0x7fff && !bo->dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}
 
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
 
return delta;
}
 
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
 
DBG(("%s: type=%d, count=%d (bucket: %d)\n",
__FUNCTION__, type, kgem->vma[type].count, bucket));
if (kgem->vma[type].count <= 0)
return;
 
if (kgem->need_purge)
kgem_purge_cache(kgem);
 
/* vma are limited on a per-process basis to around 64k.
* This includes all malloc arenas as well as other file
* mappings. In order to be fair and not hog the cache,
* and more importantly not to exhaust that limit and to
* start failing mappings, we keep our own number of open
* vma to within a conservative value.
*/
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
 
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
j++) {
struct list *head = &kgem->vma[type].inactive[i++%ARRAY_SIZE(kgem->vma[type].inactive)];
if (!list_is_empty(head))
bo = list_last_entry(head, struct kgem_bo, vma);
}
if (bo == NULL)
break;
 
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__,
IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
assert(IS_CPU_MAP(bo->map) == type);
assert(bo->map);
assert(bo->rq == NULL);
 
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
// munmap(MAP(bo->map), bytes(bo));
bo->map = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
 
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: freeing unpurgeable old mapping\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
}
}
}
 
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(bo->exec == NULL);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
(kgem->has_llc || bo->domain == DOMAIN_CPU)) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
ptr = kgem_bo_map__cpu(kgem, bo);
kgem_bo_sync__cpu(kgem, bo);
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
if (bo->domain != DOMAIN_GTT) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
/* XXX use PROT_READ to avoid the write flush? */
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
}
}
 
return ptr;
}
 
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(!bo->purged);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(bytes(bo) <= kgem->aperture_mappable / 4);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
return ptr;
}
 
 
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap mmap_arg;
 
DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
__FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(!bo->scanout);
assert(bo->proxy == NULL);
 
if (IS_CPU_MAP(bo->map))
return MAP(bo->map);
 
if (bo->map)
kgem_bo_release_map(kgem, bo);
 
kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));
 
retry:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
mmap_arg.offset = 0;
mmap_arg.size = bytes(bo);
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg)) {
printf("%s: failed to mmap %d, %d bytes, into CPU domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), 0);
if (__kgem_throttle_retire(kgem, 0))
goto retry;
 
if (kgem->need_expire) {
kgem_cleanup_cache(kgem);
goto retry;
}
 
return NULL;
}
 
VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
 
DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
 
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
kgem_bo_submit(kgem, bo);
 
if (bo->domain != DOMAIN_CPU) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: SYNC: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_CPU;
set_domain.write_domain = I915_GEM_DOMAIN_CPU;
 
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_CPU;
}
}
}
 
void kgem_clear_dirty(struct kgem *kgem)
{
struct list * const buffers = &kgem->next_request->buffers;
struct kgem_bo *bo;
 
list_for_each_entry(bo, buffers, request) {
if (!bo->dirty)
break;
 
bo->dirty = false;
}
}
 
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length)
{
struct kgem_bo *bo;
 
DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
__FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
offset, length, target->io));
 
bo = __kgem_bo_alloc(target->handle, length);
if (bo == NULL)
return NULL;
 
bo->unique_id = kgem_get_unique_id(kgem);
bo->reusable = false;
bo->size.bytes = length;
 
bo->io = target->io && target->proxy == NULL;
bo->dirty = target->dirty;
bo->tiling = target->tiling;
bo->pitch = target->pitch;
 
assert(!bo->scanout);
bo->proxy = kgem_bo_reference(target);
bo->delta = offset;
 
if (target->exec) {
list_move_tail(&bo->request, &kgem->next_request->buffers);
bo->exec = &_kgem_dummy_exec;
}
bo->rq = target->rq;
 
return bo;
}
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b && b->offset; b = b->next)
if (format == b->format)
return b->offset;
 
return 0;
}
 
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
struct kgem_bo_binding *b;
 
for (b = &bo->binding; b; b = b->next) {
if (b->offset)
continue;
 
b->offset = offset;
b->format = format;
 
if (b->next)
b->next->offset = 0;
 
return;
}
 
b = malloc(sizeof(*b));
if (b) {
b->next = bo->binding.next;
b->format = format;
b->offset = offset;
bo->binding.next = b;
}
}
 
 
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
 
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
size = fb->pitch * fb->height / PAGE_SIZE;
bo = __kgem_bo_alloc(-2, size);
if (!bo) {
return 0;
}
 
bo->domain = DOMAIN_GTT;
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = fb->pitch;
bo->tiling = I915_TILING_NONE;
bo->scanout = 1;
fb->fb_bo = bo;
 
// printf("fb width %d height %d pitch %d bo %p\n",
// fb->width, fb->height, fb->pitch, fb->fb_bo);
return 1;
};
 
 
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb)
{
struct kgem_bo *bo;
size_t size;
int ret;
bo = fb->fb_bo;
ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
if( ret != 0 )
return 0;
 
fb->fb_bo = bo;
size = fb->pitch * fb->height / PAGE_SIZE;
 
if((size != bo->size.pages.count) ||
(fb->pitch != bo->pitch))
{
bo->size.pages.count = size;
bo->pitch = fb->pitch;
 
printf("fb width %d height %d pitch %d bo %p\n",
fb->width, fb->height, fb->pitch, fb->fb_bo);
return 1;
}
return 0;
};
 
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_destroy(kgem, bo);
kgem_bo_free(kgem, bo);
}
 
 
void kgem_close_batches(struct kgem *kgem)
{
int n;
 
for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
while (!list_is_empty(&kgem->pinned_batches[n])) {
kgem_bo_destroy(kgem,
list_first_entry(&kgem->pinned_batches[n],
struct kgem_bo, list));
}
}
};
 
 
 
/drivers/video/Intel-2D/kgem.h
0,0 → 1,693
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef KGEM_H
#define KGEM_H
 
#define HAS_DEBUG_FULL 0
 
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdio.h>
 
#include <i915_drm.h>
 
#include "compiler.h"
#include "intel_list.h"
 
static inline void delay(uint32_t time)
{
__asm__ __volatile__(
"int $0x40"
::"a"(5), "b"(time)
:"memory");
};
 
#undef DBG
 
#if HAS_DEBUG_FULL
#define DBG(x) printf x
#else
#define DBG(x)
#endif
 
struct kgem_bo {
struct kgem_request *rq;
#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3))
#define RQ_RING(rq) ((uintptr_t)(rq) & 3)
#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT)
struct drm_i915_gem_exec_object2 *exec;
 
struct kgem_bo *proxy;
 
struct list list;
struct list request;
struct list vma;
 
void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) (ptr && ((uintptr_t)(ptr) & 1) == 0)
 
struct kgem_bo_binding {
struct kgem_bo_binding *next;
uint32_t format;
uint16_t offset;
} binding;
 
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
uint32_t target_handle;
uint32_t presumed_offset;
uint32_t delta;
union {
struct {
uint32_t count:27;
#define PAGE_SIZE 4096
uint32_t bucket:5;
#define NUM_CACHE_BUCKETS 16
#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS+12))
} pages;
uint32_t bytes;
} size;
uint32_t pitch : 18; /* max 128k */
uint32_t tiling : 2;
uint32_t reusable : 1;
uint32_t dirty : 1;
uint32_t domain : 2;
uint32_t needs_flush : 1;
uint32_t snoop : 1;
uint32_t io : 1;
uint32_t flush : 1;
uint32_t scanout : 1;
uint32_t purged : 1;
};
#define DOMAIN_NONE 0
#define DOMAIN_CPU 1
#define DOMAIN_GTT 2
#define DOMAIN_GPU 3
 
struct kgem_request {
struct list list;
struct kgem_bo *bo;
struct list buffers;
int ring;
};
 
enum {
MAP_GTT = 0,
MAP_CPU,
NUM_MAP_TYPES,
};
 
struct kgem {
int fd;
int wedged;
unsigned gen;
 
uint32_t unique_id;
 
enum kgem_mode {
/* order matches I915_EXEC_RING ordering */
KGEM_NONE = 0,
KGEM_RENDER,
KGEM_BSD,
KGEM_BLT,
} mode, ring;
 
struct list flushing;
struct list large;
struct list large_inactive;
struct list active[NUM_CACHE_BUCKETS][3];
struct list inactive[NUM_CACHE_BUCKETS];
struct list pinned_batches[2];
struct list snoop;
struct list scanout;
struct list batch_buffers, active_buffers;
 
struct list requests[2];
struct kgem_request *next_request;
struct kgem_request static_request;
 
struct {
struct list inactive[NUM_CACHE_BUCKETS];
int16_t count;
} vma[NUM_MAP_TYPES];
 
uint32_t batch_flags;
uint32_t batch_flags_base;
#define I915_EXEC_SECURE (1<<9)
#define LOCAL_EXEC_OBJECT_WRITE (1<<2)
 
uint16_t nbatch;
uint16_t surface;
uint16_t nexec;
uint16_t nreloc;
uint16_t nreloc__self;
uint16_t nfence;
uint16_t batch_size;
uint16_t min_alignment;
 
uint32_t flush:1;
uint32_t need_expire:1;
uint32_t need_purge:1;
uint32_t need_retire:1;
uint32_t need_throttle:1;
uint32_t scanout_busy:1;
uint32_t busy:1;
 
uint32_t has_userptr :1;
uint32_t has_blt :1;
uint32_t has_relaxed_fencing :1;
uint32_t has_relaxed_delta :1;
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
uint32_t has_pinned_batches :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
 
uint32_t can_blt_cpu :1;
 
uint16_t fence_max;
uint16_t half_cpu_cache_pages;
uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
uint32_t aperture, aperture_fenced;
uint32_t max_upload_tile_size, max_copy_tile_size;
uint32_t max_gpu_size, max_cpu_size;
uint32_t large_object_size, max_object_size;
uint32_t buffer_size;
 
void (*context_switch)(struct kgem *kgem, int new_mode);
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
 
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[4096];
uint16_t reloc__self[256];
 
#ifdef DEBUG_MEMORY
struct {
int bo_allocs;
size_t bo_bytes;
} debug_memory;
#endif
};
 
#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED 4
#define KGEM_EXEC_RESERVED 1
 
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#endif
 
#define KGEM_BATCH_SIZE(K) ((K)->batch_size-KGEM_BATCH_RESERVED)
#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED)
#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED)
 
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen);
void kgem_reset(struct kgem *kgem);
 
struct kgem_bo *kgem_create_map(struct kgem *kgem,
void *ptr, uint32_t size,
bool read_only);
 
struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
 
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags);
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length);
 
 
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth);
#define KGEM_CAN_CREATE_GPU 0x1
#define KGEM_CAN_CREATE_CPU 0x2
#define KGEM_CAN_CREATE_LARGE 0x4
#define KGEM_CAN_CREATE_GTT 0x8
 
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
uint32_t width,
uint32_t height,
uint32_t pitch,
uint32_t bpp);
enum {
CREATE_EXACT = 0x1,
CREATE_INACTIVE = 0x2,
CREATE_CPU_MAP = 0x4,
CREATE_GTT_MAP = 0x8,
CREATE_SCANOUT = 0x10,
CREATE_PRIME = 0x20,
CREATE_TEMPORARY = 0x40,
CREATE_CACHED = 0x80,
CREATE_NO_RETIRE = 0x100,
CREATE_NO_THROTTLE = 0x200,
};
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags);
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags);
 
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_retire(struct kgem *kgem);
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring);
static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring)
{
ring = ring == KGEM_BLT;
 
if (list_is_empty(&kgem->requests[ring]))
return true;
 
return __kgem_ring_is_idle(kgem, ring);
}
 
static inline bool kgem_is_idle(struct kgem *kgem)
{
if (!kgem->need_retire)
return true;
 
return kgem_ring_is_idle(kgem, kgem->ring);
}
 
void _kgem_submit(struct kgem *kgem);
static inline void kgem_submit(struct kgem *kgem)
{
if (kgem->nbatch)
_kgem_submit(kgem);
}
 
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
if (kgem->nreloc == 0)
return false;
 
return (kgem->flush ^ flush) && kgem_ring_is_idle(kgem, kgem->ring);
}
 
static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->exec)
_kgem_submit(kgem);
}
 
void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_flush(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_submit(kgem, bo);
 
if (!bo->needs_flush)
return;
 
/* If the kernel fails to emit the flush, then it will be forced when
* we assume direct access. And as the useual failure is EIO, we do
* not actualy care.
*/
__kgem_flush(kgem, bo);
}
 
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
{
assert(bo->refcnt);
bo->refcnt++;
return bo;
}
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
_kgem_bo_destroy(kgem, bo);
}
 
void kgem_clear_dirty(struct kgem *kgem);
 
static inline void kgem_set_mode(struct kgem *kgem,
enum kgem_mode mode,
struct kgem_bo *bo)
{
assert(!kgem->wedged);
 
#if DEBUG_FLUSH_BATCH
kgem_submit(kgem);
#endif
 
if (kgem->mode == mode)
return;
 
// kgem->context_switch(kgem, mode);
kgem->mode = mode;
}
 
static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
{
assert(kgem->mode == KGEM_NONE);
assert(kgem->nbatch == 0);
assert(!kgem->wedged);
// kgem->context_switch(kgem, mode);
kgem->mode = mode;
}
 
static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords)
{
assert(num_dwords > 0);
assert(kgem->nbatch < kgem->surface);
assert(kgem->surface <= kgem->batch_size);
return likely(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface);
}
 
static inline bool kgem_check_reloc(struct kgem *kgem, int n)
{
assert(kgem->nreloc <= KGEM_RELOC_SIZE(kgem));
return likely(kgem->nreloc + n <= KGEM_RELOC_SIZE(kgem));
}
 
static inline bool kgem_check_exec(struct kgem *kgem, int n)
{
assert(kgem->nexec <= KGEM_EXEC_SIZE(kgem));
return likely(kgem->nexec + n <= KGEM_EXEC_SIZE(kgem));
}
 
static inline bool kgem_check_reloc_and_exec(struct kgem *kgem, int n)
{
return kgem_check_reloc(kgem, n) && kgem_check_exec(kgem, n);
}
 
static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
int num_dwords,
int num_surfaces)
{
return (int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) <= (int)(kgem->surface - num_surfaces*8) &&
kgem_check_reloc(kgem, num_surfaces) &&
kgem_check_exec(kgem, num_surfaces);
}
 
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
 
return kgem->batch + kgem->nbatch;
}
 
bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
 
#define KGEM_RELOC_FENCED 0x8000
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domains,
uint32_t delta);
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write);
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length);
 
int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo);
void kgem_get_tile_size(struct kgem *kgem, int tiling,
int *tile_width, int *tile_height, int *tile_size);
 
static inline int __kgem_buffer_size(struct kgem_bo *bo)
{
assert(bo->proxy != NULL);
return bo->size.bytes;
}
 
static inline int __kgem_bo_size(struct kgem_bo *bo)
{
assert(bo->proxy == NULL);
return PAGE_SIZE * bo->size.pages.count;
}
 
static inline int kgem_bo_size(struct kgem_bo *bo)
{
if (bo->proxy)
return __kgem_buffer_size(bo);
else
return __kgem_bo_size(bo);
}
 
/*
static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
struct kgem_bo *bo)
{
int pitch = bo->pitch;
if (kgem->gen >= 040 && bo->tiling)
pitch /= 4;
if (pitch > MAXSHORT) {
DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n",
__FUNCTION__, bo->handle, pitch));
return false;
}
 
return true;
}
 
static inline bool kgem_bo_can_blt(struct kgem *kgem,
struct kgem_bo *bo)
{
if (bo->tiling == I915_TILING_Y) {
DBG(("%s: can not blt to handle=%d, tiling=Y\n",
__FUNCTION__, bo->handle));
return false;
}
 
return kgem_bo_blt_pitch_is_ok(kgem, bo);
}
*/
 
static inline bool __kgem_bo_is_mappable(struct kgem *kgem,
struct kgem_bo *bo)
{
if (bo->domain == DOMAIN_GTT)
return true;
 
if (kgem->gen < 040 && bo->tiling &&
bo->presumed_offset & (kgem_bo_fenced_size(kgem, bo) - 1))
return false;
 
if (!bo->presumed_offset)
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
 
return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable;
}
 
static inline bool kgem_bo_is_mappable(struct kgem *kgem,
struct kgem_bo *bo)
{
DBG(("%s: domain=%d, offset: %d size: %d\n",
__FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo)));
assert(bo->refcnt);
return __kgem_bo_is_mappable(kgem, bo);
}
 
static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: map=%p, tiling=%d, domain=%d\n",
__FUNCTION__, bo->map, bo->tiling, bo->domain));
assert(bo->refcnt);
 
if (bo->map == NULL)
return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU;
 
return IS_CPU_MAP(bo->map) == !bo->tiling;
}
 
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
if (kgem_bo_mapped(kgem, bo))
return true;
 
if (!bo->tiling && kgem->has_llc)
return true;
 
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
 
return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
assert(bo->refcnt);
while (bo->proxy)
bo = bo->proxy;
return bo->snoop;
}
 
bool __kgem_busy(struct kgem *kgem, int handle);
 
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
{
bo->rq = (struct kgem_request *)((uintptr_t)bo->rq | ring);
}
 
inline static void __kgem_bo_clear_busy(struct kgem_bo *bo)
{
bo->needs_flush = false;
list_del(&bo->request);
bo->rq = NULL;
bo->domain = DOMAIN_NONE;
}
 
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
assert(bo->refcnt);
return bo->rq;
}
 
/*
 
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
assert(bo->refcnt);
 
if (bo->exec)
return true;
 
if (kgem_flush(kgem, bo->flush))
kgem_submit(kgem);
 
if (bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
 
return kgem_bo_is_busy(bo);
}
 
*/
 
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
{
if (bo == NULL)
return false;
 
assert(bo->refcnt);
return bo->dirty;
}
 
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
{
/* The bo is outside of our control, so presume it is written to */
bo->needs_flush = true;
if (bo->rq == NULL)
bo->rq = (void *)kgem;
 
if (bo->domain != DOMAIN_GPU)
bo->domain = DOMAIN_NONE;
}
 
static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo)
{
DBG(("%s: handle=%d (proxy? %d)\n", __FUNCTION__,
bo->handle, bo->proxy != NULL));
 
bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;
bo->needs_flush = bo->dirty = true;
list_move(&bo->request, &RQ(bo->rq)->buffers);
}
 
static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
{
assert(bo->refcnt);
do {
assert(bo->exec);
assert(bo->rq);
 
if (bo->dirty)
return;
 
__kgem_bo_mark_dirty(bo);
} while ((bo = bo->proxy));
}
 
#define KGEM_BUFFER_WRITE 0x1
#define KGEM_BUFFER_INPLACE 0x2
#define KGEM_BUFFER_LAST 0x4
 
#define KGEM_BUFFER_WRITE_INPLACE (KGEM_BUFFER_WRITE | KGEM_BUFFER_INPLACE)
 
struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
uint32_t size, uint32_t flags,
void **ret);
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret);
bool kgem_buffer_is_inplace(struct kgem_bo *bo);
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
 
void kgem_throttle(struct kgem *kgem);
#define MAX_INACTIVE_TIME 10
bool kgem_expire_cache(struct kgem *kgem);
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
 
#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
{
(void)kgem;
(void)nbatch;
}
#endif
 
#endif /* KGEM_H */
/drivers/video/Intel-2D/sna.c
0,0 → 1,967
 
 
#include <memory.h>
#include <malloc.h>
#include <kos32sys.h>
#include <pixlib2.h>
 
#include "sna.h"
 
#define to_surface(x) (surface_t*)((x)->handle)
 
static struct sna_fb sna_fb;
static int tls_mask;
 
int tls_alloc(void);
 
static inline void *tls_get(int key)
{
void *val;
__asm__ __volatile__(
"movl %%fs:(%1), %0"
:"=r"(val)
:"r"(key));
 
return val;
};
 
static inline int
tls_set(int key, const void *ptr)
{
if(!(key & 3))
{
__asm__ __volatile__(
"movl %0, %%fs:(%1)"
::"r"(ptr),"r"(key));
return 0;
}
else return -1;
}
 
 
 
 
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb);
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb);
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing,
unsigned flags, uint32_t width, uint32_t height,
uint32_t bpp, uint32_t tiling, uint32_t *pitch);
 
void kgem_close_batches(struct kgem *kgem);
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
 
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci);
 
//struct kgem_bo *create_bo(bitmap_t *bitmap);
 
static bool sna_solid_cache_init(struct sna *sna);
 
struct sna *sna_device;
 
__LOCK_INIT_RECURSIVE(, __sna_lock);
 
static void no_render_reset(struct sna *sna)
{
(void)sna;
}
 
void no_render_init(struct sna *sna)
{
struct sna_render *render = &sna->render;
 
memset (render,0, sizeof (*render));
 
render->prefer_gpu = PREFER_GPU_BLT;
 
render->vertices = render->vertex_data;
render->vertex_size = ARRAY_SIZE(render->vertex_data);
 
// render->composite = no_render_composite;
 
// render->copy_boxes = no_render_copy_boxes;
// render->copy = no_render_copy;
 
// render->fill_boxes = no_render_fill_boxes;
// render->fill = no_render_fill;
// render->fill_one = no_render_fill_one;
// render->clear = no_render_clear;
 
render->reset = no_render_reset;
// render->flush = no_render_flush;
// render->fini = no_render_fini;
 
// sna->kgem.context_switch = no_render_context_switch;
// sna->kgem.retire = no_render_retire;
 
if (sna->kgem.gen >= 60)
sna->kgem.ring = KGEM_RENDER;
 
sna_vertex_init(sna);
}
 
void sna_vertex_init(struct sna *sna)
{
// pthread_mutex_init(&sna->render.lock, NULL);
// pthread_cond_init(&sna->render.wait, NULL);
sna->render.active = 0;
}
 
int sna_accel_init(struct sna *sna)
{
const char *backend;
 
// list_init(&sna->deferred_free);
// list_init(&sna->dirty_pixmaps);
// list_init(&sna->active_pixmaps);
// list_init(&sna->inactive_clock[0]);
// list_init(&sna->inactive_clock[1]);
 
// sna_accel_install_timers(sna);
 
 
backend = "no";
no_render_init(sna);
 
if (sna->info->gen >= 0100) {
} else if (sna->info->gen >= 070) {
if (gen7_render_init(sna))
backend = "IvyBridge";
} else if (sna->info->gen >= 060) {
if (gen6_render_init(sna))
backend = "SandyBridge";
} else if (sna->info->gen >= 050) {
if (gen5_render_init(sna))
backend = "Ironlake";
} else if (sna->info->gen >= 040) {
if (gen4_render_init(sna))
backend = "Broadwater/Crestline";
} else if (sna->info->gen >= 030) {
if (gen3_render_init(sna))
backend = "gen3";
}
 
DBG(("%s(backend=%s, prefer_gpu=%x)\n",
__FUNCTION__, backend, sna->render.prefer_gpu));
 
kgem_reset(&sna->kgem);
 
// if (!sna_solid_cache_init(sna))
// return false;
 
sna_device = sna;
 
 
return kgem_init_fb(&sna->kgem, &sna_fb);
}
 
int sna_init(uint32_t service)
{
ioctl_t io;
int caps = 0;
 
static struct pci_device device;
struct sna *sna;
 
DBG(("%s\n", __FUNCTION__));
 
__lock_acquire_recursive(__sna_lock);
 
if(sna_device)
goto done;
io.handle = service;
io.io_code = SRV_GET_PCI_INFO;
io.input = &device;
io.inp_size = sizeof(device);
io.output = NULL;
io.out_size = 0;
 
if (call_service(&io)!=0)
goto err1;
sna = malloc(sizeof(*sna));
if (sna == NULL)
goto err1;
 
memset(sna, 0, sizeof(*sna));
sna->PciInfo = &device;
 
sna->info = intel_detect_chipset(sna->PciInfo);
 
kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);
/*
if (!xf86ReturnOptValBool(sna->Options,
OPTION_RELAXED_FENCING,
sna->kgem.has_relaxed_fencing)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED,
"Disabling use of relaxed fencing\n");
sna->kgem.has_relaxed_fencing = 0;
}
if (!xf86ReturnOptValBool(sna->Options,
OPTION_VMAP,
sna->kgem.has_vmap)) {
xf86DrvMsg(scrn->scrnIndex,
sna->kgem.has_vmap ? X_CONFIG : X_PROBED,
"Disabling use of vmap\n");
sna->kgem.has_vmap = 0;
}
*/
 
/* Disable tiling by default */
sna->tiling = SNA_TILING_DISABLE;
 
/* Default fail-safe value of 75 Hz */
// sna->vblank_interval = 1000 * 1000 * 1000 / 75;
 
sna->flags = 0;
 
sna_accel_init(sna);
 
tls_mask = tls_alloc();
// printf("tls mask %x\n", tls_mask);
done:
caps = sna_device->render.caps;
 
err1:
__lock_release_recursive(__sna_lock);
return caps;
}
 
void sna_fini()
{
if( sna_device )
{
struct kgem_bo *mask;
__lock_acquire_recursive(__sna_lock);
mask = tls_get(tls_mask);
sna_device->render.fini(sna_device);
if(mask)
kgem_bo_destroy(&sna_device->kgem, mask);
kgem_close_batches(&sna_device->kgem);
kgem_cleanup_cache(&sna_device->kgem);
sna_device = NULL;
__lock_release_recursive(__sna_lock);
};
}
 
#if 0
 
static bool sna_solid_cache_init(struct sna *sna)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
 
DBG(("%s\n", __FUNCTION__));
 
cache->cache_bo =
kgem_create_linear(&sna->kgem, sizeof(cache->color));
if (!cache->cache_bo)
return FALSE;
 
/*
* Initialise [0] with white since it is very common and filling the
* zeroth slot simplifies some of the checks.
*/
cache->color[0] = 0xffffffff;
cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
cache->bo[0]->pitch = 4;
cache->dirty = 1;
cache->size = 1;
cache->last = 0;
 
return TRUE;
}
 
void
sna_render_flush_solid(struct sna *sna)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
 
DBG(("sna_render_flush_solid(size=%d)\n", cache->size));
assert(cache->dirty);
assert(cache->size);
 
kgem_bo_write(&sna->kgem, cache->cache_bo,
cache->color, cache->size*sizeof(uint32_t));
cache->dirty = 0;
cache->last = 0;
}
 
static void
sna_render_finish_solid(struct sna *sna, bool force)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
int i;
 
DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n",
force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty));
 
if (!force && cache->cache_bo->domain != DOMAIN_GPU)
return;
 
if (cache->dirty)
sna_render_flush_solid(sna);
 
for (i = 0; i < cache->size; i++) {
if (cache->bo[i] == NULL)
continue;
 
kgem_bo_destroy(&sna->kgem, cache->bo[i]);
cache->bo[i] = NULL;
}
kgem_bo_destroy(&sna->kgem, cache->cache_bo);
 
DBG(("sna_render_finish_solid reset\n"));
 
cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color));
cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
cache->bo[0]->pitch = 4;
if (force)
cache->size = 1;
}
 
 
struct kgem_bo *
sna_render_get_solid(struct sna *sna, uint32_t color)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
int i;
 
DBG(("%s: %08x\n", __FUNCTION__, color));
 
// if ((color & 0xffffff) == 0) /* alpha only */
// return kgem_bo_reference(sna->render.alpha_cache.bo[color>>24]);
 
if (color == 0xffffffff) {
DBG(("%s(white)\n", __FUNCTION__));
return kgem_bo_reference(cache->bo[0]);
}
 
if (cache->color[cache->last] == color) {
DBG(("sna_render_get_solid(%d) = %x (last)\n",
cache->last, color));
return kgem_bo_reference(cache->bo[cache->last]);
}
 
for (i = 1; i < cache->size; i++) {
if (cache->color[i] == color) {
if (cache->bo[i] == NULL) {
DBG(("sna_render_get_solid(%d) = %x (recreate)\n",
i, color));
goto create;
} else {
DBG(("sna_render_get_solid(%d) = %x (old)\n",
i, color));
goto done;
}
}
}
 
sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));
 
i = cache->size++;
cache->color[i] = color;
cache->dirty = 1;
DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));
 
create:
cache->bo[i] = kgem_create_proxy(cache->cache_bo,
i*sizeof(uint32_t), sizeof(uint32_t));
cache->bo[i]->pitch = 4;
 
done:
cache->last = i;
return kgem_bo_reference(cache->bo[i]);
}
 
#endif
 
 
int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y,
int w, int h, int src_x, int src_y)
 
{
struct sna_copy_op copy;
struct _Pixmap src, dst;
struct kgem_bo *src_bo;
 
char proc_info[1024];
int winx, winy;
 
get_proc_info(proc_info);
 
winx = *(uint32_t*)(proc_info+34);
winy = *(uint32_t*)(proc_info+38);
memset(&src, 0, sizeof(src));
memset(&dst, 0, sizeof(dst));
 
src.drawable.bitsPerPixel = 32;
src.drawable.width = src_bitmap->width;
src.drawable.height = src_bitmap->height;
 
dst.drawable.bitsPerPixel = 32;
dst.drawable.width = sna_fb.width;
dst.drawable.height = sna_fb.height;
memset(&copy, 0, sizeof(copy));
 
src_bo = (struct kgem_bo*)src_bitmap->handle;
if( sna_device->render.copy(sna_device, GXcopy,
&src, src_bo,
&dst, sna_fb.fb_bo, &copy) )
{
copy.blt(sna_device, &copy, src_x, src_y, w, h, winx+dst_x, winy+dst_y);
copy.done(sna_device, &copy);
}
 
kgem_submit(&sna_device->kgem);
return 0;
// __asm__ __volatile__("int3");
};
 
typedef struct
{
uint32_t width;
uint32_t height;
void *data;
uint32_t pitch;
struct kgem_bo *bo;
uint32_t bo_size;
uint32_t flags;
}surface_t;
 
 
 
int sna_create_bitmap(bitmap_t *bitmap)
{
surface_t *sf;
struct kgem_bo *bo;
sf = malloc(sizeof(*sf));
if(sf == NULL)
goto err_1;
__lock_acquire_recursive(__sna_lock);
 
bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height,
32,I915_TILING_NONE, CREATE_CPU_MAP);
if(bo == NULL)
goto err_2;
void *map = kgem_bo_map(&sna_device->kgem, bo);
if(map == NULL)
goto err_3;
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->data = map;
sf->pitch = bo->pitch;
sf->bo = bo;
sf->bo_size = PAGE_SIZE * bo->size.pages.count;
sf->flags = bitmap->flags;
bitmap->handle = (uint32_t)sf;
__lock_release_recursive(__sna_lock);
return 0;
err_3:
kgem_bo_destroy(&sna_device->kgem, bo);
err_2:
__lock_release_recursive(__sna_lock);
free(sf);
err_1:
return -1;
};
 
int sna_destroy_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
__lock_acquire_recursive(__sna_lock);
kgem_bo_destroy(&sna_device->kgem, sf->bo);
__lock_release_recursive(__sna_lock);
 
free(sf);
bitmap->handle = -1;
bitmap->data = (void*)-1;
bitmap->pitch = -1;
 
return 0;
};
 
int sna_lock_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
// printf("%s\n", __FUNCTION__);
__lock_acquire_recursive(__sna_lock);
kgem_bo_sync__cpu(&sna_device->kgem, sf->bo);
 
__lock_release_recursive(__sna_lock);
bitmap->data = sf->data;
bitmap->pitch = sf->pitch;
 
return 0;
};
 
int sna_resize_bitmap(bitmap_t *bitmap)
{
surface_t *sf = to_surface(bitmap);
struct kgem *kgem = &sna_device->kgem;
struct kgem_bo *bo = sf->bo;
uint32_t size;
uint32_t pitch;
 
bitmap->pitch = -1;
bitmap->data = (void *) -1;
 
size = kgem_surface_size(kgem,kgem->has_relaxed_fencing, CREATE_CPU_MAP,
bitmap->width, bitmap->height, 32, I915_TILING_NONE, &pitch);
assert(size && size <= kgem->max_object_size);
if(sf->bo_size >= size)
{
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->pitch = pitch;
bo->pitch = pitch;
return 0;
}
else
{
__lock_acquire_recursive(__sna_lock);
sna_bo_destroy(kgem, bo);
sf->bo = NULL;
bo = kgem_create_2d(kgem, bitmap->width, bitmap->height,
32, I915_TILING_NONE, CREATE_CPU_MAP);
 
if(bo == NULL)
{
__lock_release_recursive(__sna_lock);
return -1;
};
void *map = kgem_bo_map(kgem, bo);
if(map == NULL)
{
sna_bo_destroy(kgem, bo);
__lock_release_recursive(__sna_lock);
return -1;
};
__lock_release_recursive(__sna_lock);
sf->width = bitmap->width;
sf->height = bitmap->height;
sf->data = map;
sf->pitch = bo->pitch;
sf->bo = bo;
sf->bo_size = PAGE_SIZE * bo->size.pages.count;
}
 
return 0;
};
 
 
 
int sna_create_mask()
{
struct kgem_bo *bo;
 
// printf("%s width %d height %d\n", __FUNCTION__, sna_fb.width, sna_fb.height);
__lock_acquire_recursive(__sna_lock);
bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height,
8,I915_TILING_NONE, CREATE_CPU_MAP);
if(unlikely(bo == NULL))
goto err_1;
int *map = kgem_bo_map(&sna_device->kgem, bo);
if(map == NULL)
goto err_2;
__lock_release_recursive(__sna_lock);
memset(map, 0, bo->pitch * sna_fb.height);
tls_set(tls_mask, bo);
return 0;
err_2:
kgem_bo_destroy(&sna_device->kgem, bo);
err_1:
__lock_release_recursive(__sna_lock);
return -1;
};
 
 
bool
gen6_composite(struct sna *sna,
uint8_t op,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
 
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y,
int w, int h, int src_x, int src_y)
 
{
surface_t *sf = to_surface(bitmap);
 
struct drm_i915_mask_update update;
struct sna_composite_op composite;
struct _Pixmap src, dst, mask;
struct kgem_bo *src_bo, *mask_bo;
int winx, winy;
 
char proc_info[1024];
 
get_proc_info(proc_info);
 
winx = *(uint32_t*)(proc_info+34);
winy = *(uint32_t*)(proc_info+38);
// winw = *(uint32_t*)(proc_info+42)+1;
// winh = *(uint32_t*)(proc_info+46)+1;
mask_bo = tls_get(tls_mask);
if(unlikely(mask_bo == NULL))
{
sna_create_mask();
mask_bo = tls_get(tls_mask);
if( mask_bo == NULL)
return -1;
};
if(kgem_update_fb(&sna_device->kgem, &sna_fb))
{
__lock_acquire_recursive(__sna_lock);
kgem_bo_destroy(&sna_device->kgem, mask_bo);
__lock_release_recursive(__sna_lock);
sna_create_mask();
mask_bo = tls_get(tls_mask);
if( mask_bo == NULL)
return -1;
}
VG_CLEAR(update);
update.handle = mask_bo->handle;
update.bo_map = (__u32)MAP(mask_bo->map);
drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
mask_bo->pitch = update.bo_pitch;
memset(&src, 0, sizeof(src));
memset(&dst, 0, sizeof(dst));
memset(&mask, 0, sizeof(dst));
 
src.drawable.bitsPerPixel = 32;
src.drawable.width = sf->width;
src.drawable.height = sf->height;
 
dst.drawable.bitsPerPixel = 32;
dst.drawable.width = sna_fb.width;
dst.drawable.height = sna_fb.height;
mask.drawable.bitsPerPixel = 8;
mask.drawable.width = update.width;
mask.drawable.height = update.height;
 
memset(&composite, 0, sizeof(composite));
 
src_bo = sf->bo;
__lock_acquire_recursive(__sna_lock);
 
if( sna_device->render.blit_tex(sna_device, PictOpSrc,scale,
&src, src_bo,
&mask, mask_bo,
&dst, sna_fb.fb_bo,
src_x, src_y,
dst_x, dst_y,
winx+dst_x, winy+dst_y,
w, h,
&composite) )
{
struct sna_composite_rectangles r;
r.src.x = src_x;
r.src.y = src_y;
r.mask.x = dst_x;
r.mask.y = dst_y;
r.dst.x = winx+dst_x;
r.dst.y = winy+dst_y;
r.width = w;
r.height = h;
composite.blt(sna_device, &composite, &r);
composite.done(sna_device, &composite);
};
kgem_submit(&sna_device->kgem);
__lock_release_recursive(__sna_lock);
 
bitmap->data = (void*)-1;
bitmap->pitch = -1;
return 0;
}
 
 
 
 
 
 
 
 
 
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};
 
static const struct intel_device_info intel_i915_info = {
.gen = 030,
};
static const struct intel_device_info intel_i945_info = {
.gen = 031,
};
 
static const struct intel_device_info intel_g33_info = {
.gen = 033,
};
 
static const struct intel_device_info intel_i965_info = {
.gen = 040,
};
 
static const struct intel_device_info intel_g4x_info = {
.gen = 045,
};
 
static const struct intel_device_info intel_ironlake_info = {
.gen = 050,
};
 
static const struct intel_device_info intel_sandybridge_info = {
.gen = 060,
};
 
static const struct intel_device_info intel_ivybridge_info = {
.gen = 070,
};
 
static const struct intel_device_info intel_valleyview_info = {
.gen = 071,
};
 
static const struct intel_device_info intel_haswell_info = {
.gen = 075,
};
 
#define INTEL_DEVICE_MATCH(d,i) \
{ 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) }
 
 
static const struct pci_id_match intel_device_match[] = {
 
 
INTEL_DEVICE_MATCH (PCI_CHIP_I915_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_E7221_G, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I915_GM, &intel_i915_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_G, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GM, &intel_i945_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I945_GME, &intel_i945_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_M, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_PINEVIEW_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G33_G, &intel_g33_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q33_G, &intel_g33_info ),
/* Another marketing win: Q35 is another g33 device not a gen4 part
* like its G35 brethren.
*/
INTEL_DEVICE_MATCH (PCI_CHIP_Q35_G, &intel_g33_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_I965_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G35_G, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_Q, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I946_GZ, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GM, &intel_i965_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_I965_GME, &intel_i965_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_GM45_GM, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_E_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_Q45_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_G41_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G, &intel_g4x_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_B43_G1, &intel_g4x_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_D_G, &intel_ironlake_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IRONLAKE_M_G, &intel_ironlake_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT1, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS, &intel_sandybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_SANDYBRIDGE_S_GT, &intel_sandybridge_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_M_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_D_GT2, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT1, &intel_ivybridge_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_IVYBRIDGE_S_GT2, &intel_ivybridge_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_SDV_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_ULT_S_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_D_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_M_GT2_PLUS, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT1, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2, &intel_haswell_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_HASWELL_CRW_S_GT2_PLUS, &intel_haswell_info ),
 
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_PO, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_1, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_2, &intel_valleyview_info ),
INTEL_DEVICE_MATCH (PCI_CHIP_VALLEYVIEW_3, &intel_valleyview_info ),
 
INTEL_DEVICE_MATCH (PCI_MATCH_ANY, &intel_generic_info ),
 
{ 0, 0, 0 },
};
 
const struct pci_id_match *PciDevMatch(uint16_t dev,const struct pci_id_match *list)
{
while(list->device_id)
{
if(dev==list->device_id)
return list;
list++;
}
return NULL;
}
 
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci)
{
const struct pci_id_match *ent = NULL;
 
ent = PciDevMatch(pci->device_id, intel_device_match);
if(ent != NULL)
return (const struct intel_device_info*)ent->match_data;
else
return &intel_generic_info;
#if 0
for (i = 0; intel_chipsets[i].name != NULL; i++) {
if (DEVICE_ID(pci) == intel_chipsets[i].token) {
name = intel_chipsets[i].name;
break;
}
}
if (name == NULL) {
xf86DrvMsg(scrn->scrnIndex, X_WARNING, "unknown chipset\n");
name = "unknown";
} else {
xf86DrvMsg(scrn->scrnIndex, from,
"Integrated Graphics Chipset: Intel(R) %s\n",
name);
}
 
scrn->chipset = name;
#endif
}
 
 
int drmIoctl(int fd, unsigned long request, void *arg)
{
ioctl_t io;
 
io.handle = fd;
io.io_code = request;
io.input = arg;
io.inp_size = 64;
io.output = NULL;
io.out_size = 0;
 
return call_service(&io);
}
 
 
 
/drivers/video/Intel-2D/sna.h
0,0 → 1,355
/**************************************************************************
 
Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
Copyright © 2002 David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sub license, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial portions
of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* David Dawes <dawes@xfree86.org>
*
*/
 
#ifndef _SNA_H_
#define _SNA_H_
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <stdint.h>
#include <memory.h>
#include <malloc.h>
#include <errno.h>
 
#include "intel_driver.h"
#include "pciaccess.h"
 
#include "compiler.h"
 
//#define DBG(x)
//#define DBG(x) ErrorF x
 
#define assert(x)
 
 
int drmIoctl(int fd, unsigned long request, void *arg);
 
 
#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \
((type) << 16) | \
((a) << 12) | \
((r) << 8) | \
((g) << 4) | \
((b)))
#define PIXMAN_TYPE_OTHER 0
#define PIXMAN_TYPE_A 1
#define PIXMAN_TYPE_ARGB 2
#define PIXMAN_TYPE_ABGR 3
#define PIXMAN_TYPE_COLOR 4
#define PIXMAN_TYPE_GRAY 5
#define PIXMAN_TYPE_YUY2 6
#define PIXMAN_TYPE_YV12 7
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
 
/* 32bpp formats */
typedef enum {
PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0)
} pixman_format_code_t;
 
typedef enum _PictFormatShort {
 
PICT_a8r8g8b8 = PIXMAN_a8r8g8b8,
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8,
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8,
PICT_x8b8g8r8 = PIXMAN_x8b8g8r8,
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8,
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8,
 
/* 8bpp formats */
PICT_a8 = PIXMAN_a8,
 
/* 4bpp formats */
} PictFormatShort;
 
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)
 
#define PICT_FORMAT_A(f) PIXMAN_FORMAT_A(f)
 
#define RepeatNone 0
#define RepeatNormal 1
#define RepeatPad 2
#define RepeatReflect 3
 
#define PictFilterNearest 0
#define PictFilterBilinear 1
 
#define PictFilterFast 2
#define PictFilterGood 3
#define PictFilterBest 4
 
#define PictFilterConvolution 5
 
typedef int32_t pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t pixman_fixed_t;
 
struct pixman_transform
{
pixman_fixed_t matrix[3][3];
};
 
typedef unsigned long Picture;
typedef unsigned long PictFormat;
 
typedef struct _Pixmap *PixmapPtr;
typedef struct _Picture *PicturePtr;
typedef struct _Drawable *DrawablePtr;
typedef struct _PictFormat *PictFormatPtr;
 
typedef struct pixman_transform PictTransform, *PictTransformPtr;
 
 
 
typedef struct _Drawable {
unsigned char type; /* DRAWABLE_<type> */
unsigned char class; /* specific to type */
unsigned char depth;
unsigned char bitsPerPixel;
unsigned int id; /* resource id */
short x; /* window: screen absolute, pixmap: 0 */
short y; /* window: screen absolute, pixmap: 0 */
unsigned short width;
unsigned short height;
} DrawableRec;
 
/*
* PIXMAP -- device dependent
*/
 
typedef struct _Pixmap {
DrawableRec drawable;
// PrivateRec *devPrivates;
int refcnt;
int devKind; /* This is the pitch of the pixmap, typically width*bpp/8. */
// DevUnion devPrivate; /* When !NULL, devPrivate.ptr points to the raw pixel data. */
#ifdef COMPOSITE
short screen_x;
short screen_y;
#endif
unsigned usage_hint; /* see CREATE_PIXMAP_USAGE_* */
 
PixmapPtr master_pixmap; /* pointer to master copy of pixmap for pixmap sharing */
} PixmapRec;
 
typedef struct _PictFormat {
uint32_t id;
uint32_t format; /* except bpp */
unsigned char type;
unsigned char depth;
// DirectFormatRec direct;
// IndexFormatRec index;
} PictFormatRec;
 
typedef struct _Picture {
DrawablePtr pDrawable;
// PictFormatPtr pFormat;
PictFormatShort format; /* PICT_FORMAT */
int refcnt;
uint32_t id;
unsigned int repeat:1;
unsigned int graphicsExposures:1;
unsigned int subWindowMode:1;
unsigned int polyEdge:1;
unsigned int polyMode:1;
unsigned int freeCompClip:1;
unsigned int clientClipType:2;
unsigned int componentAlpha:1;
unsigned int repeatType:2;
unsigned int filter:3;
// unsigned int stateChanges:CPLastBit;
// unsigned int unused:18 - CPLastBit;
 
// PicturePtr alphaMap;
 
// PictTransform *transform;
 
// SourcePictPtr pSourcePict;
// xFixed *filter_params;
// int filter_nparams;
} PictureRec;
 
#define PolyModePrecise 0
#define PolyModeImprecise 1
 
 
struct sna_fb
{
uint32_t width;
uint32_t height;
uint32_t pitch;
uint32_t tiling;
struct kgem_bo *fb_bo;
};
 
struct pixman_box16
{
int16_t x1, y1, x2, y2;
};
 
typedef struct pixman_box16 BoxRec;
typedef unsigned int CARD32;
typedef unsigned short CARD16;
 
#include "sna_render.h"
#include "kgem.h"
 
#define GXclear 0x0
#define GXcopy 0x3
 
#define PictOpClear 0
#define PictOpSrc 1
#define PictOpDst 2
#define PictOpOver 3
#define PictOpOverReverse 4
#define PictOpIn 5
#define PictOpInReverse 6
#define PictOpOut 7
#define PictOpOutReverse 8
#define PictOpAtop 9
#define PictOpAtopReverse 10
#define PictOpXor 11
#define PictOpAdd 12
#define PictOpSaturate 13
#define PictOpMaximum 13
 
 
 
struct sna {
unsigned flags;
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
 
struct list flush_pixmaps;
struct list active_pixmaps;
 
 
 
// int vblank_interval;
 
// struct list deferred_free;
// struct list dirty_pixmaps;
// struct list active_pixmaps;
// struct list inactive_clock[2];
 
unsigned int tiling;
#define SNA_TILING_DISABLE 0x0
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
#define SNA_TILING_ALL (~0)
 
struct pci_device *PciInfo;
const struct intel_device_info *info;
 
// PicturePtr clear;
struct {
uint32_t fill_bo;
uint32_t fill_pixel;
uint32_t fill_alu;
} blt_state;
union {
// struct gen2_render_state gen2;
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
struct gen6_render_state gen6;
struct gen7_render_state gen7;
} render_state;
 
 
/* Broken-out options. */
// OptionInfoPtr Options;
 
/* Driver phase/state information */
// Bool suspended;
 
struct kgem kgem;
struct sna_render render;
 
#if DEBUG_MEMORY
struct {
int shadow_pixels_allocs;
int cpu_bo_allocs;
size_t shadow_pixels_bytes;
size_t cpu_bo_bytes;
} debug_memory;
#endif
};
 
static inline struct sna *
to_sna_from_kgem(struct kgem *kgem)
{
return container_of(kgem, struct sna, kgem);
}
 
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif
 
#ifndef ALIGN
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#endif
 
#ifndef MIN
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif
#endif /* _SNA_H */
/drivers/video/Intel-2D/sna_render.h
0,0 → 1,729
#ifndef SNA_RENDER_H
#define SNA_RENDER_H
 
#include "compiler.h"
 
#include <stdbool.h>
#include <stdint.h>
 
#define GRADIENT_CACHE_SIZE 16
 
#define GXinvalid 0xff
 
#define HW_BIT_BLIT (1<<0) /* BGRX blitter */
#define HW_TEX_BLIT (1<<1) /* stretch blit */
#define HW_VID_BLIT (1<<2) /* planar and packed video */
 
struct sna;
struct sna_glyph;
struct sna_video;
struct sna_video_frame;
struct brw_compile;
 
struct sna_composite_rectangles {
struct sna_coordinate {
int16_t x, y;
} src, mask, dst;
int16_t width, height;
};
 
struct sna_composite_op {
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
fastcall void (*box)(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box);
void (*boxes)(struct sna *sna, const struct sna_composite_op *op,
const BoxRec *box, int nbox);
void (*done)(struct sna *sna, const struct sna_composite_op *op);
 
struct sna_damage **damage;
 
uint32_t op;
 
struct {
PixmapPtr pixmap;
CARD32 format;
struct kgem_bo *bo;
int16_t x, y;
uint16_t width, height;
} dst;
 
struct sna_composite_channel {
struct kgem_bo *bo;
PictTransform *transform;
uint16_t width;
uint16_t height;
uint32_t pict_format;
uint32_t card_format;
uint32_t filter;
uint32_t repeat;
uint32_t is_affine : 1;
uint32_t is_solid : 1;
uint32_t is_linear : 1;
uint32_t is_opaque : 1;
uint32_t alpha_fixup : 1;
uint32_t rb_reversed : 1;
int16_t offset[2];
float scale[2];
 
// pixman_transform_t embedded_transform;
 
union {
struct {
float dx, dy, offset;
} linear;
struct {
uint32_t pixel;
} gen2;
struct gen3_shader_channel {
int type;
uint32_t mode;
uint32_t constants;
} gen3;
} u;
} src, mask;
uint32_t is_affine : 1;
uint32_t has_component_alpha : 1;
uint32_t need_magic_ca_pass : 1;
uint32_t rb_reversed : 1;
 
int16_t floats_per_vertex;
int16_t floats_per_rect;
fastcall void (*prim_emit)(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);
 
struct sna_composite_redirect {
struct kgem_bo *real_bo;
struct sna_damage **real_damage, *damage;
BoxRec box;
} redirect;
 
union {
struct sna_blt_state {
PixmapPtr src_pixmap;
int16_t sx, sy;
 
uint32_t inplace :1;
uint32_t overwrites:1;
uint32_t bpp : 6;
 
uint32_t cmd;
uint32_t br13;
uint32_t pitch[2];
uint32_t pixel;
struct kgem_bo *bo[2];
} blt;
 
struct {
float constants[8];
uint32_t num_constants;
} gen3;
 
struct {
int wm_kernel;
int ve_id;
} gen4;
 
struct {
int16_t wm_kernel;
int16_t ve_id;
} gen5;
 
struct {
uint32_t flags;
} gen6;
 
struct {
uint32_t flags;
} gen7;
} u;
 
void *priv;
};
 
struct sna_copy_op {
struct sna_composite_op base;
 
void (*blt)(struct sna *sna, const struct sna_copy_op *op,
int16_t sx, int16_t sy,
int16_t w, int16_t h,
int16_t dx, int16_t dy);
void (*done)(struct sna *sna, const struct sna_copy_op *op);
};
 
struct sna_render {
int active;
int caps;
 
int max_3d_size;
int max_3d_pitch;
 
unsigned prefer_gpu;
#define PREFER_GPU_BLT 0x1
#define PREFER_GPU_RENDER 0x2
#define PREFER_GPU_SPANS 0x4
 
bool (*composite)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src, PicturePtr mask,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t w, int16_t h,
struct sna_composite_op *tmp);
 
#if 0
bool (*check_composite_spans)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src,
int16_t w, int16_t h, unsigned flags);
bool (*composite_spans)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t w, int16_t h,
unsigned flags,
struct sna_composite_spans_op *tmp);
#define COMPOSITE_SPANS_RECTILINEAR 0x1
#define COMPOSITE_SPANS_INPLACE_HINT 0x2
 
bool (*video)(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap);
 
bool (*fill_boxes)(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n);
bool (*fill)(struct sna *sna, uint8_t alu,
PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
struct sna_fill_op *tmp);
bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint8_t alu);
bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo);
 
bool (*copy_boxes)(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags);
#define COPY_LAST 0x1
#define COPY_SYNC 0x2
 
#endif
 
bool (*blit_tex)(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
bool (*copy)(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
struct sna_copy_op *op);
 
void (*flush)(struct sna *sna);
void (*reset)(struct sna *sna);
void (*fini)(struct sna *sna);
 
#if 0
 
struct sna_alpha_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[256+7];
} alpha_cache;
 
struct sna_solid_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[1024];
uint32_t color[1025];
int last;
int size;
int dirty;
} solid_cache;
 
struct {
struct sna_gradient_cache {
struct kgem_bo *bo;
int nstops;
PictGradientStop *stops;
} cache[GRADIENT_CACHE_SIZE];
int size;
} gradient_cache;
 
struct sna_glyph_cache{
PicturePtr picture;
struct sna_glyph **glyphs;
uint16_t count;
uint16_t evict;
} glyph[2];
pixman_image_t *white_image;
PicturePtr white_picture;
#if HAS_PIXMAN_GLYPHS
pixman_glyph_cache_t *glyph_cache;
#endif
 
#endif
 
uint16_t vb_id;
uint16_t vertex_offset;
uint16_t vertex_start;
uint16_t vertex_index;
uint16_t vertex_used;
uint16_t vertex_size;
uint16_t vertex_reloc[16];
int nvertex_reloc;
 
struct kgem_bo *vbo;
float *vertices;
 
float vertex_data[1024];
};
 
struct gen2_render_state {
uint32_t target;
bool need_invariant;
uint32_t logic_op_enabled;
uint32_t ls1, ls2, vft;
uint32_t diffuse;
uint32_t specular;
};
 
struct gen3_render_state {
uint32_t current_dst;
bool need_invariant;
uint32_t tex_count;
uint32_t last_drawrect_limit;
uint32_t last_target;
uint32_t last_blend;
uint32_t last_constants;
uint32_t last_sampler;
uint32_t last_shader;
uint32_t last_diffuse;
uint32_t last_specular;
 
uint16_t last_vertex_offset;
uint16_t floats_per_vertex;
uint16_t last_floats_per_vertex;
 
uint32_t tex_map[4];
uint32_t tex_handle[2];
uint32_t tex_delta[2];
};
 
struct gen4_render_state {
struct kgem_bo *general_bo;
 
uint32_t vs;
uint32_t sf;
uint32_t wm;
uint32_t cc;
 
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
 
bool needs_invariant;
bool needs_urb;
};
 
struct gen5_render_state {
struct kgem_bo *general_bo;
 
uint32_t vs;
uint32_t sf[2];
uint32_t wm;
uint32_t cc;
 
int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
uint16_t last_pipelined_pointers;
 
bool needs_invariant;
};
 
enum {
GEN6_WM_KERNEL_NOMASK = 0,
GEN6_WM_KERNEL_NOMASK_P,
 
GEN6_WM_KERNEL_MASK,
GEN6_WM_KERNEL_MASK_P,
 
GEN6_WM_KERNEL_MASKCA,
GEN6_WM_KERNEL_MASKCA_P,
 
GEN6_WM_KERNEL_MASKSA,
GEN6_WM_KERNEL_MASKSA_P,
 
GEN6_WM_KERNEL_OPACITY,
GEN6_WM_KERNEL_OPACITY_P,
 
GEN6_WM_KERNEL_VIDEO_PLANAR,
GEN6_WM_KERNEL_VIDEO_PACKED,
GEN6_KERNEL_COUNT
};
 
struct gen6_render_state {
const struct gt_info *info;
struct kgem_bo *general_bo;
 
uint32_t vs_state;
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];
 
uint32_t cc_blend;
 
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t blend;
uint32_t samplers;
uint32_t kernel;
 
uint16_t num_sf_outputs;
uint16_t ve_id;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
 
bool needs_invariant;
bool first_state_packet;
};
 
enum {
GEN7_WM_KERNEL_NOMASK = 0,
GEN7_WM_KERNEL_NOMASK_P,
 
GEN7_WM_KERNEL_MASK,
GEN7_WM_KERNEL_MASK_P,
 
GEN7_WM_KERNEL_MASKCA,
GEN7_WM_KERNEL_MASKCA_P,
 
GEN7_WM_KERNEL_MASKSA,
GEN7_WM_KERNEL_MASKSA_P,
 
GEN7_WM_KERNEL_OPACITY,
GEN7_WM_KERNEL_OPACITY_P,
 
GEN7_WM_KERNEL_VIDEO_PLANAR,
GEN7_WM_KERNEL_VIDEO_PACKED,
GEN7_WM_KERNEL_COUNT
};
 
struct gen7_render_state {
const struct gt_info *info;
struct kgem_bo *general_bo;
 
uint32_t vs_state;
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];
 
uint32_t cc_blend;
 
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t blend;
uint32_t samplers;
uint32_t kernel;
 
uint16_t num_sf_outputs;
uint16_t ve_id;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;
 
bool needs_invariant;
bool emit_flush;
};
 
struct sna_static_stream {
uint32_t size, used;
uint8_t *data;
};
 
int sna_static_stream_init(struct sna_static_stream *stream);
uint32_t sna_static_stream_add(struct sna_static_stream *stream,
const void *data, uint32_t len, uint32_t align);
void *sna_static_stream_map(struct sna_static_stream *stream,
uint32_t len, uint32_t align);
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
void *ptr);
unsigned sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *));
 
unsigned sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *, int),
int width);
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream);
 
struct kgem_bo *
sna_render_get_solid(struct sna *sna,
uint32_t color);
 
void
sna_render_flush_solid(struct sna *sna);
 
 
uint32_t sna_rgba_for_color(uint32_t color, int depth);
uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format);
bool sna_get_rgba_from_pixel(uint32_t pixel,
uint16_t *red,
uint16_t *green,
uint16_t *blue,
uint16_t *alpha,
uint32_t format);
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
void no_render_init(struct sna *sna);
 
bool gen2_render_init(struct sna *sna);
bool gen3_render_init(struct sna *sna);
bool gen4_render_init(struct sna *sna);
bool gen5_render_init(struct sna *sna);
bool gen6_render_init(struct sna *sna);
bool gen7_render_init(struct sna *sna);
 
#if 0
 
bool sna_tiling_composite(uint32_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp);
bool sna_tiling_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n);
 
bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n);
 
bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
int bpp, const BoxRec *box, int nbox);
 
bool sna_blt_composite(struct sna *sna,
uint32_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp,
bool fallback);
bool sna_blt_composite__convert(struct sna *sna,
int x, int y,
int width, int height,
struct sna_composite_op *tmp);
 
bool sna_blt_fill(struct sna *sna, uint8_t alu,
struct kgem_bo *bo,
int bpp,
uint32_t pixel,
struct sna_fill_op *fill);
 
bool sna_blt_copy(struct sna *sna, uint8_t alu,
struct kgem_bo *src,
struct kgem_bo *dst,
int bpp,
struct sna_copy_op *copy);
 
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *bo,
int bpp,
uint32_t pixel,
const BoxRec *box, int n);
 
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
int bpp,
const BoxRec *box, int n);
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int nbox);
 
bool _sna_get_pixel_from_rgba(uint32_t *pixel,
uint16_t red,
uint16_t green,
uint16_t blue,
uint16_t alpha,
uint32_t format);
 
static inline bool
sna_get_pixel_from_rgba(uint32_t * pixel,
uint16_t red,
uint16_t green,
uint16_t blue,
uint16_t alpha,
uint32_t format)
{
switch (format) {
case PICT_x8r8g8b8:
alpha = 0xffff;
/* fall through to re-use a8r8g8b8 expansion */
case PICT_a8r8g8b8:
*pixel = ((alpha >> 8 << 24) |
(red >> 8 << 16) |
(green & 0xff00) |
(blue >> 8));
return TRUE;
case PICT_a8:
*pixel = alpha >> 8;
return TRUE;
}
 
return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format);
}
 
struct kgem_bo *
__sna_render_pixmap_bo(struct sna *sna,
PixmapPtr pixmap,
const BoxRec *box,
bool blt);
 
int
sna_render_pixmap_bo(struct sna *sna,
struct sna_composite_channel *channel,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
bool
sna_render_pixmap_partial(struct sna *sna,
PixmapPtr pixmap,
struct kgem_bo *bo,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h);
 
int
sna_render_picture_extract(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_approximate_gradient(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_fixup(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_convert(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool fixup_alpha);
 
inline static void sna_render_composite_redirect_init(struct sna_composite_op *op)
{
struct sna_composite_redirect *t = &op->redirect;
t->real_bo = NULL;
t->damage = NULL;
}
 
bool
sna_render_composite_redirect(struct sna *sna,
struct sna_composite_op *op,
int x, int y, int width, int height);
 
void
sna_render_composite_redirect_done(struct sna *sna,
const struct sna_composite_op *op);
 
bool
sna_composite_mask_is_opaque(PicturePtr mask);
 
#endif
void sna_vertex_init(struct sna *sna);
 
static inline void sna_vertex_lock(struct sna_render *r)
{
// pthread_mutex_lock(&r->lock);
}
 
static inline void sna_vertex_acquire__locked(struct sna_render *r)
{
r->active++;
}
 
static inline void sna_vertex_unlock(struct sna_render *r)
{
// pthread_mutex_unlock(&r->lock);
}
 
static inline void sna_vertex_release__locked(struct sna_render *r)
{
assert(r->active > 0);
--r->active;
// if (--r->active == 0)
// pthread_cond_signal(&r->wait);
}
 
static inline bool sna_vertex_wait__locked(struct sna_render *r)
{
bool was_active = r->active;
// while (r->active)
// pthread_cond_wait(&r->wait, &r->lock);
return was_active;
}
 
#endif /* SNA_RENDER_H */
/drivers/video/Intel-2D/brw/brw_sf.c
0,0 → 1,54
#include "brw.h"
 
bool brw_sf_kernel__nomask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2, delta;
 
v0 = brw_vec4_grf(3, 0);
v1 = brw_vec4_grf(4, 0);
v2 = brw_vec4_grf(5, 0);
delta = brw_vec8_grf(7, 0);
 
inv = brw_vec4_grf(6, 0);
brw_math_invert(p, inv, brw_vec4_grf(1, 11));
 
brw_MOV(p, brw_message_reg(3), v0);
 
brw_ADD(p, delta, v1, brw_negate(v2));
brw_MUL(p, brw_message_reg(1), delta, brw_vec1_grf(6,0));
 
brw_ADD(p, delta, v2, brw_negate(v0));
brw_MUL(p, brw_message_reg(2), delta, brw_vec1_grf(6,2));
 
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
 
return true;
}
 
bool brw_sf_kernel__mask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2;
 
v0 = brw_vec8_grf(3, 0);
v1 = brw_vec8_grf(4, 0);
v2 = brw_vec8_grf(5, 0);
 
inv = brw_vec4_grf(6, 0);
brw_math_invert(p, inv, brw_vec4_grf(1, 11));
 
brw_MOV(p, brw_message_reg(3), v0);
 
brw_ADD(p, brw_vec8_grf(7, 0), v1, brw_negate(v2));
brw_MUL(p, brw_message_reg(1), brw_vec8_grf(7, 0), brw_vec1_grf(6,0));
 
brw_ADD(p, brw_vec8_grf(7, 0), v2, brw_negate(v0));
brw_MUL(p, brw_message_reg(2), brw_vec8_grf(7, 0), brw_vec1_grf(6,2));
 
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
 
return true;
}
/drivers/video/Intel-2D/brw/brw.h
0,0 → 1,17
#include "brw_eu.h"
 
bool brw_sf_kernel__nomask(struct brw_compile *p);
bool brw_sf_kernel__mask(struct brw_compile *p);
 
bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
 
bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
 
bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width);
/drivers/video/Intel-2D/brw/brw_eu.c
0,0 → 1,150
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/* Returns the corresponding conditional mod for swapping src0 and
* src1 in e.g. CMP.
*/
uint32_t
brw_swap_cmod(uint32_t cmod)
{
switch (cmod) {
case BRW_CONDITIONAL_Z:
case BRW_CONDITIONAL_NZ:
return cmod;
case BRW_CONDITIONAL_G:
return BRW_CONDITIONAL_LE;
case BRW_CONDITIONAL_GE:
return BRW_CONDITIONAL_L;
case BRW_CONDITIONAL_L:
return BRW_CONDITIONAL_GE;
case BRW_CONDITIONAL_LE:
return BRW_CONDITIONAL_G;
default:
return ~0;
}
}
 
/* How does predicate control work when execution_size != 8? Do I
* need to test/set for 0xffff when execution_size is 16?
*/
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
{
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
if (value != 0xff) {
if (value != p->flag_value) {
brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
p->flag_value = value;
}
 
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
}
 
void brw_set_compression_control(struct brw_compile *p,
enum brw_compression compression_control)
{
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
 
if (p->gen >= 060) {
/* Since we don't use the 32-wide support in gen6, we translate
* the pre-gen6 compression control here.
*/
switch (compression_control) {
case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1Q;
break;
case BRW_COMPRESSION_2NDHALF:
/* For 8-wide, this is "use the second set of 8 bits." */
p->current->header.compression_control = GEN6_COMPRESSION_2Q;
break;
case BRW_COMPRESSION_COMPRESSED:
/* For 16-wide instruction compression, use the first set of 16 bits
* since we don't do 32-wide dispatch.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
default:
assert(!"not reached");
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
}
} else {
p->current->header.compression_control = compression_control;
}
}
 
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
 
void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
p->compressed = p->compressed_stack[p->current - p->stack];
}
 
void brw_compile_init(struct brw_compile *p, int gen, void *store)
{
assert(gen);
 
p->gen = gen;
p->store = store;
 
p->nr_insn = 0;
p->current = p->stack;
p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
 
/* Some defaults?
*/
brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
brw_set_saturate(p, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_predicate_control_flag_value(p, 0xff);
 
p->if_stack_depth = 0;
p->if_stack_array_size = 0;
p->if_stack = NULL;
}
/drivers/video/Intel-2D/brw/brw_eu.h
0,0 → 1,2266
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#ifndef BRW_EU_H
#define BRW_EU_H
 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
 
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
 
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
 
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8
 
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
 
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
 
/** Number of message register file registers */
#define BRW_MAX_MRF 16
 
 
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
 
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
 
enum brw_compression {
BRW_COMPRESSION_NONE,
BRW_COMPRESSION_2NDHALF,
BRW_COMPRESSION_COMPRESSED,
};
 
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
 
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
 
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
 
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
 
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5
 
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
 
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
 
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
 
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
* are two contributors ANDed together to whether channels are
* executed: The predication on the instruction, and the channel write
* enable.
*/
/**
* This is the default value. It means that a channel's write enable is set
* if the per-channel IP is pointing at this instruction.
*/
#define BRW_WE_NORMAL 0
/**
* This is used like BRW_MASK_DISABLE, and causes all channels to have
* their write enable set. Note that predication still contributes to
* whether the channel actually gets written.
*/
#define BRW_WE_ALL 1
/** @} */
 
enum opcode {
/* These are the actual hardware opcodes. */
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
BRW_OPCODE_RSR = 10,
BRW_OPCODE_RSL = 11,
BRW_OPCODE_ASR = 12,
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_JMPI = 32,
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF = 35,
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
BRW_OPCODE_DO = 38,
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
BRW_OPCODE_MSAVE = 44,
BRW_OPCODE_MRESTORE = 45,
BRW_OPCODE_PUSH = 46,
BRW_OPCODE_POP = 47,
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
BRW_OPCODE_MATH = 56,
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
BRW_OPCODE_FRC = 67,
BRW_OPCODE_RNDU = 68,
BRW_OPCODE_RNDD = 69,
BRW_OPCODE_RNDE = 70,
BRW_OPCODE_RNDZ = 71,
BRW_OPCODE_MAC = 72,
BRW_OPCODE_MACH = 73,
BRW_OPCODE_LZD = 74,
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
BRW_OPCODE_DPA2 = 88,
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90,
BRW_OPCODE_NOP = 126,
 
/* These are compiler backend opcodes that get translated into other
* instructions.
*/
FS_OPCODE_FB_WRITE = 128,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
SHADER_OPCODE_EXP2,
SHADER_OPCODE_LOG2,
SHADER_OPCODE_POW,
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
FS_OPCODE_TXB,
FS_OPCODE_TXD,
FS_OPCODE_TXF,
FS_OPCODE_TXL,
FS_OPCODE_TXS,
FS_OPCODE_DISCARD,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_PULL_CONSTANT_LOAD,
 
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
};
 
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
 
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
 
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
 
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
 
#define BRW_MRF_COMPR4 (1 << 7)
 
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
 
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
 
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
 
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
 
#define BRW_MESSAGE_TARGET_NULL 0
#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9
 
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define BRW_SAMPLER_MESSAGE_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
 
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
 
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
 
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
/**
* Message target: Shared Function ID for where to SEND a message.
*
* These are enumerated in the ISA reference under "send - Send Message".
* In particular, see the following tables:
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
* - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
* Overview / GPE Function IDs
*/
enum brw_message_target {
BRW_SFID_NULL = 0,
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
BRW_SFID_SAMPLER = 2,
BRW_SFID_MESSAGE_GATEWAY = 3,
BRW_SFID_DATAPORT_READ = 4,
BRW_SFID_DATAPORT_WRITE = 5,
BRW_SFID_URB = 6,
BRW_SFID_THREAD_SPAWNER = 7,
 
GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
GEN7_SFID_DATAPORT_DATA_CACHE = 10,
};
 
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
 
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
 
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
 
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
 
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
 
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
 
#define BRW_URB_OPCODE_WRITE 0
 
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
 
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
 
#define REG_SIZE (8*4)
 
struct brw_instruction {
struct {
unsigned opcode:7;
unsigned pad:1;
unsigned access_mode:1;
unsigned mask_control:1;
unsigned dependency_control:2;
unsigned compression_control:2; /* gen6: quater control */
unsigned thread_control:2;
unsigned predicate_control:4;
unsigned predicate_inverse:1;
unsigned execution_size:3;
/**
* Conditional Modifier for most instructions. On Gen6+, this is also
* used for the SEND instruction's Message Target/SFID.
*/
unsigned destreg__conditionalmod:4;
unsigned acc_wr_control:1;
unsigned cmpt_control:1;
unsigned debug_control:1;
unsigned saturate:1;
} header;
 
union {
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_subreg_nr:5;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2; /* 0x00000c00 */
unsigned src1_reg_type:3; /* 0x00007000 */
unsigned pad:1;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:1;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned pad0:6;
unsigned dest_writemask:4;
int dest_indirect_offset:6;
unsigned dest_subreg_nr:3;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
 
int jump_count:16;
} branch_gen6;
 
struct {
unsigned dest_reg_file:1;
unsigned flag_subreg_num:1;
unsigned pad0:2;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src2_abs:1;
unsigned src2_negate:1;
unsigned pad1:7;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:3;
unsigned dest_reg_nr:8;
} da3src;
} bits1;
 
 
union {
struct {
unsigned src0_subreg_nr:5;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} da1;
 
struct {
int src0_indirect_offset:10;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} ia1;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
unsigned src0_subreg_nr:1;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} da16;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
int src0_indirect_offset:6;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia16;
 
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
*
* Does not apply to Gen6+. The SFID/message target moved to bits
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
*/
struct {
unsigned pad:26;
unsigned end_of_thread:1;
unsigned pad1:1;
unsigned sfid:4;
} send_gen5; /* for Ironlake only */
 
struct {
unsigned src0_rep_ctrl:1;
unsigned src0_swizzle:8;
unsigned src0_subreg_nr:3;
unsigned src0_reg_nr:8;
unsigned pad0:1;
unsigned src1_rep_ctrl:1;
unsigned src1_swizzle:8;
unsigned src1_subreg_nr_low:2;
} da3src;
} bits2;
 
union {
struct {
unsigned src1_subreg_nr:5;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned pad0:7;
} da1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
unsigned src1_subreg_nr:1;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned pad2:7;
} da16;
 
struct {
int src1_indirect_offset:10;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
int src1_indirect_offset:6;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned pad0:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad2:5;
} ia16;
 
struct {
int jump_count:16; /* note: signed */
unsigned pop_count:4;
unsigned pad0:12;
} if_else;
 
/* This is also used for gen7 IF/ELSE instructions */
struct {
/* Signed jump distance to the ip to jump to if all channels
* are disabled after the break or continue. It should point
* to the end of the innermost control flow block, as that's
* where some channel could get re-enabled.
*/
int jip:16;
 
/* Signed jump distance to the location to resume execution
* of this channel if it's enabled for the break or continue.
*/
int uip:16;
} break_cont;
 
/**
* \defgroup SEND instructions / Message Descriptors
*
* @{
*/
 
/**
* Generic Message Descriptor for Gen4 SEND instructions. The structs
* below expand function_control to something specific for their
* message. Due to struct packing issues, they duplicate these bits.
*
* See the G45 PRM, Volume 4, Table 14-15.
*/
struct {
unsigned function_control:16;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} generic;
 
/**
* Generic Message Descriptor for Gen5-7 SEND instructions.
*
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
* of the information on the SEND instruction is missing from the public
* Ironlake PRM.)
*
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
* According to the SEND instruction description:
* "The MSb of the message description, the EOT field, always comes from
* bit 127 of the instruction word"...which is bit 31 of this field.
*/
struct {
unsigned function_control:19;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} generic_gen5;
 
/** G45 PRM, Volume 4, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned pad0:8;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} math;
 
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned snapshot:1;
unsigned pad0:10;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} math_gen5;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned return_format:2;
unsigned msg_type:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned simd_mode:2;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen5;
 
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:5;
unsigned simd_mode:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen7;
 
struct brw_urb_immediate {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} urb;
 
struct {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen5;
 
struct {
unsigned opcode:3;
unsigned offset:11;
unsigned swizzle_control:1;
unsigned complete:1;
unsigned per_slot_offset:1;
unsigned pad0:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen7;
 
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
struct {
unsigned binding_table_index:8;
unsigned msg_control:4;
unsigned msg_type:2;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_read_gen5;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_write;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_write_gen5;
 
/**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
**/
struct {
unsigned binding_table_index:8;
unsigned msg_control:5;
unsigned msg_type:3;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp_sampler_const_cache;
 
/**
* Message for the Sandybridge Render Cache Data Port.
*
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
* Section 3.9.2.1.1: Message Descriptor.
*
* "Slot Group Select" and "Last Render Target" are part of the
* 5-bit message control for Render Target Write messages. See
* Section 3.9.9.2.1 of the same volume.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_type:4;
unsigned send_commit_msg:1;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp;
 
/**
* Message for any of the Gen7 Data Port caches.
*
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
* Data Port Messages / Message Descriptor. Once again, "Slot Group
* Select" and "Last Render Target" are part of the 6-bit message
* control for Render Target Writes.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_control_pad:1;
unsigned msg_type:4;
unsigned pad1:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad2:2;
unsigned end_of_thread:1;
} gen7_dp;
/** @} */
 
struct {
unsigned src1_subreg_nr_high:1;
unsigned src1_reg_nr:8;
unsigned pad0:1;
unsigned src2_rep_ctrl:1;
unsigned src2_swizzle:8;
unsigned src2_subreg_nr:3;
unsigned src2_reg_nr:8;
unsigned pad1:2;
} da3src;
 
int d;
unsigned ud;
float f;
} bits3;
};
 
 
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
* WM programs to implement shaders decomposed into "channel serial"
* or "structure of array" form:
*/
struct brw_reg {
unsigned type:4;
unsigned file:2;
unsigned nr:8;
unsigned subnr:5; /* :1 in align16 */
unsigned negate:1; /* source only */
unsigned abs:1; /* source only */
unsigned vstride:4; /* source only */
unsigned width:3; /* src only, align1 only */
unsigned hstride:2; /* align1 only */
unsigned address_mode:1; /* relative addressing, hopefully! */
unsigned pad0:1;
 
union {
struct {
unsigned swizzle:8; /* src only, align16 only */
unsigned writemask:4; /* dest only, align16 only */
int indirect_offset:10; /* relative addressing offset */
unsigned pad1:10; /* two dwords total */
} bits;
 
float f;
int d;
unsigned ud;
} dw1;
};
 
struct brw_indirect {
unsigned addr_subnr:4;
int addr_offset:10;
unsigned pad:18;
};
 
#define BRW_EU_MAX_INSN_STACK 5
#define BRW_EU_MAX_INSN 10000
 
struct brw_compile {
struct brw_instruction *store;
unsigned nr_insn;
 
int gen;
 
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
 
unsigned flag_value;
bool single_program_flow;
bool compressed;
 
/* Control flow stacks:
* - if_stack contains IF and ELSE instructions which must be patched
* (and popped) once the matching ENDIF instruction is encountered.
*/
struct brw_instruction **if_stack;
int if_stack_depth;
int if_stack_array_size;
};
 
static inline int type_sz(unsigned type)
{
switch (type) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
return 4;
case BRW_REGISTER_TYPE_HF:
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_W:
return 2;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
return 1;
default:
return 0;
}
}
 
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static inline struct brw_reg brw_reg(unsigned file,
unsigned nr,
unsigned subnr,
unsigned type,
unsigned vstride,
unsigned width,
unsigned hstride,
unsigned swizzle,
unsigned writemask)
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
else if (file == BRW_MESSAGE_REGISTER_FILE)
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
 
reg.type = type;
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
reg.negate = 0;
reg.abs = 0;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
 
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
* be lost when converted to align16. This is probably too much to
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
reg.dw1.bits.swizzle = swizzle;
reg.dw1.bits.writemask = writemask;
reg.dw1.bits.indirect_offset = 0;
reg.dw1.bits.pad1 = 0;
return reg;
}
 
/** Construct float[16] register */
static inline struct brw_reg brw_vec16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[8] register */
static inline struct brw_reg brw_vec8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[4] register */
static inline struct brw_reg brw_vec4_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[2] register */
static inline struct brw_reg brw_vec2_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYXY,
WRITEMASK_XY);
}
 
/** Construct float[1] register */
static inline struct brw_reg brw_vec1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
 
static inline struct brw_reg __retype(struct brw_reg reg,
unsigned type)
{
reg.type = type;
return reg;
}
 
static inline struct brw_reg __retype_d(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg __retype_ud(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg __retype_uw(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg __sechalf(struct brw_reg reg)
{
if (reg.vstride)
reg.nr++;
return reg;
}
 
static inline struct brw_reg __suboffset(struct brw_reg reg,
unsigned delta)
{
reg.subnr += delta * type_sz(reg.type);
return reg;
}
 
static inline struct brw_reg __offset(struct brw_reg reg,
unsigned delta)
{
reg.nr += delta;
return reg;
}
 
static inline struct brw_reg byte_offset(struct brw_reg reg,
unsigned bytes)
{
unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
reg.nr = newoffset / REG_SIZE;
reg.subnr = newoffset % REG_SIZE;
return reg;
}
 
 
/** Construct unsigned word[16] register */
static inline struct brw_reg brw_uw16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[8] register */
static inline struct brw_reg brw_uw8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[1] register */
static inline struct brw_reg brw_uw1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
static inline struct brw_reg brw_imm_reg(unsigned type)
{
return brw_reg( BRW_IMMEDIATE_VALUE,
0,
0,
type,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
0,
0);
}
 
/** Construct float immediate register */
static inline struct brw_reg brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
return imm;
}
 
/** Construct integer immediate register */
static inline struct brw_reg brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
return imm;
}
 
/** Construct uint immediate register */
static inline struct brw_reg brw_imm_ud(unsigned ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
return imm;
}
 
/** Construct ushort immediate register */
static inline struct brw_reg brw_imm_uw(uint16_t uw)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
imm.dw1.ud = uw | (uw << 16);
return imm;
}
 
/** Construct short immediate register */
static inline struct brw_reg brw_imm_w(int16_t w)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
imm.dw1.d = w | (w << 16);
return imm;
}
 
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
* numbers alias with _V and _VF below:
*/
 
/** Construct vector of eight signed half-byte values */
static inline struct brw_reg brw_imm_v(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
/** Construct vector of four 8-bit float values */
static inline struct brw_reg brw_imm_vf(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
 
static inline struct brw_reg brw_imm_vf4(unsigned v0,
unsigned v1,
unsigned v2,
unsigned v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) |
(v1 << 8) |
(v2 << 16) |
(v3 << 24));
return imm;
}
 
static inline struct brw_reg brw_address(struct brw_reg reg)
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
 
/** Construct float[1] general-purpose register */
static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[2] general-purpose register */
static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[4] general-purpose register */
static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[8] general-purpose register */
static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct null register (usually used for setting condition codes) */
static inline struct brw_reg brw_null_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL,
0);
}
 
static inline struct brw_reg brw_address_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ADDRESS,
subnr);
}
 
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
static inline struct brw_reg brw_ip_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, /* NOTE! */
WRITEMASK_XYZW); /* NOTE! */
}
 
static inline struct brw_reg brw_acc_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ACCUMULATOR,
0);
}
 
static inline struct brw_reg brw_notification_1_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NOTIFICATION_COUNT,
1,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
static inline struct brw_reg brw_flag_reg(void)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_FLAG,
0);
}
 
static inline struct brw_reg brw_mask_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_MASK,
subnr);
}
 
static inline struct brw_reg brw_message_reg(unsigned nr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
 
static inline struct brw_reg brw_message4_reg(unsigned nr,
int subnr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
}
 
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/
static inline unsigned cvt(unsigned val)
{
switch (val) {
case 0: return 0;
case 1: return 1;
case 2: return 2;
case 4: return 3;
case 8: return 4;
case 16: return 5;
case 32: return 6;
}
return 0;
}
 
static inline struct brw_reg __stride(struct brw_reg reg,
unsigned vstride,
unsigned width,
unsigned hstride)
{
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
reg.hstride = cvt(hstride);
return reg;
}
 
static inline struct brw_reg vec16(struct brw_reg reg)
{
return __stride(reg, 16,16,1);
}
 
static inline struct brw_reg vec8(struct brw_reg reg)
{
return __stride(reg, 8,8,1);
}
 
static inline struct brw_reg vec4(struct brw_reg reg)
{
return __stride(reg, 4,4,1);
}
 
static inline struct brw_reg vec2(struct brw_reg reg)
{
return __stride(reg, 2,2,1);
}
 
static inline struct brw_reg vec1(struct brw_reg reg)
{
return __stride(reg, 0,1,0);
}
 
static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(reg, elt));
}
 
static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
 
static inline struct brw_reg brw_swizzle(struct brw_reg reg,
unsigned x,
unsigned y,
unsigned z,
unsigned w)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
 
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
return reg;
}
 
static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
unsigned x)
{
return brw_swizzle(reg, x, x, x, x);
}
 
static inline struct brw_reg brw_writemask(struct brw_reg reg,
unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask &= mask;
return reg;
}
 
static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
 
static inline struct brw_reg brw_negate(struct brw_reg reg)
{
reg.negate ^= 1;
return reg;
}
 
static inline struct brw_reg brw_abs(struct brw_reg reg)
{
reg.abs = 1;
return reg;
}
 
/***********************************************************************
*/
static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
{
return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
 
static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
 
static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
ptr.addr_offset += offset;
return ptr;
}
 
static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
ptr.addr_offset = offset;
ptr.pad = 0;
return ptr;
}
 
/** Do two brw_regs refer to the same register? */
static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
{
return r1.file == r2.file && r1.nr == r2.nr;
}
 
static inline struct brw_instruction *current_insn( struct brw_compile *p)
{
return &p->store[p->nr_insn];
}
 
static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
{
p->current->header.predicate_control = pc;
}
 
static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
{
p->current->header.predicate_inverse = predicate_inverse;
}
 
static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
{
p->current->header.destreg__conditionalmod = conditional;
}
 
static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
{
p->current->header.access_mode = access_mode;
}
 
static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
{
p->current->header.mask_control = value;
}
 
static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
{
p->current->header.saturate = value;
}
 
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
{
if (p->gen >= 060)
p->current->header.acc_wr_control = value;
}
 
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
 
void brw_compile_init(struct brw_compile *p, int gen, void *store);
 
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg);
 
void gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr);
 
static inline struct brw_instruction *
brw_next_insn(struct brw_compile *p, unsigned opcode)
{
struct brw_instruction *insn;
 
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
 
insn = &p->store[p->nr_insn++];
*insn = *p->current;
 
if (p->current->header.destreg__conditionalmod) {
p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
 
insn->header.opcode = opcode;
return insn;
}
 
/* Helpers for regular instructions: */
#define ALU1(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
 
#define ALU2(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
{ \
return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
 
/* Rounding operations (other than RNDD) require two instructions - the first
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
*
* Sandybridge and later appear to round correctly without an ADD.
*/
#define ROUND(OP) \
static inline void brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src) \
{ \
struct brw_instruction *rnd, *add; \
rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
if (p->gen < 060) { \
/* turn on round-increments */ \
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
add->header.predicate_control = BRW_PREDICATE_NORMAL; \
} \
}
 
static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src)
{
struct brw_instruction *insn = brw_next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
return insn;
}
 
static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 )
{
struct brw_instruction *insn = brw_next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
return insn;
}
 
static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.2.2: add */
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
 
static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.32.38: mul */
if (src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD ||
src1.type == BRW_REGISTER_TYPE_D ||
src1.type == BRW_REGISTER_TYPE_UD) {
assert(dest.type != BRW_REGISTER_TYPE_F);
}
 
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src0.nr != BRW_ARF_ACCUMULATOR);
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src1.nr != BRW_ARF_ACCUMULATOR);
 
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
 
static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
 
insn->header.execution_size = 1;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_DISABLE;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
return insn;
}
 
 
ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(RSR);
ALU2(RSL);
ALU2(ASR);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);
 
ROUND(RNDZ);
ROUND(RNDE);
 
#undef ALU1
#undef ALU2
#undef ROUND
 
/* Helpers for SEND instruction */
void brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned target_cache,
unsigned msg_length,
unsigned response_length);
 
void brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned msg_length,
bool header_present,
bool last_render_target,
unsigned response_length,
bool end_of_thread,
bool send_commit_msg);
 
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool eot,
bool writes_complete,
unsigned offset,
unsigned swizzle);
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot);
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
bool header_present);
 
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode);
 
void brw_math_16(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned precision);
 
void brw_math(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned data_type,
unsigned precision);
 
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1);
 
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index);
 
void brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset);
 
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset);
 
void brw_dword_scattered_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t bind_table_index);
 
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
unsigned location,
unsigned bind_table_index);
 
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addrReg,
unsigned offset,
unsigned bind_table_index);
 
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
struct brw_instruction *brw_IF(struct brw_compile *p,
unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1);
 
void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);
 
/* DO/WHILE loops:
*/
struct brw_instruction *brw_DO(struct brw_compile *p,
unsigned execute_size);
 
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
 
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn);
 
void brw_NOP(struct brw_compile *p);
 
void brw_WAIT(struct brw_compile *p);
 
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1);
 
void brw_print_reg(struct brw_reg reg);
 
static inline void brw_math_invert(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
{
brw_math(p,
dst,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
src,
BRW_MATH_PRECISION_FULL,
BRW_MATH_DATA_VECTOR);
}
 
void brw_set_uip_jip(struct brw_compile *p);
 
uint32_t brw_swap_cmod(uint32_t cmod);
 
void brw_disasm(FILE *file,
const struct brw_instruction *inst,
int gen);
 
#endif
/drivers/video/Intel-2D/brw/brw_eu_emit.c
0,0 → 1,2002
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/***********************************************************************
* Internal helper for constructing instructions
*/
 
static void guess_execution_size(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width;
}
 
 
/**
* Prior to Sandybridge, the SEND instruction accepted non-MRF source
* registers, implicitly moving the operand to a message register.
*
* On Sandybridge, this is no longer the case. This function performs the
* explicit move; it should be called before emitting a SEND instruction.
*/
void
gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr)
{
if (p->gen < 060)
return;
 
if (src->file == BRW_MESSAGE_REGISTER_FILE)
return;
 
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
brw_pop_insn_state(p);
}
*src = brw_message_reg(msg_reg_nr);
}
 
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
/* From the BSpec / ISA Reference / send - [DevIVB+]:
* "The send with EOT should use register space R112-R127 for <src>. This is
* to enable loading of a new thread into the same slot while the message
* with EOT for current thread is pending dispatch."
*
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += 111;
}
}
 
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
 
gen7_convert_mrf_to_grf(p, &dest);
 
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
 
if (dest.address_mode == BRW_ADDRESS_DIRECT) {
insn->bits1.da1.dest_reg_nr = dest.nr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.da1.dest_horiz_stride = dest.hstride;
} else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
/* even ignored in da16, still need to set as '01' */
insn->bits1.da16.dest_horiz_stride = 1;
}
} else {
insn->bits1.ia1.dest_subreg_nr = dest.subnr;
 
/* These are different sizes in align1 vs align16:
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
/* even ignored in da16, still need to set as '01' */
insn->bits1.ia16.dest_horiz_stride = 1;
}
}
 
guess_execution_size(p, insn, dest);
}
 
static const int reg_type_size[8] = {
[0] = 4,
[1] = 4,
[2] = 2,
[3] = 2,
[4] = 1,
[5] = 1,
[7] = 4
};
 
static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
int hstride_for_reg[] = {0, 1, 2, 4};
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
int width_for_reg[] = {1, 2, 4, 8, 16};
int execsize_for_reg[] = {1, 2, 4, 8, 16};
int width, hstride, vstride, execsize;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
/* 3.3.6: Region Parameters. Restriction: Immediate vectors
* mean the destination has to be 128-bit aligned and the
* destination horiz stride has to be a word.
*/
if (reg.type == BRW_REGISTER_TYPE_V) {
assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
}
 
return;
}
 
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
reg.file == BRW_ARF_NULL)
return;
 
hstride = hstride_for_reg[reg.hstride];
 
if (reg.vstride == 0xf) {
vstride = -1;
} else {
vstride = vstride_for_reg[reg.vstride];
}
 
width = width_for_reg[reg.width];
 
execsize = execsize_for_reg[insn->header.execution_size];
 
/* Restrictions from 3.3.10: Register Region Restrictions. */
/* 3. */
assert(execsize >= width);
 
/* 4. */
if (execsize == width && hstride != 0) {
assert(vstride == -1 || vstride == width * hstride);
}
 
/* 5. */
if (execsize == width && hstride == 0) {
/* no restriction on vstride. */
}
 
/* 6. */
if (width == 1) {
assert(hstride == 0);
}
 
/* 7. */
if (execsize == 1 && width == 1) {
assert(hstride == 0);
assert(vstride == 0);
}
 
/* 8. */
if (vstride == 0 && hstride == 0) {
assert(width == 1);
}
 
/* 10. Check destination issues. */
}
 
void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
insn->bits2.da1.src0_negate = reg.negate;
insn->bits2.da1.src0_address_mode = reg.address_mode;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
 
/* Required to set some fields in src1 as well:
*/
insn->bits1.da1.src1_reg_file = 0; /* arf */
insn->bits1.da1.src1_reg_type = reg.type;
} else {
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.da1.src0_subreg_nr = reg.subnr;
insn->bits2.da1.src0_reg_nr = reg.nr;
} else {
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
insn->bits2.da16.src0_reg_nr = reg.nr;
}
} else {
insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
} else {
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
}
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits2.da1.src0_width = BRW_WIDTH_1;
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
} else {
insn->bits2.da1.src0_horiz_stride = reg.hstride;
insn->bits2.da1.src0_width = reg.width;
insn->bits2.da1.src0_vert_stride = reg.vstride;
}
} else {
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits2.da16.src0_vert_stride = reg.vstride;
}
}
}
 
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
insn->bits3.da1.src1_abs = reg.abs;
insn->bits3.da1.src1_negate = reg.negate;
 
/* Only src1 can be immediate in two-argument instructions. */
assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
} else {
/* This is a hardware restriction, which may or may not be lifted
* in the future:
*/
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits3.da1.src1_subreg_nr = reg.subnr;
insn->bits3.da1.src1_reg_nr = reg.nr;
} else {
insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
insn->bits3.da16.src1_reg_nr = reg.nr;
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits3.da1.src1_width = BRW_WIDTH_1;
insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
} else {
insn->bits3.da1.src1_horiz_stride = reg.hstride;
insn->bits3.da1.src1_width = reg.width;
insn->bits3.da1.src1_vert_stride = reg.vstride;
}
} else {
insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits3.da16.src1_vert_stride = reg.vstride;
}
}
}
 
/**
* Set the Message Descriptor and Extended Message Descriptor fields
* for SEND messages.
*
* \note This zeroes out the Function Control bits, so it must be called
* \b before filling out any message-specific data. Callers can
* choose not to fill in irrelevant bits; they will be zero.
*/
static void
brw_set_message_descriptor(struct brw_compile *p,
struct brw_instruction *inst,
enum brw_message_target sfid,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread)
{
brw_set_src1(p, inst, brw_imm_d(0));
 
if (p->gen >= 050) {
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
 
if (p->gen >= 060) {
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
inst->header.destreg__conditionalmod = sfid;
} else {
/* Set Extended Message Descriptor (ex_desc) */
inst->bits2.send_gen5.sfid = sfid;
inst->bits2.send_gen5.end_of_thread = end_of_thread;
}
} else {
inst->bits3.generic.response_length = response_length;
inst->bits3.generic.msg_length = msg_length;
inst->bits3.generic.msg_target = sfid;
inst->bits3.generic.end_of_thread = end_of_thread;
}
}
 
 
static void brw_set_math_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned function,
unsigned integer_type,
bool low_precision,
bool saturate,
unsigned dataType)
{
unsigned msg_length;
unsigned response_length;
 
/* Infer message length from the function */
switch (function) {
case BRW_MATH_FUNCTION_POW:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
msg_length = 2;
break;
default:
msg_length = 1;
break;
}
 
/* Infer response length from the function */
switch (function) {
case BRW_MATH_FUNCTION_SINCOS:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
response_length = 2;
break;
default:
response_length = 1;
break;
}
 
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
msg_length, response_length,
false, false);
if (p->gen == 050) {
insn->bits3.math_gen5.function = function;
insn->bits3.math_gen5.int_type = integer_type;
insn->bits3.math_gen5.precision = low_precision;
insn->bits3.math_gen5.saturate = saturate;
insn->bits3.math_gen5.data_type = dataType;
insn->bits3.math_gen5.snapshot = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
insn->bits3.math.precision = low_precision;
insn->bits3.math.saturate = saturate;
insn->bits3.math.data_type = dataType;
}
}
 
static void brw_set_ff_sync_message(struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
unsigned response_length,
bool end_of_thread)
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
1, response_length,
true, end_of_thread);
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
 
static void brw_set_urb_message(struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool end_of_thread,
bool complete,
unsigned offset,
unsigned swizzle_control)
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
msg_length, response_length, true, end_of_thread);
if (p->gen >= 070) {
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
insn->bits3.urb_gen7.offset = offset;
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
insn->bits3.urb_gen7.swizzle_control = swizzle_control;
/* per_slot_offset = 0 makes it ignore offsets in message header */
insn->bits3.urb_gen7.per_slot_offset = 0;
insn->bits3.urb_gen7.complete = complete;
} else if (p->gen >= 050) {
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
insn->bits3.urb_gen5.offset = offset;
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = used; /* ? */
insn->bits3.urb_gen5.complete = complete;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
insn->bits3.urb.swizzle_control = swizzle_control;
insn->bits3.urb.allocate = allocate;
insn->bits3.urb.used = used; /* ? */
insn->bits3.urb.complete = complete;
}
}
 
void
brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned msg_length,
bool header_present,
bool last_render_target,
unsigned response_length,
bool end_of_thread,
bool send_commit_msg)
{
unsigned sfid;
 
if (p->gen >= 070) {
/* Use the Render Cache for RT writes; otherwise use the Data Cache */
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (p->gen >= 060) {
/* Use the render cache for all write messages. */
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_WRITE;
}
 
brw_set_message_descriptor(p, insn, sfid,
msg_length, response_length,
header_present, end_of_thread);
 
if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = last_render_target;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = last_render_target;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
} else if (p->gen >= 050) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.last_render_target = last_render_target;
insn->bits3.dp_write_gen5.msg_type = msg_type;
insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
insn->bits3.dp_write.last_render_target = last_render_target;
insn->bits3.dp_write.msg_type = msg_type;
insn->bits3.dp_write.send_commit_msg = send_commit_msg;
}
}
 
void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned target_cache,
unsigned msg_length,
unsigned response_length)
{
unsigned sfid;
 
if (p->gen >= 070) {
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (p->gen >= 060) {
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_READ;
}
 
brw_set_message_descriptor(p, insn, sfid,
msg_length, response_length,
true, false);
 
if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = 0;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = 0;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = 0;
} else if (p->gen >= 050) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
insn->bits3.dp_read_gen5.msg_type = msg_type;
insn->bits3.dp_read_gen5.target_cache = target_cache;
} else if (p->gen >= 045) {
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
}
}
 
static void brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned sampler,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
msg_length, response_length,
header_present, false);
 
if (p->gen >= 070) {
insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
insn->bits3.sampler_gen7.sampler = sampler;
insn->bits3.sampler_gen7.msg_type = msg_type;
insn->bits3.sampler_gen7.simd_mode = simd_mode;
} else if (p->gen >= 050) {
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
insn->bits3.sampler_gen5.sampler = sampler;
insn->bits3.sampler_gen5.msg_type = msg_type;
insn->bits3.sampler_gen5.simd_mode = simd_mode;
} else if (p->gen >= 045) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
} else {
insn->bits3.sampler.binding_table_index = binding_table_index;
insn->bits3.sampler.sampler = sampler;
insn->bits3.sampler.msg_type = msg_type;
insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
}
}
 
 
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_ud(0x0));
}
 
/***********************************************************************
* Comparisons, if/else/endif
*/
 
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
p->if_stack[p->if_stack_depth] = inst;
 
p->if_stack_depth++;
if (p->if_stack_array_size <= p->if_stack_depth) {
p->if_stack_array_size *= 2;
p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
}
}
 
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
* control execution on each channel of the unit, eg. on each of the
* 16 pixel values in our wm programs.
*
* When the matching 'else' instruction is reached (presumably by
* countdown of the instruction count patched in by our ELSE/ENDIF
* functions), the relevent flags are inverted.
*
* When the matching 'endif' instruction is reached, the flags are
* popped off. If the stack is now empty, normal execution resumes.
*/
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_IF);
 
/* Override the defaults for this instruction: */
if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.execution_size = execute_size;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.predicate_control = BRW_PREDICATE_NORMAL;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
push_if_stack(p, insn);
return insn;
}
 
/* This function is only used for gen6-style IF instructions with an
* embedded comparison (conditional modifier). It is not used on gen7.
*/
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_IF);
 
brw_set_dest(p, insn, brw_imm_w(0));
if (p->compressed) {
insn->header.execution_size = BRW_EXECUTE_16;
} else {
insn->header.execution_size = BRW_EXECUTE_8;
}
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = conditional;
 
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
return insn;
}
 
/**
* In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
*/
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst)
{
/* The next instruction (where the ENDIF would be, if it existed) */
struct brw_instruction *next_inst = &p->store[p->nr_insn];
 
assert(p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
assert(if_inst->header.execution_size == BRW_EXECUTE_1);
 
/* Convert IF to an ADD instruction that moves the instruction pointer
* to the first instruction of the ELSE block. If there is no ELSE
* block, point to where ENDIF would be. Reverse the predicate.
*
* There's no need to execute an ENDIF since we don't need to do any
* stack operations, and if we're currently executing, we just want to
* continue normally.
*/
if_inst->header.opcode = BRW_OPCODE_ADD;
if_inst->header.predicate_inverse = 1;
 
if (else_inst != NULL) {
/* Convert ELSE to an ADD instruction that points where the ENDIF
* would be.
*/
else_inst->header.opcode = BRW_OPCODE_ADD;
 
if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
else_inst->bits3.ud = (next_inst - else_inst) * 16;
} else {
if_inst->bits3.ud = (next_inst - if_inst) * 16;
}
}
 
/**
* Patch IF and ELSE instructions with appropriate jump targets.
*/
static void
patch_IF_ELSE(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst,
struct brw_instruction *endif_inst)
{
unsigned br = 1;
 
assert(!p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(endif_inst != NULL);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
/* Jump count is for 64bit data chunk each, so one 128bit instruction
* requires 2 chunks.
*/
if (p->gen >= 050)
br = 2;
 
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
endif_inst->header.execution_size = if_inst->header.execution_size;
 
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
if (p->gen < 060) {
/* Turn it into an IFF, which means no mask stack operations for
* all-false and jumping past the ENDIF.
*/
if_inst->header.opcode = BRW_OPCODE_IFF;
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
} else {
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
}
} else {
else_inst->header.execution_size = if_inst->header.execution_size;
 
/* Patch IF -> ELSE */
if (p->gen < 060) {
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen <= 070) {
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
}
 
/* Patch ELSE -> ENDIF */
if (p->gen < 060) {
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
* matching ENDIF.
*/
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
} else {
/* The IF instruction's JIP should point just past the ELSE */
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
}
}
}
 
void
brw_ELSE(struct brw_compile *p)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_ELSE);
 
if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
}
 
void
brw_ENDIF(struct brw_compile *p)
{
struct brw_instruction *insn;
struct brw_instruction *else_inst = NULL;
struct brw_instruction *if_inst = NULL;
 
/* Pop the IF and (optional) ELSE instructions from the stack */
p->if_stack_depth--;
if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
else_inst = p->if_stack[p->if_stack_depth];
p->if_stack_depth--;
}
if_inst = p->if_stack[p->if_stack_depth];
 
if (p->single_program_flow) {
/* ENDIF is useless; don't bother emitting it. */
convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
return;
}
 
insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
 
if (p->gen < 060) {
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
insn->header.thread_control = BRW_THREAD_SWITCH;
 
/* Also pop item off the stack in the endif instruction: */
if (p->gen < 060) {
insn->bits3.if_else.jump_count = 0;
insn->bits3.if_else.pop_count = 1;
insn->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
insn->bits1.branch_gen6.jump_count = 2;
} else {
insn->bits3.break_cont.jip = 2;
}
patch_IF_ELSE(p, if_inst, else_inst, insn);
}
 
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_BREAK);
if (p->gen >= 060) {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = pop_count;
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
 
return insn;
}
 
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
return insn;
}
 
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = pop_count;
return insn;
}
 
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
* used for control within the loop. We have a few ways they can be
* done.
*
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
* jip and no DO instruction.
*
* For non-uniform control flow pre-gen6, there's a DO instruction to
* push the mask, and a WHILE to jump back, and BREAK to get out and
* pop the mask.
*
* For gen6, there's no more mask stack, so no need for DO. WHILE
* just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
if (p->gen >= 060 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
 
/* Override the defaults for this instruction:
*/
brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(p, insn, brw_null_reg());
brw_set_src1(p, insn, brw_null_reg());
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = execute_size;
insn->header.predicate_control = BRW_PREDICATE_NONE;
/* insn->header.mask_control = BRW_MASK_ENABLE; */
/* insn->header.mask_control = BRW_MASK_DISABLE; */
 
return insn;
}
}
 
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
unsigned br = 1;
 
if (p->gen >= 050)
br = 2;
 
if (p->gen >= 070) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = br * (do_insn - insn);
 
insn->header.execution_size = BRW_EXECUTE_8;
} else if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = brw_next_insn(p, BRW_OPCODE_ADD);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
insn->header.execution_size = BRW_EXECUTE_1;
} else {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
assert(do_insn->header.opcode == BRW_OPCODE_DO);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0));
 
insn->header.execution_size = do_insn->header.execution_size;
insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
insn->bits3.if_else.pop_count = 0;
insn->bits3.if_else.pad0 = 0;
}
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
return insn;
}
 
/* FORWARD JUMPS:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn)
{
struct brw_instruction *landing = &p->store[p->nr_insn];
unsigned jmpi = 1;
 
if (p->gen >= 050)
jmpi = 2;
 
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
 
 
 
/* To integrate with the above, it makes sense that the comparison
* instruction should populate the flag register. It might be simpler
* just to use the flag reg for most WM tasks?
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
 
insn->header.destreg__conditionalmod = conditional;
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
/* Make it so that future instructions will use the computed flag
* value until brw_set_predicate_control_flag_value() is called
* again.
*/
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == 0) {
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
p->flag_value = 0xff;
}
}
 
/* Issue 'wait' instruction for n1, host could program MMIO
to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
 
brw_set_dest(p, insn, src);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
}
 
/***********************************************************************
* Helpers for the various SEND message types:
*/
 
/** Extended math function, float[8].
*/
void brw_math(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned data_type,
unsigned precision)
{
if (p->gen >= 060) {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src.negate);
assert(!src.abs);
 
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
}
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
insn->header.saturate = saturate;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
} else {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
/* Example code doesn't set predicate_control for send
* instructions.
*/
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
src.type == BRW_REGISTER_TYPE_D,
precision,
saturate,
data_type);
}
}
 
/** Extended math function, float[8].
*/
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
 
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src0.type == BRW_REGISTER_TYPE_F);
assert(src1.type == BRW_REGISTER_TYPE_F);
}
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
assert(!src1.abs);
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
 
/**
* Extended math function, float[16].
* Use 2 send instructions.
*/
void brw_math_16(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned precision)
{
struct brw_instruction *insn;
 
if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
insn->header.saturate = saturate;
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src.negate);
assert(!src.abs);
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
return;
}
 
/* First instruction:
*/
brw_push_insn_state(p);
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
BRW_MATH_INTEGER_UNSIGNED,
precision,
saturate,
BRW_MATH_DATA_VECTOR);
 
/* Second instruction:
*/
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
 
brw_set_dest(p, insn, __offset(dest,1));
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
BRW_MATH_INTEGER_UNSIGNED,
precision,
saturate,
BRW_MATH_DATA_VECTOR);
 
brw_pop_insn_state(p);
}
 
/**
* Write a block of OWORDs (half a GRF each) from the scratch buffer,
* using a constant offset per channel.
*
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*/
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
uint32_t msg_control, msg_type;
int mlen;
 
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
mlen = 2;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
mlen = 3;
}
 
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
* it'll screw up texture samples, so set it up inside the message
* reg.
*/
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_reg dest;
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
 
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
insn->header.compression_control = BRW_COMPRESSION_NONE;
src_header = vec16(src_header);
}
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* Until gen6, writes followed by reads from the same location
* are not guaranteed to be ordered unless write_commit is set.
* If set, then a no-op write is issued to the destination
* register to set a dependency, and a read from the destination
* can be used to ensure the ordering.
*
* For gen6, only writes between different threads need ordering
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
if (p->gen >= 060) {
dest = __retype_uw(vec16(brw_null_reg()));
send_commit_msg = 0;
} else {
dest = src_header;
send_commit_msg = 1;
}
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
brw_set_dp_write_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
msg_type,
mlen,
true, /* header_present */
0, /* pixel scoreboard */
send_commit_msg, /* response_length */
0, /* eot */
send_commit_msg);
}
}
 
 
/**
* Read a block of owords (half a GRF each) from the scratch buffer
* using a constant index per channel.
*
* Offset must be aligned to oword size (16 bytes). Used for register
* spilling.
*/
void
brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
uint32_t msg_control;
int rlen;
 
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
dest = __retype_uw(dest);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
rlen = 1;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
rlen = 2;
}
 
{
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
assert(insn->header.predicate_control == 0);
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
 
brw_set_dest(p, insn, dest); /* UW? */
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
rlen);
}
}
 
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index)
{
struct brw_instruction *insn;
 
/* On newer hardware, offset is in units of owords. */
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* cast dest to a uword[8] vector */
dest = __retype_uw(vec8(dest));
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
1); /* response_length (1 reg, 2 owords!) */
 
brw_pop_insn_state(p);
}
 
/**
* Read a set of dwords from the data port Data Cache (const buffer).
*
* Location (in buffer) appears as UD offsets in the register after
* the provided mrf header reg.
*/
void brw_dword_scattered_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t bind_table_index)
{
struct brw_instruction *insn;
 
mrf = __retype_ud(mrf);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
brw_pop_insn_state(p);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* cast dest to a uword[8] vector */
dest = __retype_uw(vec8(dest));
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, brw_null_reg());
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
 
/**
* Read float[4] constant(s) from VS constant buffer.
* For relative addressing, two float[4] constants will be read into 'dest'.
* Otherwise, one float[4] constant will be read into the lower half of 'dest'.
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
unsigned location,
unsigned bind_table_index)
{
struct brw_instruction *insn;
unsigned msg_reg_nr = 1;
 
if (p->gen >= 060)
location /= 16;
 
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
brw_imm_ud(location));
brw_pop_insn_state(p);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
0,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
1); /* response_length (1 Oword) */
}
 
/**
* Read a float[4] constant per vertex from VS constant buffer, with
* relative addressing.
*/
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addr_reg,
unsigned offset,
unsigned bind_table_index)
{
struct brw_reg src = brw_vec8_grf(0, 0);
struct brw_instruction *insn;
int msg_type;
 
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
brw_ADD(p, __retype_d(brw_message_reg(1)),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
 
gen6_resolve_implied_move(p, &src, 0);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (p->gen >= 045)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
bool header_present)
{
struct brw_instruction *insn;
unsigned msg_type;
struct brw_reg dest;
 
if (dispatch_width == 16)
dest = __retype_uw(vec16(brw_null_reg()));
else
dest = __retype_uw(vec8(brw_null_reg()));
 
if (p->gen >= 060 && binding_table_index == 0) {
insn = brw_next_insn(p, BRW_OPCODE_SENDC);
} else {
insn = brw_next_insn(p, BRW_OPCODE_SEND);
}
/* The execution mask is ignored for render target writes. */
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (p->gen >= 060) {
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
 
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
} else {
insn->header.destreg__conditionalmod = msg_reg_nr;
 
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
}
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p,
insn,
binding_table_index,
msg_control,
msg_type,
msg_length,
header_present,
eot,
response_length,
eot,
0 /* send_commit_msg */);
}
 
/**
* Texture sample instruction.
* Note: the msg_type plus msg_length values determine exactly what kind
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
assert(writemask);
 
if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 
writemask = ~writemask & WRITEMASK_XYZW;
 
brw_push_insn_state(p);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
 
brw_pop_insn_state(p);
 
src0 = __retype_uw(brw_null_reg());
}
 
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_sampler_message(p, insn,
binding_table_index,
sampler,
msg_type,
response_length,
msg_length,
header_present,
simd_mode);
}
}
 
/* All these variables are pretty confusing - we might be better off
* using bitmasks and macros for this, in the old style. Or perhaps
* just having the caller instantiate the fields in dword3 itself.
*/
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool eot,
bool writes_complete,
unsigned offset,
unsigned swizzle)
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
if (p->gen >= 070) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
__retype_ud(brw_vec1_grf(0, 5)),
brw_imm_ud(0xff00));
brw_pop_insn_state(p);
}
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
assert(msg_length < BRW_MAX_MRF);
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (p->gen <= 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_urb_message(p,
insn,
allocate,
used,
msg_length,
response_length,
eot,
writes_complete,
offset,
swizzle);
}
 
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
int ip;
 
for (ip = start + 1; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
switch (insn->header.opcode) {
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
return ip;
}
}
assert(!"not reached");
return start + 1;
}
 
/* There is no DO instruction on gen6, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
int ip;
int br = 2;
 
for (ip = start + 1; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
if (insn->header.opcode == BRW_OPCODE_WHILE) {
int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
if (ip + jip / br <= start)
return ip;
}
}
assert(!"not reached");
return start + 1;
}
 
/* After program generation, go back and update the UIP and JIP of
* BREAK and CONT instructions to their correct locations.
*/
void
brw_set_uip_jip(struct brw_compile *p)
{
int ip;
int br = 2;
 
if (p->gen <= 060)
return;
 
for (ip = 0; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
/* Gen7 UIP points to WHILE; Gen6 points just after it */
insn->bits3.break_cont.uip =
br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
break;
case BRW_OPCODE_CONTINUE:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
 
assert(insn->bits3.break_cont.uip != 0);
assert(insn->bits3.break_cont.jip != 0);
break;
}
}
}
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot)
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_ff_sync_message(p,
insn,
allocate,
response_length,
eot);
}
/drivers/video/Intel-2D/brw/brw_wm.c
0,0 → 1,681
#include "brw.h"
 
#define X16 8
#define Y16 10
 
static void brw_wm_xy(struct brw_compile *p, int dw)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = __retype_uw(r1);
struct brw_reg x_uw, y_uw;
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
if (dw == 16) {
x_uw = brw_uw16_grf(30, 0);
y_uw = brw_uw16_grf(28, 0);
} else {
x_uw = brw_uw8_grf(30, 0);
y_uw = brw_uw8_grf(28, 0);
}
 
brw_ADD(p,
x_uw,
__stride(__suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
brw_ADD(p,
y_uw,
__stride(__suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
 
static void brw_wm_affine_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
msg += dw/8;
 
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 4),
brw_vec8_grf(2, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
}
}
 
static inline unsigned simd(int dw)
{
return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
}
 
static inline struct brw_reg sample_result(int dw, int result)
{
return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
static int brw_wm_sample(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
struct brw_reg src0;
bool header;
int len;
 
len = dw == 16 ? 4 : 2;
if (p->gen >= 060) {
header = false;
src0 = brw_message_reg(++msg);
} else {
header = true;
src0 = brw_vec8_grf(0, 0);
}
 
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_XYZW, 0,
2*len, len+header, header, simd(dw));
return result;
}
 
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
struct brw_reg src0;
int mlen, rlen;
 
if (dw == 8) {
/* SIMD8 sample return is not masked */
mlen = 3;
rlen = 4;
} else {
mlen = 5;
rlen = 2;
}
 
if (p->gen >= 060)
src0 = brw_message_reg(msg);
else
src0 = brw_vec8_grf(0, 0);
 
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_W, 0,
rlen, mlen, true, simd(dw));
 
if (dw == 8)
result += 3;
 
return result;
}
 
static int brw_wm_affine(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
return brw_wm_sample(p, dw, channel, msg, result);
}
 
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
 
static inline struct brw_reg null_result(int dw)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
static void brw_fb_write(struct brw_compile *p, int dw)
{
struct brw_instruction *insn;
unsigned msg_control, msg_type, msg_len;
struct brw_reg src0;
bool header;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
msg_len = 8;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
msg_len = 4;
}
 
if (p->gen < 060) {
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
brw_pop_insn_state(p);
 
msg_len += 2;
}
 
/* The execution mask is ignored for render target writes. */
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (p->gen >= 060) {
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = brw_message_reg(2);
header = false;
} else {
insn->header.destreg__conditionalmod = 0;
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = __retype_uw(brw_vec8_grf(0, 0));
header = true;
}
 
brw_set_dest(p, insn, null_result(dw));
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p, insn, 0,
msg_control, msg_type, msg_len,
header, true, 0, true, false);
}
 
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
/* XXX pixel execution mask? */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MOV(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MOV(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MOV(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec8_grf(mask, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec8_grf(mask+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__opacity(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec1_grf(mask, 3));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec1_grf(mask, 3));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src + 0, 0),
brw_vec8_grf(mask + 0, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src + 1, 0),
brw_vec8_grf(mask + 1, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src + 2, 0),
brw_vec8_grf(mask + 2, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src + 3, 0),
brw_vec8_grf(mask + 3, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n + 1, 0),
brw_vec8_grf(mask + 2*n + 1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
 
return true;
}
 
bool
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
mask = brw_wm_affine(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
mask = brw_wm_affine(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
 
return true;
}
 
/* Projective variants */
 
static void brw_wm_projective_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
/* First compute 1/z */
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv+1, 0),
brw_vec8_grf(2, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(4, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
/* First compute 1/z */
brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
 
/* Now compute the output s,t values */
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
}
}
 
static int brw_wm_projective(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
return brw_wm_sample(p, dw, channel, msg, result);
}
 
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
 
bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
 
return true;
}
 
bool
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
mask = brw_wm_projective(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
mask = brw_wm_projective(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
 
return true;
}
 
bool
brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060) {
brw_wm_xy(p, dispatch);
mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
brw_wm_write__opacity(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060) {
brw_wm_xy(p, dispatch);
mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
brw_wm_write__opacity(p, dispatch, src, mask);
 
return true;
}
/drivers/video/Intel-2D/gen3_render.h
0,0 → 1,1479
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _I915_REG_H_
#define _I915_REG_H_
 
#define CMD_3D (3 << 29)
 
#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)
 
#define PRIM3D (CMD_3D | (0x1f<<24))
#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17))
#define PRIM3D_TRILIST (PRIM3D | (0x0<<18))
#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18))
#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18))
#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18))
#define PRIM3D_POLY (PRIM3D | (0x4<<18))
#define PRIM3D_LINELIST (PRIM3D | (0x5<<18))
#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18))
#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18))
#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18))
#define PRIM3D_DIB (PRIM3D | (0x9<<18))
#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18))
#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18))
#define PRIM3D_MASK (0x1f<<18)
 
 
/* p137 */
#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
#define AA_LINE_ECAAR_WIDTH_0_5 0
#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
#define AA_LINE_REGION_WIDTH_0_5 0
#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
#define AA_LINE_REGION_WIDTH_4_0 (3<<6)
 
/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
#define BFO_ENABLE_STENCIL_REF (1<<23)
#define BFO_STENCIL_REF_SHIFT 15
#define BFO_STENCIL_REF_MASK (0xff<<15)
#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
#define BFO_STENCIL_TEST_SHIFT 11
#define BFO_STENCIL_TEST_MASK (0x7<<11)
#define BFO_STENCIL_FAIL_SHIFT 8
#define BFO_STENCIL_FAIL_MASK (0x7<<8)
#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
#define BFO_STENCIL_TWO_SIDE (1<<0)
 
/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
 
/* 3DSTATE_BIN_CONTROL p141 */
 
/* p143 */
#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK (0x3<<24)
#define BUF_3D_ID_DEPTH (0x7<<24)
#define BUF_3D_USE_FENCE (1<<23)
#define BUF_3D_TILED_SURFACE (1<<22)
#define BUF_3D_TILE_WALK_X 0
#define BUF_3D_TILE_WALK_Y (1<<21)
/* Dword 2 */
#define BUF_3D_ADDR(x) ((x) & ~0x3)
 
/* 3DSTATE_CHROMA_KEY */
 
/* 3DSTATE_CLEAR_PARAMETERS, p150 */
#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
/* Dword 1 */
#define CLEARPARAM_CLEAR_RECT (1 << 16)
#define CLEARPARAM_ZONE_INIT (0 << 16)
#define CLEARPARAM_WRITE_COLOR (1 << 2)
#define CLEARPARAM_WRITE_DEPTH (1 << 1)
#define CLEARPARAM_WRITE_STENCIL (1 << 0)
 
/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
 
/* 3DSTATE_COORD_SET_BINDINGS, p154 */
#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3))
 
/* p156 */
#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))
 
/* p157 */
#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))
 
/* p158 */
#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))
 
/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
/* scale in dword 1 */
 
/* The depth subrectangle is not supported, but must be disabled. */
/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0))
 
/* p161 */
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
#define TEX_DEFAULT_COLOR_OGL (0<<30)
#define TEX_DEFAULT_COLOR_D3D (1<<30)
#define ZR_EARLY_DEPTH (1<<29)
#define LOD_PRECLAMP_OGL (1<<28)
#define LOD_PRECLAMP_D3D (0<<28)
#define DITHER_FULL_ALWAYS (0<<26)
#define DITHER_FULL_ON_FB_BLEND (1<<26)
#define DITHER_CLAMPED_ALWAYS (2<<26)
#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
#define DEBUG_DISABLE_ENH_DITHER (1<<24)
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
#define COLOR_4_2_2_CHNL_WRT_ALL 0
#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
#define COLR_BUF_8BIT 0
#define COLR_BUF_RGB555 (1<<8)
#define COLR_BUF_RGB565 (2<<8)
#define COLR_BUF_ARGB8888 (3<<8)
#define COLR_BUF_ARGB4444 (8<<8)
#define COLR_BUF_ARGB1555 (9<<8)
#define COLR_BUF_ARGB2AAA (0xa<<8)
#define DEPTH_IS_Z 0
#define DEPTH_IS_W (1<<6)
#define DEPTH_FRMT_16_FIXED 0
#define DEPTH_FRMT_16_FLOAT (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
#define VERT_LINE_STRIDE_1 (1<<1)
#define VERT_LINE_STRIDE_0 0
#define VERT_LINE_STRIDE_OFS_1 1
#define VERT_LINE_STRIDE_OFS_0 0
 
/* p166 */
#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
/* Dword 1 */
#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
#define DRAW_DITHER_OFS_X(x) ((x)<<26)
#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
/* Dword 2 */
#define DRAW_YMIN(x) ((uint16_t)(x)<<16)
#define DRAW_XMIN(x) ((uint16_t)(x))
/* Dword 3 */
#define DRAW_YMAX(x) ((uint16_t)(x)<<16)
#define DRAW_XMAX(x) ((uint16_t)(x))
/* Dword 4 */
#define DRAW_YORG(x) ((uint16_t)(x)<<16)
#define DRAW_XORG(x) ((uint16_t)(x))
 
/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
 
/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
 
/* _3DSTATE_FOG_COLOR, p173 */
#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
#define FOG_COLOR_RED(x) ((x)<<16)
#define FOG_COLOR_GREEN(x) ((x)<<8)
#define FOG_COLOR_BLUE(x) (x)
 
/* _3DSTATE_FOG_MODE, p174 */
#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
/* Dword 1 */
#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31)
#define FMC1_FOGFUNC_VERTEX (0<<28)
#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
#define FMC1_FOGFUNC_MASK (3<<28)
#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
#define FMC1_FOGINDEX_Z (0<<25)
#define FMC1_FOGINDEX_W (1<<25)
#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
#define FMC1_C1_ONE (1<<13)
#define FMC1_C1_MASK (0xffff<<4)
/* Dword 2 */
#define FMC2_C2_ONE (1<<16)
/* Dword 3 */
#define FMC3_D_ONE (1<<16)
 
/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
#define IAB_MODIFY_ENABLE (1<<23)
#define IAB_ENABLE (1<<22)
#define IAB_MODIFY_FUNC (1<<21)
#define IAB_FUNC_SHIFT 16
#define IAB_MODIFY_SRC_FACTOR (1<<11)
#define IAB_SRC_FACTOR_SHIFT 6
#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
#define IAB_MODIFY_DST_FACTOR (1<<5)
#define IAB_DST_FACTOR_SHIFT 0
#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)
 
#define BLENDFACT_ZERO 0x01
#define BLENDFACT_ONE 0x02
#define BLENDFACT_SRC_COLR 0x03
#define BLENDFACT_INV_SRC_COLR 0x04
#define BLENDFACT_SRC_ALPHA 0x05
#define BLENDFACT_INV_SRC_ALPHA 0x06
#define BLENDFACT_DST_ALPHA 0x07
#define BLENDFACT_INV_DST_ALPHA 0x08
#define BLENDFACT_DST_COLR 0x09
#define BLENDFACT_INV_DST_COLR 0x0a
#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
#define BLENDFACT_CONST_COLOR 0x0c
#define BLENDFACT_INV_CONST_COLOR 0x0d
#define BLENDFACT_CONST_ALPHA 0x0e
#define BLENDFACT_INV_CONST_ALPHA 0x0f
#define BLENDFACT_MASK 0x0f
 
#define BLENDFUNC_ADD 0x0
#define BLENDFUNC_SUBTRACT 0x1
#define BLENDFUNC_REVERSE_SUBTRACT 0x2
#define BLENDFUNC_MIN 0x3
#define BLENDFUNC_MAX 0x4
#define BLENDFUNC_MASK 0x7
 
/* 3DSTATE_LOAD_INDIRECT, p180 */
 
#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
#define LI0_STATE_SAMPLER (0x04<<8)
#define LI0_STATE_MAP (0x08<<8)
#define LI0_STATE_PROGRAM (0x10<<8)
#define LI0_STATE_CONSTANTS (0x20<<8)
 
#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SIS0_FORCE_LOAD (1<<1)
#define SIS0_BUFFER_VALID (1<<0)
#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define DIS0_BUFFER_RESET (1<<1)
#define DIS0_BUFFER_VALID (1<<0)
 
#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SSB0_FORCE_LOAD (1<<1)
#define SSB0_BUFFER_VALID (1<<0)
#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define MSB0_FORCE_LOAD (1<<1)
#define MSB0_BUFFER_VALID (1<<0)
#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSP0_FORCE_LOAD (1<<1)
#define PSP0_BUFFER_VALID (1<<0)
#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)
 
#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSC0_FORCE_LOAD (1<<1)
#define PSC0_BUFFER_VALID (1<<0)
#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)
 
/* _3DSTATE_RASTERIZATION_RULES */
#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE (1<<15)
#define OGL_POINT_RASTER_RULE (1<<13)
#define ENABLE_TEXKILL_3D_4D (1<<10)
#define TEXKILL_3D (0<<9)
#define TEXKILL_4D (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)
 
/* _3DSTATE_SCISSOR_ENABLE, p256 */
#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
#define DISABLE_SCISSOR_RECT (1<<1)
 
/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
#define SCISSOR_RECT_0_XMIN(x) (x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x) (x)
 
/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
#define I1_LOAD_S(n) (1<<(4+n))
 
#define S0_VB_OFFSET_MASK 0xffffffc
#define S0_AUTO_CACHE_INV_DISABLE (1<<0)
 
#define S1_VERTEX_WIDTH_SHIFT 24
#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT 16
#define S1_VERTEX_PITCH_MASK (0x3f<<16)
 
#define TEXCOORDFMT_2D 0x0
#define TEXCOORDFMT_3D 0x1
#define TEXCOORDFMT_4D 0x2
#define TEXCOORDFMT_1D 0x3
#define TEXCOORDFMT_2D_16 0x4
#define TEXCOORDFMT_4D_16 0x5
#define TEXCOORDFMT_NOT_PRESENT 0xf
#define S2_TEXCOORD_FMT0_MASK 0xf
#define S2_TEXCOORD_FMT1_SHIFT 4
#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4))
#define S2_TEXCOORD_NONE (~0)
 
#define TEXCOORD_WRAP_SHORTEST_TCX 8
#define TEXCOORD_WRAP_SHORTEST_TCY 4
#define TEXCOORD_WRAP_SHORTEST_TCZ 2
#define TEXCOORD_PERSPECTIVE_DISABLE 1
 
#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4))
#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4))
#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4))
#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4))
 
/* S3 not interesting */
 
#define S4_POINT_WIDTH_SHIFT 23
#define S4_POINT_WIDTH_MASK (0x1ff<<23)
#define S4_LINE_WIDTH_SHIFT 19
#define S4_LINE_WIDTH_ONE (0x2<<19)
#define S4_LINE_WIDTH_MASK (0xf<<19)
#define S4_FLATSHADE_ALPHA (1<<18)
#define S4_FLATSHADE_FOG (1<<17)
#define S4_FLATSHADE_SPECULAR (1<<16)
#define S4_FLATSHADE_COLOR (1<<15)
#define S4_CULLMODE_BOTH (0<<13)
#define S4_CULLMODE_NONE (1<<13)
#define S4_CULLMODE_CW (2<<13)
#define S4_CULLMODE_CCW (3<<13)
#define S4_CULLMODE_MASK (3<<13)
#define S4_VFMT_POINT_WIDTH (1<<12)
#define S4_VFMT_SPEC_FOG (1<<11)
#define S4_VFMT_COLOR (1<<10)
#define S4_VFMT_DEPTH_OFFSET (1<<9)
#define S4_VFMT_XYZ (1<<6)
#define S4_VFMT_XYZW (2<<6)
#define S4_VFMT_XY (3<<6)
#define S4_VFMT_XYW (4<<6)
#define S4_VFMT_XYZW_MASK (7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
#define S4_VFMT_FOG_PARAM (1<<2)
#define S4_SPRITE_POINT_ENABLE (1<<1)
#define S4_LINE_ANTIALIAS_ENABLE (1<<0)
 
#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
S4_VFMT_SPEC_FOG | \
S4_VFMT_COLOR | \
S4_VFMT_DEPTH_OFFSET | \
S4_VFMT_XYZW_MASK | \
S4_VFMT_FOG_PARAM)
 
#define S5_WRITEDISABLE_ALPHA (1<<31)
#define S5_WRITEDISABLE_RED (1<<30)
#define S5_WRITEDISABLE_GREEN (1<<29)
#define S5_WRITEDISABLE_BLUE (1<<28)
#define S5_WRITEDISABLE_MASK (0xf<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
#define S5_LAST_PIXEL_ENABLE (1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
#define S5_FOG_ENABLE (1<<24)
#define S5_STENCIL_REF_SHIFT 16
#define S5_STENCIL_REF_MASK (0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT 13
#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
#define S5_STENCIL_FAIL_SHIFT 10
#define S5_STENCIL_FAIL_MASK (0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
#define S5_STENCIL_WRITE_ENABLE (1<<3)
#define S5_STENCIL_TEST_ENABLE (1<<2)
#define S5_COLOR_DITHER_ENABLE (1<<1)
#define S5_LOGICOP_ENABLE (1<<0)
 
#define S6_ALPHA_TEST_ENABLE (1<<31)
#define S6_ALPHA_TEST_FUNC_SHIFT 28
#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28)
#define S6_ALPHA_REF_SHIFT 20
#define S6_ALPHA_REF_MASK (0xff<<20)
#define S6_DEPTH_TEST_ENABLE (1<<19)
#define S6_DEPTH_TEST_FUNC_SHIFT 16
#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16)
#define S6_CBUF_BLEND_ENABLE (1<<15)
#define S6_CBUF_BLEND_FUNC_SHIFT 12
#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4)
#define S6_DEPTH_WRITE_ENABLE (1<<3)
#define S6_COLOR_WRITE_ENABLE (1<<2)
#define S6_TRISTRIP_PV_SHIFT 0
#define S6_TRISTRIP_PV_MASK (0x3<<0)
 
#define S7_DEPTH_OFFSET_CONST_MASK ~0
 
/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
 
/* _3DSTATE_MODES_4, p218 */
#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24))
#define ENABLE_LOGIC_OP_FUNC (1<<23)
#define LOGIC_OP_FUNC(x) ((x)<<18)
#define LOGICOP_MASK (0xf<<18)
#define LOGICOP_COPY 0xc
#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00))
#define ENABLE_STENCIL_TEST_MASK (1<<17)
#define STENCIL_TEST_MASK(x) ((x)<<8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
#define ENABLE_STENCIL_WRITE_MASK (1<<16)
#define STENCIL_WRITE_MASK(x) ((x)&0xff)
 
/* _3DSTATE_MODES_5, p220 */
#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24))
#define PIPELINE_FLUSH_RENDER_CACHE (1<<18)
#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16)
 
/* p221 */
#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
#define PS1_REG(n) (1<<(n))
#define PS2_CONST_X(n) (n)
#define PS3_CONST_Y(n) (n)
#define PS4_CONST_Z(n) (n)
#define PS5_CONST_W(n) (n)
 
/* p222 */
 
#define I915_MAX_TEX_INDIRECT 4
#define I915_MAX_TEX_INSN 32
#define I915_MAX_ALU_INSN 64
#define I915_MAX_DECL_INSN 27
#define I915_MAX_TEMPORARY 16
 
/* Each instruction is 3 dwords long, though most don't require all
* this space. Maximum of 123 instructions. Smaller maxes per insn
* type.
*/
#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
 
#define REG_TYPE_R 0 /* temporary regs, no need to
* dcl, must be written before
* read -- Preserved between
* phases.
*/
#define REG_TYPE_T 1 /* Interpolated values, must be
* dcl'ed before use.
*
* 0..7: texture coord,
* 8: diffuse spec,
* 9: specular color,
* 10: fog parameter in w.
*/
#define REG_TYPE_CONST 2 /* Restriction: only one const
* can be referenced per
* instruction, though it may be
* selected for multiple inputs.
* Constants not initialized
* default to zero.
*/
#define REG_TYPE_S 3 /* sampler */
#define REG_TYPE_OC 4 /* output color (rgba) */
#define REG_TYPE_OD 5 /* output depth (w), xyz are
* temporaries. If not written,
* interpolated depth is used?
*/
#define REG_TYPE_U 6 /* unpreserved temporaries */
#define REG_TYPE_MASK 0x7
#define REG_NR_MASK 0xf
 
/* REG_TYPE_T:
*/
#define T_TEX0 0
#define T_TEX1 1
#define T_TEX2 2
#define T_TEX3 3
#define T_TEX4 4
#define T_TEX5 5
#define T_TEX6 6
#define T_TEX7 7
#define T_DIFFUSE 8
#define T_SPECULAR 9
#define T_FOG_W 10 /* interpolated fog is in W coord */
 
/* Arithmetic instructions */
 
/* .replicate_swizzle == selection and replication of a particular
* scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
*/
#define A0_NOP (0x0<<24) /* no operation */
#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
#define A0_MOV (0x2<<24) /* dst = src0 */
#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR (0x10<<24) /* dst = floor(src0) */
#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
#define A0_TRC (0x12<<24) /* dst = int(src0) */
#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
#define A0_DEST_SATURATE (1<<22)
#define A0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
#define A0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_CHANNEL_X (1<<10)
#define A0_DEST_CHANNEL_Y (2<<10)
#define A0_DEST_CHANNEL_Z (4<<10)
#define A0_DEST_CHANNEL_W (8<<10)
#define A0_DEST_CHANNEL_ALL (0xf<<10)
#define A0_DEST_CHANNEL_SHIFT 10
#define A0_SRC0_TYPE_SHIFT 7
#define A0_SRC0_NR_SHIFT 2
 
#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
 
#define SRC_X 0
#define SRC_Y 1
#define SRC_Z 2
#define SRC_W 3
#define SRC_ZERO 4
#define SRC_ONE 5
 
#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
#define A1_SRC0_CHANNEL_X_SHIFT 28
#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
#define A1_SRC0_CHANNEL_Y_SHIFT 24
#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
#define A1_SRC0_CHANNEL_Z_SHIFT 20
#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
#define A1_SRC0_CHANNEL_W_SHIFT 16
#define A1_SRC1_TYPE_SHIFT 13
#define A1_SRC1_NR_SHIFT 8
#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
#define A1_SRC1_CHANNEL_X_SHIFT 4
#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
#define A1_SRC1_CHANNEL_Y_SHIFT 0
 
#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
#define A2_SRC1_CHANNEL_Z_SHIFT 28
#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
#define A2_SRC1_CHANNEL_W_SHIFT 24
#define A2_SRC2_TYPE_SHIFT 21
#define A2_SRC2_NR_SHIFT 16
#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
#define A2_SRC2_CHANNEL_X_SHIFT 12
#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
#define A2_SRC2_CHANNEL_Y_SHIFT 8
#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
#define A2_SRC2_CHANNEL_Z_SHIFT 4
#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
#define A2_SRC2_CHANNEL_W_SHIFT 0
 
/* Texture instructions */
#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
* sampler and address, and output
* filtered texel data to destination
* register */
#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
* perspective divide of the texture
* coordinate .xyz values by .w before
* sampling. */
#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
* computed LOD by w. Only S4.6 two's
* comp is used. This implies that a
* float to fixed conversion is
* done. */
#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
* operation. Simply kills the pixel
* if any channel of the address
* register is < 0.0. */
#define T0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
/* Note: U (unpreserved) regs do not retain their values between
* phases (cannot be used for feedback)
*
* Note: oC and OD registers can only be used as the destination of a
* texture instruction once per phase (this is an implementation
* restriction).
*/
#define T0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
#define T0_SAMPLER_NR_MASK (0xf<<0)
 
#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
#define T1_ADDRESS_REG_NR_SHIFT 17
#define T2_MBZ 0
 
/* Declaration instructions */
#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
* register or an s (sampler)
* register. */
#define D0_SAMPLE_TYPE_SHIFT 22
#define D0_SAMPLE_TYPE_2D (0x0<<22)
#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
#define D0_SAMPLE_TYPE_MASK (0x3<<22)
 
#define D0_TYPE_SHIFT 19
/* Allow: T, S */
#define D0_NR_SHIFT 14
/* Allow T: 0..10, S: 0..15 */
#define D0_CHANNEL_X (1<<10)
#define D0_CHANNEL_Y (2<<10)
#define D0_CHANNEL_Z (4<<10)
#define D0_CHANNEL_W (8<<10)
#define D0_CHANNEL_ALL (0xf<<10)
#define D0_CHANNEL_NONE (0<<10)
 
#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
 
/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
* or specular declarations.
*
* For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
*
* Must be zero for S (sampler) dcls
*/
#define D1_MBZ 0
#define D2_MBZ 0
 
/* p207.
* The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
*/
#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16))
 
#define MS1_MAPMASK_SHIFT 0
#define MS1_MAPMASK_MASK (0x8fff<<0)
 
#define MS2_UNTRUSTED_SURFACE (1<<31)
#define MS2_ADDRESS_MASK 0xfffffffc
#define MS2_VERTICAL_LINE_STRIDE (1<<1)
#define MS2_VERTICAL_OFFSET (1<<1)
 
#define MS3_HEIGHT_SHIFT 21
#define MS3_WIDTH_SHIFT 10
#define MS3_PALETTE_SELECT (1<<9)
#define MS3_MAPSURF_FORMAT_SHIFT 7
#define MS3_MAPSURF_FORMAT_MASK (0x7<<7)
#define MAPSURF_8BIT (1<<7)
#define MAPSURF_16BIT (2<<7)
#define MAPSURF_32BIT (3<<7)
#define MAPSURF_422 (5<<7)
#define MAPSURF_COMPRESSED (6<<7)
#define MAPSURF_4BIT_INDEXED (7<<7)
#define MS3_MT_FORMAT_MASK (0x7 << 3)
#define MS3_MT_FORMAT_SHIFT 3
#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */
#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */
#define MT_8BIT_L8 (1<<3)
#define MT_8BIT_A8 (4<<3)
#define MT_8BIT_MONO8 (5<<3)
#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */
#define MT_16BIT_ARGB1555 (1<<3)
#define MT_16BIT_ARGB4444 (2<<3)
#define MT_16BIT_AY88 (3<<3)
#define MT_16BIT_88DVDU (5<<3)
#define MT_16BIT_BUMP_655LDVDU (6<<3)
#define MT_16BIT_I16 (7<<3)
#define MT_16BIT_L16 (8<<3)
#define MT_16BIT_A16 (9<<3)
#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */
#define MT_32BIT_ABGR8888 (1<<3)
#define MT_32BIT_XRGB8888 (2<<3)
#define MT_32BIT_XBGR8888 (3<<3)
#define MT_32BIT_QWVU8888 (4<<3)
#define MT_32BIT_AXVU8888 (5<<3)
#define MT_32BIT_LXVU8888 (6<<3)
#define MT_32BIT_XLVU8888 (7<<3)
#define MT_32BIT_ARGB2101010 (8<<3)
#define MT_32BIT_ABGR2101010 (9<<3)
#define MT_32BIT_AWVU2101010 (0xA<<3)
#define MT_32BIT_GR1616 (0xB<<3)
#define MT_32BIT_VU1616 (0xC<<3)
#define MT_32BIT_xI824 (0xD<<3)
#define MT_32BIT_xA824 (0xE<<3)
#define MT_32BIT_xL824 (0xF<<3)
#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */
#define MT_422_YCRCB_NORMAL (1<<3)
#define MT_422_YCRCB_SWAPUV (2<<3)
#define MT_422_YCRCB_SWAPUVY (3<<3)
#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT2_3 (1<<3)
#define MT_COMPRESS_DXT4_5 (2<<3)
#define MT_COMPRESS_FXT1 (3<<3)
#define MT_COMPRESS_DXT1_RGB (4<<3)
#define MS3_USE_FENCE_REGS (1<<2)
#define MS3_TILED_SURFACE (1<<1)
#define MS3_TILE_WALK (1<<0)
 
/* The pitch is the pitch measured in DWORDS, minus 1 */
#define MS4_PITCH_SHIFT 21
#define MS4_CUBE_FACE_ENA_NEGX (1<<20)
#define MS4_CUBE_FACE_ENA_POSX (1<<19)
#define MS4_CUBE_FACE_ENA_NEGY (1<<18)
#define MS4_CUBE_FACE_ENA_POSY (1<<17)
#define MS4_CUBE_FACE_ENA_NEGZ (1<<16)
#define MS4_CUBE_FACE_ENA_POSZ (1<<15)
#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15)
#define MS4_MAX_LOD_SHIFT 9
#define MS4_MAX_LOD_MASK (0x3f<<9)
#define MS4_MIP_LAYOUT_LEGACY (0<<8)
#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8)
#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8)
#define MS4_VOLUME_DEPTH_SHIFT 0
#define MS4_VOLUME_DEPTH_MASK (0xff<<0)
 
/* p244.
* The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK.
*/
#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16))
 
#define SS1_MAPMASK_SHIFT 0
#define SS1_MAPMASK_MASK (0x8fff<<0)
 
#define SS2_REVERSE_GAMMA_ENABLE (1<<31)
#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30)
#define SS2_COLORSPACE_CONVERSION (1<<29)
#define SS2_CHROMAKEY_SHIFT 27
#define SS2_BASE_MIP_LEVEL_SHIFT 22
#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22)
#define SS2_MIP_FILTER_SHIFT 20
#define SS2_MIP_FILTER_MASK (0x3<<20)
#define MIPFILTER_NONE 0
#define MIPFILTER_NEAREST 1
#define MIPFILTER_LINEAR 3
#define SS2_MAG_FILTER_SHIFT 17
#define SS2_MAG_FILTER_MASK (0x7<<17)
#define FILTER_NEAREST 0
#define FILTER_LINEAR 1
#define FILTER_ANISOTROPIC 2
#define FILTER_4X4_1 3
#define FILTER_4X4_2 4
#define FILTER_4X4_FLAT 5
#define FILTER_6X5_MONO 6 /* XXX - check */
#define SS2_MIN_FILTER_SHIFT 14
#define SS2_MIN_FILTER_MASK (0x7<<14)
#define SS2_LOD_BIAS_SHIFT 5
#define SS2_LOD_BIAS_ONE (0x10<<5)
#define SS2_LOD_BIAS_MASK (0x1ff<<5)
/* Shadow requires:
* MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
* FILTER_4X4_x MIN and MAG filters
*/
#define SS2_SHADOW_ENABLE (1<<4)
#define SS2_MAX_ANISO_MASK (1<<3)
#define SS2_MAX_ANISO_2 (0<<3)
#define SS2_MAX_ANISO_4 (1<<3)
#define SS2_SHADOW_FUNC_SHIFT 0
#define SS2_SHADOW_FUNC_MASK (0x7<<0)
/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
 
#define SS3_MIN_LOD_SHIFT 24
#define SS3_MIN_LOD_ONE (0x10<<24)
#define SS3_MIN_LOD_MASK (0xff<<24)
#define SS3_KILL_PIXEL_ENABLE (1<<17)
#define SS3_TCX_ADDR_MODE_SHIFT 12
#define SS3_TCX_ADDR_MODE_MASK (0x7<<12)
#define TEXCOORDMODE_WRAP 0
#define TEXCOORDMODE_MIRROR 1
#define TEXCOORDMODE_CLAMP_EDGE 2
#define TEXCOORDMODE_CUBE 3
#define TEXCOORDMODE_CLAMP_BORDER 4
#define TEXCOORDMODE_MIRROR_ONCE 5
#define SS3_TCY_ADDR_MODE_SHIFT 9
#define SS3_TCY_ADDR_MODE_MASK (0x7<<9)
#define SS3_TCZ_ADDR_MODE_SHIFT 6
#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6)
#define SS3_NORMALIZED_COORDS (1<<5)
#define SS3_TEXTUREMAP_INDEX_SHIFT 1
#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1)
#define SS3_DEINTERLACER_ENABLE (1<<0)
 
#define SS4_BORDER_COLOR_MASK (~0)
 
/* 3DSTATE_SPAN_STIPPLE, p258
*/
#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16))
#define ST1_ENABLE (1<<16)
#define ST1_MASK (0xffff)
 
#define FLUSH_MAP_CACHE (1<<0)
#define FLUSH_RENDER_CACHE (1<<1)
 
#endif
/* -*- c-basic-offset: 4 -*- */
/*
* Copyright © 2006,2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
/* Each instruction is 3 dwords long, though most don't require all
* this space. Maximum of 123 instructions. Smaller maxes per insn
* type.
*/
#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16))
 
#define REG_TYPE_R 0 /* temporary regs, no need to
* dcl, must be written before
* read -- Preserved between
* phases.
*/
#define REG_TYPE_T 1 /* Interpolated values, must be
* dcl'ed before use.
*
* 0..7: texture coord,
* 8: diffuse spec,
* 9: specular color,
* 10: fog parameter in w.
*/
#define REG_TYPE_CONST 2 /* Restriction: only one const
* can be referenced per
* instruction, though it may be
* selected for multiple inputs.
* Constants not initialized
* default to zero.
*/
#define REG_TYPE_S 3 /* sampler */
#define REG_TYPE_OC 4 /* output color (rgba) */
#define REG_TYPE_OD 5 /* output depth (w), xyz are
* temporaries. If not written,
* interpolated depth is used?
*/
#define REG_TYPE_U 6 /* unpreserved temporaries */
#define REG_TYPE_MASK 0x7
#define REG_TYPE_SHIFT 4
#define REG_NR_MASK 0xf
 
/* REG_TYPE_T:
*/
#define T_TEX0 0
#define T_TEX1 1
#define T_TEX2 2
#define T_TEX3 3
#define T_TEX4 4
#define T_TEX5 5
#define T_TEX6 6
#define T_TEX7 7
#define T_DIFFUSE 8
#define T_SPECULAR 9
#define T_FOG_W 10 /* interpolated fog is in W coord */
 
/* Arithmetic instructions */
 
/* .replicate_swizzle == selection and replication of a particular
* scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
*/
#define A0_NOP (0x0<<24) /* no operation */
#define A0_ADD (0x1<<24) /* dst = src0 + src1 */
#define A0_MOV (0x2<<24) /* dst = src0 */
#define A0_MUL (0x3<<24) /* dst = src0 * src1 */
#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */
#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR (0x10<<24) /* dst = floor(src0) */
#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */
#define A0_TRC (0x12<<24) /* dst = int(src0) */
#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
#define A0_DEST_SATURATE (1<<22)
#define A0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
#define A0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_CHANNEL_X (1<<10)
#define A0_DEST_CHANNEL_Y (2<<10)
#define A0_DEST_CHANNEL_Z (4<<10)
#define A0_DEST_CHANNEL_W (8<<10)
#define A0_DEST_CHANNEL_ALL (0xf<<10)
#define A0_DEST_CHANNEL_SHIFT 10
#define A0_SRC0_TYPE_SHIFT 7
#define A0_SRC0_NR_SHIFT 2
 
#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)
 
#define SRC_X 0
#define SRC_Y 1
#define SRC_Z 2
#define SRC_W 3
#define SRC_ZERO 4
#define SRC_ONE 5
 
#define A1_SRC0_CHANNEL_X_NEGATE (1<<31)
#define A1_SRC0_CHANNEL_X_SHIFT 28
#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27)
#define A1_SRC0_CHANNEL_Y_SHIFT 24
#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23)
#define A1_SRC0_CHANNEL_Z_SHIFT 20
#define A1_SRC0_CHANNEL_W_NEGATE (1<<19)
#define A1_SRC0_CHANNEL_W_SHIFT 16
#define A1_SRC1_TYPE_SHIFT 13
#define A1_SRC1_NR_SHIFT 8
#define A1_SRC1_CHANNEL_X_NEGATE (1<<7)
#define A1_SRC1_CHANNEL_X_SHIFT 4
#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3)
#define A1_SRC1_CHANNEL_Y_SHIFT 0
 
#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31)
#define A2_SRC1_CHANNEL_Z_SHIFT 28
#define A2_SRC1_CHANNEL_W_NEGATE (1<<27)
#define A2_SRC1_CHANNEL_W_SHIFT 24
#define A2_SRC2_TYPE_SHIFT 21
#define A2_SRC2_NR_SHIFT 16
#define A2_SRC2_CHANNEL_X_NEGATE (1<<15)
#define A2_SRC2_CHANNEL_X_SHIFT 12
#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11)
#define A2_SRC2_CHANNEL_Y_SHIFT 8
#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7)
#define A2_SRC2_CHANNEL_Z_SHIFT 4
#define A2_SRC2_CHANNEL_W_NEGATE (1<<3)
#define A2_SRC2_CHANNEL_W_SHIFT 0
 
/* Texture instructions */
#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared
* sampler and address, and output
* filtered texel data to destination
* register */
#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a
* perspective divide of the texture
* coordinate .xyz values by .w before
* sampling. */
#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the
* computed LOD by w. Only S4.6 two's
* comp is used. This implies that a
* float to fixed conversion is
* done. */
#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling
* operation. Simply kills the pixel
* if any channel of the address
* register is < 0.0. */
#define T0_DEST_TYPE_SHIFT 19
/* Allow: R, OC, OD, U */
/* Note: U (unpreserved) regs do not retain their values between
* phases (cannot be used for feedback)
*
* Note: oC and OD registers can only be used as the destination of a
* texture instruction once per phase (this is an implementation
* restriction).
*/
#define T0_DEST_NR_SHIFT 14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */
#define T0_SAMPLER_NR_MASK (0xf<<0)
 
#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */
/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
#define T1_ADDRESS_REG_NR_SHIFT 17
#define T2_MBZ 0
 
/* Declaration instructions */
#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib)
* register or an s (sampler)
* register. */
#define D0_SAMPLE_TYPE_SHIFT 22
#define D0_SAMPLE_TYPE_2D (0x0<<22)
#define D0_SAMPLE_TYPE_CUBE (0x1<<22)
#define D0_SAMPLE_TYPE_VOLUME (0x2<<22)
#define D0_SAMPLE_TYPE_MASK (0x3<<22)
 
#define D0_TYPE_SHIFT 19
/* Allow: T, S */
#define D0_NR_SHIFT 14
/* Allow T: 0..10, S: 0..15 */
#define D0_CHANNEL_X (1<<10)
#define D0_CHANNEL_Y (2<<10)
#define D0_CHANNEL_Z (4<<10)
#define D0_CHANNEL_W (8<<10)
#define D0_CHANNEL_ALL (0xf<<10)
#define D0_CHANNEL_NONE (0<<10)
 
#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z)
 
/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
* or specular declarations.
*
* For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
*
* Must be zero for S (sampler) dcls
*/
#define D1_MBZ 0
#define D2_MBZ 0
 
 
/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic
* operations
*/
#define MASK_X 0x1
#define MASK_Y 0x2
#define MASK_Z 0x4
#define MASK_W 0x8
#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z)
#define MASK_XYZW (MASK_XYZ | MASK_W)
#define MASK_SATURATE 0x10
 
/* Temporary, undeclared regs. Preserved between phases */
#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0)
#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1)
#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2)
#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3)
 
/* Texture coordinate regs. Must be declared. */
#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0)
#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1)
#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2)
#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3)
#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4)
#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5)
#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6)
#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7)
#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8)
#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9)
#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10)
 
/* Constant values */
#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0)
#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1)
#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2)
#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3)
#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4)
#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5)
#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6)
#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7)
 
/* Sampler regs */
#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0)
#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1)
#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2)
#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3)
 
/* Output color */
#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0)
 
/* Output depth */
#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0)
 
/* Unpreserved temporary regs */
#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0)
#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1)
#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2)
#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3)
 
#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3)
#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4)
#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4)
#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4)
 
#define REG_CHANNEL_MASK 0xf
 
#define REG_NR(reg) ((reg) & REG_NR_MASK)
#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK)
#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
 
enum gen3_fs_channel {
X_CHANNEL_VAL = 0,
Y_CHANNEL_VAL,
Z_CHANNEL_VAL,
W_CHANNEL_VAL,
ZERO_CHANNEL_VAL,
ONE_CHANNEL_VAL,
 
NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8,
NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8,
NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8,
NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8,
NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8
};
 
#define gen3_fs_operand(reg, x, y, z, w) \
(reg) | \
(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \
(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \
(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \
(w##_CHANNEL_VAL << W_CHANNEL_SHIFT)
 
/**
* Construct an operand description for using a register with no swizzling
*/
#define gen3_fs_operand_reg(reg) \
gen3_fs_operand(reg, X, Y, Z, W)
 
#define gen3_fs_operand_reg_negate(reg) \
gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W)
 
/**
* Returns an operand containing (0.0, 0.0, 0.0, 0.0).
*/
#define gen3_fs_operand_zero() gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO)
 
/**
* Returns an unused operand
*/
#define gen3_fs_operand_none() gen3_fs_operand_zero()
 
/**
* Returns an operand containing (1.0, 1.0, 1.0, 1.0).
*/
#define gen3_fs_operand_one() gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE)
 
#define gen3_get_hardware_channel_val(val, shift, negate) \
(((val & 0x7) << shift) | ((val & 0x8) ? negate : 0))
 
/**
* Outputs a fragment shader command to declare a sampler or texture register.
*/
#define gen3_fs_dcl(reg) \
do { \
OUT_BATCH(D0_DCL | \
(REG_TYPE(reg) << D0_TYPE_SHIFT) | \
(REG_NR(reg) << D0_NR_SHIFT) | \
((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \
OUT_BATCH(0); \
OUT_BATCH(0); \
} while (0)
 
#define gen3_fs_texld(dest_reg, sampler_reg, address_reg) \
do { \
OUT_BATCH(T0_TEXLD | \
(REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
(REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
(REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
(REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
OUT_BATCH(0); \
} while (0)
 
#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg) \
do { \
OUT_BATCH(T0_TEXLDP | \
(REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
(REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
(REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
(REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
OUT_BATCH(0); \
} while (0)
 
#define gen3_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \
_gen3_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2)
 
#define gen3_fs_arith(op, dest_reg, operand0, operand1, operand2) \
_gen3_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2)
 
#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \
do { \
/* Set up destination register and write mask */ \
OUT_BATCH(cmd | \
(REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
(REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
(((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \
(((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \
/* Set up operand 0 */ \
(REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
(REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
A1_SRC0_CHANNEL_X_SHIFT, \
A1_SRC0_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand0), \
A1_SRC0_CHANNEL_Y_SHIFT, \
A1_SRC0_CHANNEL_Y_NEGATE) | \
gen3_get_hardware_channel_val(REG_Z(operand0), \
A1_SRC0_CHANNEL_Z_SHIFT, \
A1_SRC0_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand0), \
A1_SRC0_CHANNEL_W_SHIFT, \
A1_SRC0_CHANNEL_W_NEGATE) | \
/* Set up operand 1 */ \
(REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
(REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
gen3_get_hardware_channel_val(REG_X(operand1), \
A1_SRC1_CHANNEL_X_SHIFT, \
A1_SRC1_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand1), \
A1_SRC1_CHANNEL_Y_SHIFT, \
A1_SRC1_CHANNEL_Y_NEGATE)); \
OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
A2_SRC1_CHANNEL_Z_SHIFT, \
A2_SRC1_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand1), \
A2_SRC1_CHANNEL_W_SHIFT, \
A2_SRC1_CHANNEL_W_NEGATE) | \
/* Set up operand 2 */ \
(REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
(REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
gen3_get_hardware_channel_val(REG_X(operand2), \
A2_SRC2_CHANNEL_X_SHIFT, \
A2_SRC2_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand2), \
A2_SRC2_CHANNEL_Y_SHIFT, \
A2_SRC2_CHANNEL_Y_NEGATE) | \
gen3_get_hardware_channel_val(REG_Z(operand2), \
A2_SRC2_CHANNEL_Z_SHIFT, \
A2_SRC2_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand2), \
A2_SRC2_CHANNEL_W_SHIFT, \
A2_SRC2_CHANNEL_W_NEGATE)); \
} while (0)
 
#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\
/* Set up destination register and write mask */ \
OUT_BATCH(cmd | \
(REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
(REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
(A0_DEST_CHANNEL_ALL) | \
/* Set up operand 0 */ \
(REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
(REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
A1_SRC0_CHANNEL_X_SHIFT, \
A1_SRC0_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand0), \
A1_SRC0_CHANNEL_Y_SHIFT, \
A1_SRC0_CHANNEL_Y_NEGATE) | \
gen3_get_hardware_channel_val(REG_Z(operand0), \
A1_SRC0_CHANNEL_Z_SHIFT, \
A1_SRC0_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand0), \
A1_SRC0_CHANNEL_W_SHIFT, \
A1_SRC0_CHANNEL_W_NEGATE) | \
/* Set up operand 1 */ \
(REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
(REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
gen3_get_hardware_channel_val(REG_X(operand1), \
A1_SRC1_CHANNEL_X_SHIFT, \
A1_SRC1_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand1), \
A1_SRC1_CHANNEL_Y_SHIFT, \
A1_SRC1_CHANNEL_Y_NEGATE)); \
OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
A2_SRC1_CHANNEL_Z_SHIFT, \
A2_SRC1_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand1), \
A2_SRC1_CHANNEL_W_SHIFT, \
A2_SRC1_CHANNEL_W_NEGATE) | \
/* Set up operand 2 */ \
(REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
(REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
gen3_get_hardware_channel_val(REG_X(operand2), \
A2_SRC2_CHANNEL_X_SHIFT, \
A2_SRC2_CHANNEL_X_NEGATE) | \
gen3_get_hardware_channel_val(REG_Y(operand2), \
A2_SRC2_CHANNEL_Y_SHIFT, \
A2_SRC2_CHANNEL_Y_NEGATE) | \
gen3_get_hardware_channel_val(REG_Z(operand2), \
A2_SRC2_CHANNEL_Z_SHIFT, \
A2_SRC2_CHANNEL_Z_NEGATE) | \
gen3_get_hardware_channel_val(REG_W(operand2), \
A2_SRC2_CHANNEL_W_SHIFT, \
A2_SRC2_CHANNEL_W_NEGATE)); \
} while (0)
 
#define gen3_fs_mov(dest_reg, operand0) \
gen3_fs_arith(MOV, dest_reg, \
operand0, \
gen3_fs_operand_none(), \
gen3_fs_operand_none())
 
#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \
gen3_fs_arith_masked (MOV, dest_reg, dest_mask, \
operand0, \
gen3_fs_operand_none(), \
gen3_fs_operand_none())
 
 
#define gen3_fs_frc(dest_reg, operand0) \
gen3_fs_arith (FRC, dest_reg, \
operand0, \
gen3_fs_operand_none(), \
gen3_fs_operand_none())
 
/** Add operand0 and operand1 and put the result in dest_reg */
#define gen3_fs_add(dest_reg, operand0, operand1) \
gen3_fs_arith (ADD, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
/** Multiply operand0 and operand1 and put the result in dest_reg */
#define gen3_fs_mul(dest_reg, operand0, operand1) \
gen3_fs_arith (MUL, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
/** Computes 1/(operand0.replicate_swizzle) puts the result in dest_reg */
#define gen3_fs_rcp(dest_reg, dest_mask, operand0) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (RCP, dest_reg, dest_mask, \
operand0, \
gen3_fs_operand_none (), \
gen3_fs_operand_none ()); \
} else { \
gen3_fs_arith (RCP, dest_reg, \
operand0, \
gen3_fs_operand_none (), \
gen3_fs_operand_none ()); \
} \
} while (0)
 
/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */
#define gen3_fs_rsq(dest_reg, dest_mask, operand0) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (RSQ, dest_reg, dest_mask, \
operand0, \
gen3_fs_operand_none (), \
gen3_fs_operand_none ()); \
} else { \
gen3_fs_arith (RSQ, dest_reg, \
operand0, \
gen3_fs_operand_none (), \
gen3_fs_operand_none ()); \
} \
} while (0)
 
/** Puts the minimum of operand0 and operand1 in dest_reg */
#define gen3_fs_min(dest_reg, operand0, operand1) \
gen3_fs_arith (MIN, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
/** Puts the maximum of operand0 and operand1 in dest_reg */
#define gen3_fs_max(dest_reg, operand0, operand1) \
gen3_fs_arith (MAX, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
#define gen3_fs_cmp(dest_reg, operand0, operand1, operand2) \
gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2)
 
/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */
#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \
} else { \
gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \
} \
} while (0)
 
#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \
} else { \
gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \
} \
} while (0)
 
/**
* Perform a 3-component dot-product of operand0 and operand1 and put the
* resulting scalar in the channels of dest_reg specified by the dest_mask.
*/
#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \
op0, op1,\
gen3_fs_operand_none()); \
} else { \
gen3_fs_arith (DP3, dest_reg, op0, op1,\
gen3_fs_operand_none()); \
} \
} while (0)
 
/**
* Perform a 4-component dot-product of operand0 and operand1 and put the
* resulting scalar in the channels of dest_reg specified by the dest_mask.
*/
#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \
op0, op1,\
gen3_fs_operand_none()); \
} else { \
gen3_fs_arith (DP4, dest_reg, op0, op1,\
gen3_fs_operand_none()); \
} \
} while (0)
 
#define SHADER_TRAPEZOIDS (1 << 24)
/drivers/video/Intel-2D/render/exa_sf.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
{ 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
{ 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
{ 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
{ 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
/drivers/video/Intel-2D/render/exa_sf_mask.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
{ 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
/drivers/video/Intel-2D/render/exa_wm_ca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x000000a0, 0x008d0100 },
{ 0x00802048, 0x210077be, 0x000000a4, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x000000b0, 0x008d0100 },
{ 0x00802048, 0x214077be, 0x000000b4, 0x008d0140 },
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 },
{ 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 },
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g5b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
{ 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
{ 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g7b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
{ 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
{ 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x23801c09, 0x20000000, 0x0a2a0102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x22c01c09, 0x20000000, 0x0a8a0102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 },
/drivers/video/Intel-2D/render/exa_wm_noca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_src_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
/drivers/video/Intel-2D/render/exa_wm_src_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x284077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x286077bd, 0x000000c0, 0x008d0080 },
{ 0x0060005a, 0x288077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x28a077bd, 0x000000d0, 0x008d0080 },
/drivers/video/Intel-2D/render/exa_wm_src_projective.g5b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
{ 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/render/exa_wm_src_projective.g7b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
{ 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
{ 0x00600041, 0x288077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22801c09, 0x20000000, 0x0a2a0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g5b
0,0 → 1,2
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g5b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 },
{ 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 },
{ 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g7b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0001 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0003 },
{ 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0005 },
/drivers/video/Intel-2D/render/exa_wm_write.g5b
0,0 → 1,6
{ 0x00802001, 0x304003be, 0x008d01c0, 0x00000000 },
{ 0x00802001, 0x306003be, 0x008d0200, 0x00000000 },
{ 0x00802001, 0x308003be, 0x008d0240, 0x00000000 },
{ 0x00802001, 0x30a003be, 0x008d0280, 0x00000000 },
{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x548d0000, 0x94084800 },
/drivers/video/Intel-2D/render/exa_wm_write.g7b
0,0 → 1,17
{ 0x00600001, 0x284003bd, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x286003bd, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x288003bd, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x28a003bd, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x28c003bd, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x28e003bd, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x290003bd, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x292003bd, 0x008d02a0, 0x00000000 },
{ 0x05800031, 0x24001ca8, 0x00000840, 0x90031000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/render/exa_wm_xy.g5b
0,0 → 1,4
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g5b
0,0 → 1,12
{ 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g7b
0,0 → 1,12
{ 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/render/exa_sf.g4b
0,0 → 1,15
{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
{ 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
{ 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
{ 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
{ 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
{ 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/render/exa_sf_mask.g4b
0,0 → 1,15
{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
{ 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/render/exa_wm_ca.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_ca.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00800041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00800041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00800041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00800041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00800041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g4b
0,0 → 1,8
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc },
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g6b
0,0 → 1,4
{ 0x0060005a, 0x210077be, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x212077be, 0x00000100, 0x008d0080 },
{ 0x0060005a, 0x214077be, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x216077be, 0x00000110, 0x008d0080 },
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g4b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g6b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
{ 0x00600041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x212077be, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
{ 0x00600041, 0x214077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x216077be, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x23801c09, 0x00000000, 0x02520102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x23801cc9, 0x000000e0, 0x0a2a0102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x22c01c09, 0x00000000, 0x02580102 },
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22c01cc9, 0x000000e0, 0x0a8a0102 },
/drivers/video/Intel-2D/render/exa_wm_noca.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_noca.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00800041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00800041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/render/exa_wm_src_affine.g4b
0,0 → 1,8
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
/drivers/video/Intel-2D/render/exa_wm_src_affine.g6b
0,0 → 1,4
{ 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 },
{ 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 },
/drivers/video/Intel-2D/render/exa_wm_src_projective.g4b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/render/exa_wm_src_projective.g6b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
{ 0x00600041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x206077be, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
{ 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x20a077be, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22801c09, 0x00000000, 0x02520001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22801cc9, 0x00000020, 0x0a2a0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x21c01c09, 0x00000000, 0x02580001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g4b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22001c09, 0x00000000, 0x02520001 },
{ 0x01800031, 0x21c01c09, 0x00000000, 0x02520003 },
{ 0x01800031, 0x22401c09, 0x00000000, 0x02520005 },
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g6b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22001cc9, 0x00000020, 0x0a2a0001 },
{ 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0003 },
{ 0x02800031, 0x22401cc9, 0x00000020, 0x0a2a0005 },
/drivers/video/Intel-2D/render/exa_wm_write.g4b
0,0 → 1,18
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
{ 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 },
{ 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 },
{ 0x00601001, 0x210003be, 0x008d0260, 0x00000000 },
{ 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/render/exa_wm_write.g6b
0,0 → 1,17
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/render/exa_wm_xy.g4b
0,0 → 1,4
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g4b
0,0 → 1,12
{ 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g6b
0,0 → 1,12
{ 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/compiler.h
0,0 → 1,65
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef _SNA_COMPILER_H_
#define _SNA_COMPILER_H_
 
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#define likely(expr) (__builtin_expect (!!(expr), 1))
#define unlikely(expr) (__builtin_expect (!!(expr), 0))
#define noinline __attribute__((noinline))
#define force_inline inline __attribute__((always_inline))
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define __packed__ __attribute__((__packed__))
#define flatten __attribute__((flatten))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline
#define fastcall
#define must_check
#define constant
#define pure
#define __packed__
#define flatten
#endif
 
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
#define VG(x)
#endif
 
#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
 
#define COMPILE_TIME_ASSERT(E) ((void)sizeof(char[1 - 2*!(E)]))
 
#endif /* _SNA_COMPILER_H_ */
/drivers/video/Intel-2D/gen4_render.h
0,0 → 1,2693
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef GEN4_RENDER_H
#define GEN4_RENDER_H
 
#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)
 
#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)
 
#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4)
 
#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)
 
#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)
 
#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)
 
#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)
 
#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)
 
#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)
 
#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
/* DW1 */
# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN4_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN4_3DSTATE_PIPELINED_POINTERS */
#define GEN4_GS_DISABLE 0
#define GEN4_GS_ENABLE 1
#define GEN4_CLIP_DISABLE 0
#define GEN4_CLIP_ENABLE 1
 
/* for GEN4_PIPE_CONTROL */
#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define VE0_VALID (1 << 26)
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen4_defines.h */
#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN4_SVG_CTL 0x7400
 
#define GEN4_SVG_CTL_GS_BA (0 << 8)
#define GEN4_SVG_CTL_SS_BA (1 << 8)
#define GEN4_SVG_CTL_IO_BA (2 << 8)
#define GEN4_SVG_CTL_GS_AUB (3 << 8)
#define GEN4_SVG_CTL_IO_AUB (4 << 8)
#define GEN4_SVG_CTL_SIP (5 << 8)
 
#define GEN4_SVG_RDATA 0x7404
#define GEN4_SVG_WORK_CTL 0x7408
 
#define GEN4_VF_CTL 0x7500
 
#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_VF_STRG_VAL 0x7504
#define GEN4_VF_STR_VL_OVR 0x7508
#define GEN4_VF_VC_OVR 0x750c
#define GEN4_VF_STR_PSKIP 0x7510
#define GEN4_VF_MAX_PRIM 0x7514
#define GEN4_VF_RDATA 0x7518
 
#define GEN4_VS_CTL 0x7600
#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_VS_STRG_VAL 0x7604
#define GEN4_VS_RDATA 0x7608
 
#define GEN4_SF_CTL 0x7b00
#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_SF_STRG_VAL 0x7b04
#define GEN4_SF_RDATA 0x7b18
 
#define GEN4_WIZ_CTL 0x7c00
#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_WIZ_STRG_VAL 0x7c04
#define GEN4_WIZ_RDATA 0x7c18
 
#define GEN4_TS_CTL 0x7e00
#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN4_TS_STRG_VAL 0x7e04
#define GEN4_TS_RDATA 0x7e08
 
#define GEN4_TD_CTL 0x8000
#define GEN4_TD_CTL_MUX_SHIFT 8
#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN4_TD_CTL2 0x8004
#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN4_TD_VF_VS_EMSK 0x8008
#define GEN4_TD_GS_EMSK 0x800c
#define GEN4_TD_CLIP_EMSK 0x8010
#define GEN4_TD_SF_EMSK 0x8014
#define GEN4_TD_WIZ_EMSK 0x8018
#define GEN4_TD_0_6_EHTRG_VAL 0x801c
#define GEN4_TD_0_7_EHTRG_VAL 0x8020
#define GEN4_TD_0_6_EHTRG_MSK 0x8024
#define GEN4_TD_0_7_EHTRG_MSK 0x8028
#define GEN4_TD_RDATA 0x802c
#define GEN4_TD_TS_EMSK 0x8030
 
#define GEN4_EU_CTL 0x8800
#define GEN4_EU_CTL_SELECT_SHIFT 16
#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
#define GEN4_EU_ATT_0 0x8810
#define GEN4_EU_ATT_1 0x8814
#define GEN4_EU_ATT_DATA_0 0x8820
#define GEN4_EU_ATT_DATA_1 0x8824
#define GEN4_EU_ATT_CLR_0 0x8830
#define GEN4_EU_ATT_CLR_1 0x8834
#define GEN4_EU_RDATA 0x8840
 
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
 
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN4_ANISORATIO_2 0
#define GEN4_ANISORATIO_4 1
#define GEN4_ANISORATIO_6 2
#define GEN4_ANISORATIO_8 3
#define GEN4_ANISORATIO_10 4
#define GEN4_ANISORATIO_12 5
#define GEN4_ANISORATIO_14 6
#define GEN4_ANISORATIO_16 7
 
#define GEN4_BLENDFACTOR_ONE 0x1
#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
#define GEN4_BLENDFACTOR_DST_COLOR 0x5
#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN4_BLENDFACTOR_ZERO 0x11
#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN4_BLENDFUNCTION_ADD 0
#define GEN4_BLENDFUNCTION_SUBTRACT 1
#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN4_BLENDFUNCTION_MIN 3
#define GEN4_BLENDFUNCTION_MAX 4
 
#define GEN4_ALPHATEST_FORMAT_UNORM8 0
#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN4_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN4_CLIP_API_OGL 0
#define GEN4_CLIP_API_DX 1
 
#define GEN4_CLIPMODE_NORMAL 0
#define GEN4_CLIPMODE_CLIP_ALL 1
#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN4_CLIPMODE_REJECT_ALL 3
#define GEN4_CLIPMODE_ACCEPT_ALL 4
 
#define GEN4_CLIP_NDCSPACE 0
#define GEN4_CLIP_SCREENSPACE 1
 
#define GEN4_COMPAREFUNCTION_ALWAYS 0
#define GEN4_COMPAREFUNCTION_NEVER 1
#define GEN4_COMPAREFUNCTION_LESS 2
#define GEN4_COMPAREFUNCTION_EQUAL 3
#define GEN4_COMPAREFUNCTION_LEQUAL 4
#define GEN4_COMPAREFUNCTION_GREATER 5
#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
#define GEN4_COMPAREFUNCTION_GEQUAL 7
 
#define GEN4_COVERAGE_PIXELS_HALF 0
#define GEN4_COVERAGE_PIXELS_1 1
#define GEN4_COVERAGE_PIXELS_2 2
#define GEN4_COVERAGE_PIXELS_4 3
 
#define GEN4_CULLMODE_BOTH 0
#define GEN4_CULLMODE_NONE 1
#define GEN4_CULLMODE_FRONT 2
#define GEN4_CULLMODE_BACK 3
 
#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN4_DEPTHFORMAT_D32_FLOAT 1
#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN4_DEPTHFORMAT_D16_UNORM 5
 
#define GEN4_FLOATING_POINT_IEEE_754 0
#define GEN4_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN4_FRONTWINDING_CW 0
#define GEN4_FRONTWINDING_CCW 1
 
#define GEN4_INDEX_BYTE 0
#define GEN4_INDEX_WORD 1
#define GEN4_INDEX_DWORD 2
 
#define GEN4_LOGICOPFUNCTION_CLEAR 0
#define GEN4_LOGICOPFUNCTION_NOR 1
#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN4_LOGICOPFUNCTION_INVERT 5
#define GEN4_LOGICOPFUNCTION_XOR 6
#define GEN4_LOGICOPFUNCTION_NAND 7
#define GEN4_LOGICOPFUNCTION_AND 8
#define GEN4_LOGICOPFUNCTION_EQUIV 9
#define GEN4_LOGICOPFUNCTION_NOOP 10
#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN4_LOGICOPFUNCTION_COPY 12
#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN4_LOGICOPFUNCTION_OR 14
#define GEN4_LOGICOPFUNCTION_SET 15
 
#define GEN4_MAPFILTER_NEAREST 0x0
#define GEN4_MAPFILTER_LINEAR 0x1
#define GEN4_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN4_MIPFILTER_NONE 0
#define GEN4_MIPFILTER_NEAREST 1
#define GEN4_MIPFILTER_LINEAR 3
 
#define GEN4_POLYGON_FRONT_FACING 0
#define GEN4_POLYGON_BACK_FACING 1
 
#define GEN4_PREFILTER_ALWAYS 0x0
#define GEN4_PREFILTER_NEVER 0x1
#define GEN4_PREFILTER_LESS 0x2
#define GEN4_PREFILTER_EQUAL 0x3
#define GEN4_PREFILTER_LEQUAL 0x4
#define GEN4_PREFILTER_GREATER 0x5
#define GEN4_PREFILTER_NOTEQUAL 0x6
#define GEN4_PREFILTER_GEQUAL 0x7
 
#define GEN4_PROVOKING_VERTEX_0 0
#define GEN4_PROVOKING_VERTEX_1 1
#define GEN4_PROVOKING_VERTEX_2 2
 
#define GEN4_RASTRULE_UPPER_LEFT 0
#define GEN4_RASTRULE_UPPER_RIGHT 1
 
#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN4_STENCILOP_KEEP 0
#define GEN4_STENCILOP_ZERO 1
#define GEN4_STENCILOP_REPLACE 2
#define GEN4_STENCILOP_INCRSAT 3
#define GEN4_STENCILOP_DECRSAT 4
#define GEN4_STENCILOP_INCR 5
#define GEN4_STENCILOP_DECR 6
#define GEN4_STENCILOP_INVERT 7
 
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
#define GEN4_SURFACEFORMAT_R8_SINT 0x142
#define GEN4_SURFACEFORMAT_R8_UINT 0x143
#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN4_SURFACEFORMAT_R1_UINT 0x181
#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN4_SURFACEFORMAT_MONO8 0x18E
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN4_SURFACEFORMAT_FXT1 0x192
#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
#define GEN4_SURFACERETURNFORMAT_S1 1
 
#define GEN4_SURFACE_1D 0
#define GEN4_SURFACE_2D 1
#define GEN4_SURFACE_3D 2
#define GEN4_SURFACE_CUBE 3
#define GEN4_SURFACE_BUFFER 4
#define GEN4_SURFACE_NULL 7
 
#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
#define GEN4_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN4_TEXCOORDMODE_WRAP 0
#define GEN4_TEXCOORDMODE_MIRROR 1
#define GEN4_TEXCOORDMODE_CLAMP 2
#define GEN4_TEXCOORDMODE_CUBE 3
#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN4_THREAD_PRIORITY_NORMAL 0
#define GEN4_THREAD_PRIORITY_HIGH 1
 
#define GEN4_TILEWALK_XMAJOR 0
#define GEN4_TILEWALK_YMAJOR 1
 
#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define VFCOMPONENT_NOSTORE 0
#define VFCOMPONENT_STORE_SRC 1
#define VFCOMPONENT_STORE_0 2
#define VFCOMPONENT_STORE_1_FLT 3
#define VFCOMPONENT_STORE_1_INT 4
#define VFCOMPONENT_STORE_VID 5
#define VFCOMPONENT_STORE_IID 6
#define VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN4_ALIGN_1 0
#define GEN4_ALIGN_16 1
 
#define GEN4_ADDRESS_DIRECT 0
#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN4_CHANNEL_X 0
#define GEN4_CHANNEL_Y 1
#define GEN4_CHANNEL_Z 2
#define GEN4_CHANNEL_W 3
 
#define GEN4_COMPRESSION_NONE 0
#define GEN4_COMPRESSION_2NDHALF 1
#define GEN4_COMPRESSION_COMPRESSED 2
 
#define GEN4_CONDITIONAL_NONE 0
#define GEN4_CONDITIONAL_Z 1
#define GEN4_CONDITIONAL_NZ 2
#define GEN4_CONDITIONAL_EQ 1 /* Z */
#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
#define GEN4_CONDITIONAL_G 3
#define GEN4_CONDITIONAL_GE 4
#define GEN4_CONDITIONAL_L 5
#define GEN4_CONDITIONAL_LE 6
#define GEN4_CONDITIONAL_C 7
#define GEN4_CONDITIONAL_O 8
 
#define GEN4_DEBUG_NONE 0
#define GEN4_DEBUG_BREAKPOINT 1
 
#define GEN4_DEPENDENCY_NORMAL 0
#define GEN4_DEPENDENCY_NOTCLEARED 1
#define GEN4_DEPENDENCY_NOTCHECKED 2
#define GEN4_DEPENDENCY_DISABLE 3
 
#define GEN4_EXECUTE_1 0
#define GEN4_EXECUTE_2 1
#define GEN4_EXECUTE_4 2
#define GEN4_EXECUTE_8 3
#define GEN4_EXECUTE_16 4
#define GEN4_EXECUTE_32 5
 
#define GEN4_HORIZONTAL_STRIDE_0 0
#define GEN4_HORIZONTAL_STRIDE_1 1
#define GEN4_HORIZONTAL_STRIDE_2 2
#define GEN4_HORIZONTAL_STRIDE_4 3
 
#define GEN4_INSTRUCTION_NORMAL 0
#define GEN4_INSTRUCTION_SATURATE 1
 
#define _MASK_ENABLE 0
#define _MASK_DISABLE 1
 
#define GEN4_OPCODE_MOV 1
#define GEN4_OPCODE_SEL 2
#define GEN4_OPCODE_NOT 4
#define GEN4_OPCODE_AND 5
#define GEN4_OPCODE_OR 6
#define GEN4_OPCODE_XOR 7
#define GEN4_OPCODE_SHR 8
#define GEN4_OPCODE_SHL 9
#define GEN4_OPCODE_RSR 10
#define GEN4_OPCODE_RSL 11
#define GEN4_OPCODE_ASR 12
#define GEN4_OPCODE_CMP 16
#define GEN4_OPCODE_JMPI 32
#define GEN4_OPCODE_IF 34
#define GEN4_OPCODE_IFF 35
#define GEN4_OPCODE_ELSE 36
#define GEN4_OPCODE_ENDIF 37
#define GEN4_OPCODE_DO 38
#define GEN4_OPCODE_WHILE 39
#define GEN4_OPCODE_BREAK 40
#define GEN4_OPCODE_CONTINUE 41
#define GEN4_OPCODE_HALT 42
#define GEN4_OPCODE_MSAVE 44
#define GEN4_OPCODE_MRESTORE 45
#define GEN4_OPCODE_PUSH 46
#define GEN4_OPCODE_POP 47
#define GEN4_OPCODE_WAIT 48
#define GEN4_OPCODE_SEND 49
#define GEN4_OPCODE_ADD 64
#define GEN4_OPCODE_MUL 65
#define GEN4_OPCODE_AVG 66
#define GEN4_OPCODE_FRC 67
#define GEN4_OPCODE_RNDU 68
#define GEN4_OPCODE_RNDD 69
#define GEN4_OPCODE_RNDE 70
#define GEN4_OPCODE_RNDZ 71
#define GEN4_OPCODE_MAC 72
#define GEN4_OPCODE_MACH 73
#define GEN4_OPCODE_LZD 74
#define GEN4_OPCODE_SAD2 80
#define GEN4_OPCODE_SADA2 81
#define GEN4_OPCODE_DP4 84
#define GEN4_OPCODE_DPH 85
#define GEN4_OPCODE_DP3 86
#define GEN4_OPCODE_DP2 87
#define GEN4_OPCODE_DPA2 88
#define GEN4_OPCODE_LINE 89
#define GEN4_OPCODE_NOP 126
 
#define GEN4_PREDICATE_NONE 0
#define GEN4_PREDICATE_NORMAL 1
#define GEN4_PREDICATE_ALIGN1_ANYV 2
#define GEN4_PREDICATE_ALIGN1_ALLV 3
#define GEN4_PREDICATE_ALIGN1_ANY2H 4
#define GEN4_PREDICATE_ALIGN1_ALL2H 5
#define GEN4_PREDICATE_ALIGN1_ANY4H 6
#define GEN4_PREDICATE_ALIGN1_ALL4H 7
#define GEN4_PREDICATE_ALIGN1_ANY8H 8
#define GEN4_PREDICATE_ALIGN1_ALL8H 9
#define GEN4_PREDICATE_ALIGN1_ANY16H 10
#define GEN4_PREDICATE_ALIGN1_ALL16H 11
#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN4_PREDICATE_ALIGN16_ANY4H 6
#define GEN4_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN4_ARCHITECTURE_REGISTER_FILE 0
#define GEN4_GENERAL_REGISTER_FILE 1
#define GEN4_MESSAGE_REGISTER_FILE 2
#define GEN4_IMMEDIATE_VALUE 3
 
#define GEN4_REGISTER_TYPE_UD 0
#define GEN4_REGISTER_TYPE_D 1
#define GEN4_REGISTER_TYPE_UW 2
#define GEN4_REGISTER_TYPE_W 3
#define GEN4_REGISTER_TYPE_UB 4
#define GEN4_REGISTER_TYPE_B 5
#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN4_REGISTER_TYPE_HF 6
#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN4_REGISTER_TYPE_F 7
 
#define GEN4_ARF_NULL 0x00
#define GEN4_ARF_ADDRESS 0x10
#define GEN4_ARF_ACCUMULATOR 0x20
#define GEN4_ARF_FLAG 0x30
#define GEN4_ARF_MASK 0x40
#define GEN4_ARF_MASK_STACK 0x50
#define GEN4_ARF_MASK_STACK_DEPTH 0x60
#define GEN4_ARF_STATE 0x70
#define GEN4_ARF_CONTROL 0x80
#define GEN4_ARF_NOTIFICATION_COUNT 0x90
#define GEN4_ARF_IP 0xA0
 
#define GEN4_AMASK 0
#define GEN4_IMASK 1
#define GEN4_LMASK 2
#define GEN4_CMASK 3
 
 
 
#define GEN4_THREAD_NORMAL 0
#define GEN4_THREAD_ATOMIC 1
#define GEN4_THREAD_SWITCH 2
 
#define GEN4_VERTICAL_STRIDE_0 0
#define GEN4_VERTICAL_STRIDE_1 1
#define GEN4_VERTICAL_STRIDE_2 2
#define GEN4_VERTICAL_STRIDE_4 3
#define GEN4_VERTICAL_STRIDE_8 4
#define GEN4_VERTICAL_STRIDE_16 5
#define GEN4_VERTICAL_STRIDE_32 6
#define GEN4_VERTICAL_STRIDE_64 7
#define GEN4_VERTICAL_STRIDE_128 8
#define GEN4_VERTICAL_STRIDE_256 9
#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN4_WIDTH_1 0
#define GEN4_WIDTH_2 1
#define GEN4_WIDTH_4 2
#define GEN4_WIDTH_8 3
#define GEN4_WIDTH_16 4
 
#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN4_POLYGON_FACING_FRONT 0
#define GEN4_POLYGON_FACING_BACK 1
 
#define GEN4_MESSAGE_TARGET_NULL 0
#define GEN4_MESSAGE_TARGET_MATH 1
#define GEN4_MESSAGE_TARGET_SAMPLER 2
#define GEN4_MESSAGE_TARGET_GATEWAY 3
#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN4_MESSAGE_TARGET_URB 6
#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
#define GEN4_MATH_FUNCTION_INV 1
#define GEN4_MATH_FUNCTION_LOG 2
#define GEN4_MATH_FUNCTION_EXP 3
#define GEN4_MATH_FUNCTION_SQRT 4
#define GEN4_MATH_FUNCTION_RSQ 5
#define GEN4_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN4_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN4_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN4_MATH_FUNCTION_TAN 9
#define GEN4_MATH_FUNCTION_POW 10
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define GEN4_MATH_INTEGER_UNSIGNED 0
#define GEN4_MATH_INTEGER_SIGNED 1
 
#define GEN4_MATH_PRECISION_FULL 0
#define GEN4_MATH_PRECISION_PARTIAL 1
 
#define GEN4_MATH_SATURATE_NONE 0
#define GEN4_MATH_SATURATE_SATURATE 1
 
#define GEN4_MATH_DATA_VECTOR 0
#define GEN4_MATH_DATA_SCALAR 1
 
#define GEN4_URB_OPCODE_WRITE 0
 
#define GEN4_URB_SWIZZLE_NONE 0
#define GEN4_URB_SWIZZLE_INTERLEAVE 1
#define GEN4_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN4_SCRATCH_SPACE_SIZE_1K 0
#define GEN4_SCRATCH_SPACE_SIZE_2K 1
#define GEN4_SCRATCH_SPACE_SIZE_4K 2
#define GEN4_SCRATCH_SPACE_SIZE_8K 3
#define GEN4_SCRATCH_SPACE_SIZE_16K 4
#define GEN4_SCRATCH_SPACE_SIZE_32K 5
#define GEN4_SCRATCH_SPACE_SIZE_64K 6
#define GEN4_SCRATCH_SPACE_SIZE_128K 7
#define GEN4_SCRATCH_SPACE_SIZE_256K 8
#define GEN4_SCRATCH_SPACE_SIZE_512K 9
#define GEN4_SCRATCH_SPACE_SIZE_1M 10
#define GEN4_SCRATCH_SPACE_SIZE_2M 11
 
 
 
 
#define CMD_URB_FENCE 0x6000
#define CMD_CONST_BUFFER_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
 
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_INSN_POINTER 0x6102
#define CMD_PIPELINE_SELECT 0x6104
 
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
#define CMD_VERTEX_BUFFER 0x7808
#define CMD_VERTEX_ELEMENT 0x7809
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS 0x780b
 
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
#define CMD_CHROMA_KEY 0x7904
#define CMD_DEPTH_BUFFER 0x7905
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CMD_POLY_STIPPLE_PATTERN 0x7907
#define CMD_LINE_STIPPLE_PATTERN 0x7908
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
 
#define CMD_PIPE_CONTROL 0x7a00
 
#define CMD_3D_PRIM 0x7b00
 
#define CMD_MI_FLUSH 0x0200
 
 
/* Various values from the R0 vertex header:
*/
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
 
/* media pipeline */
 
#define GEN4_VFE_MODE_GENERIC 0x0
#define GEN4_VFE_MODE_VLD_MPEG2 0x1
#define GEN4_VFE_MODE_IS 0x2
#define GEN4_VFE_MODE_AVC_MC 0x4
#define GEN4_VFE_MODE_AVC_IT 0x7
#define GEN4_VFE_MODE_VC1_IT 0xB
 
#define GEN4_VFE_DEBUG_COUNTER_FREE 0
#define GEN4_VFE_DEBUG_COUNTER_FROZEN 1
#define GEN4_VFE_DEBUG_COUNTER_ONCE 2
#define GEN4_VFE_DEBUG_COUNTER_ALWAYS 3
 
/* VLD_STATE */
#define GEN4_MPEG_TOP_FIELD 1
#define GEN4_MPEG_BOTTOM_FIELD 2
#define GEN4_MPEG_FRAME 3
#define GEN4_MPEG_QSCALE_LINEAR 0
#define GEN4_MPEG_QSCALE_NONLINEAR 1
#define GEN4_MPEG_ZIGZAG_SCAN 0
#define GEN4_MPEG_ALTER_VERTICAL_SCAN 1
#define GEN4_MPEG_I_PICTURE 1
#define GEN4_MPEG_P_PICTURE 2
#define GEN4_MPEG_B_PICTURE 3
 
/* Command packets:
*/
struct header
{
unsigned int length:16;
unsigned int opcode:16;
};
 
 
union header_union
{
struct header bits;
unsigned int dword;
};
 
struct gen4_3d_control
{
struct
{
unsigned int length:8;
unsigned int notify_enable:1;
unsigned int pad:3;
unsigned int wc_flush_enable:1;
unsigned int depth_stall_enable:1;
unsigned int operation:2;
unsigned int opcode:16;
} header;
struct
{
unsigned int pad:2;
unsigned int dest_addr_type:1;
unsigned int dest_addr:29;
} dest;
unsigned int dword2;
unsigned int dword3;
};
 
 
struct gen4_3d_primitive
{
struct
{
unsigned int length:8;
unsigned int pad:2;
unsigned int topology:5;
unsigned int indexed:1;
unsigned int opcode:16;
} header;
 
unsigned int verts_per_instance;
unsigned int start_vert_location;
unsigned int instance_count;
unsigned int start_instance_location;
unsigned int base_vert_location;
};
 
/* These seem to be passed around as function args, so it works out
* better to keep them as #defines:
*/
#define GEN4_FLUSH_READ_CACHE 0x1
#define GEN4_FLUSH_STATE_CACHE 0x2
#define GEN4_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define GEN4_FLUSH_SNAPSHOT_COUNTERS 0x8
 
struct gen4_mi_flush
{
unsigned int flags:4;
unsigned int pad:12;
unsigned int opcode:16;
};
 
struct gen4_vf_statistics
{
unsigned int statistics_enable:1;
unsigned int pad:15;
unsigned int opcode:16;
};
 
 
 
struct gen4_binding_table_pointers
{
struct header header;
unsigned int vs;
unsigned int gs;
unsigned int clp;
unsigned int sf;
unsigned int wm;
};
 
 
struct gen4_blend_constant_color
{
struct header header;
float blend_constant_color[4];
};
 
 
struct gen4_depthbuffer
{
union header_union header;
union {
struct {
unsigned int pitch:18;
unsigned int format:3;
unsigned int pad:4;
unsigned int depth_offset_disable:1;
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int pad2:1;
unsigned int surface_type:3;
} bits;
unsigned int dword;
} dword1;
unsigned int dword2_base_addr;
union {
struct {
unsigned int pad:1;
unsigned int mipmap_layout:1;
unsigned int lod:4;
unsigned int width:13;
unsigned int height:13;
} bits;
unsigned int dword;
} dword3;
 
union {
struct {
unsigned int pad:12;
unsigned int min_array_element:9;
unsigned int depth:11;
} bits;
unsigned int dword;
} dword4;
};
 
struct gen4_drawrect
{
struct header header;
unsigned int xmin:16;
unsigned int ymin:16;
unsigned int xmax:16;
unsigned int ymax:16;
unsigned int xorg:16;
unsigned int yorg:16;
};
 
 
 
 
struct gen4_global_depth_offset_clamp
{
struct header header;
float depth_offset_clamp;
};
 
struct gen4_indexbuffer
{
union {
struct
{
unsigned int length:8;
unsigned int index_format:2;
unsigned int cut_index_enable:1;
unsigned int pad:5;
unsigned int opcode:16;
} bits;
unsigned int dword;
 
} header;
 
unsigned int buffer_start;
unsigned int buffer_end;
};
 
 
struct gen4_line_stipple
{
struct header header;
struct
{
unsigned int pattern:16;
unsigned int pad:16;
} bits0;
struct
{
unsigned int repeat_count:9;
unsigned int pad:7;
unsigned int inverse_repeat_count:16;
} bits1;
};
 
 
struct gen4_pipelined_state_pointers
{
struct header header;
struct {
unsigned int pad:5;
unsigned int offset:27;
} vs;
struct
{
unsigned int enable:1;
unsigned int pad:4;
unsigned int offset:27;
} gs;
struct
{
unsigned int enable:1;
unsigned int pad:4;
unsigned int offset:27;
} clp;
struct
{
unsigned int pad:5;
unsigned int offset:27;
} sf;
 
struct
{
unsigned int pad:5;
unsigned int offset:27;
} wm;
struct
{
unsigned int pad:5;
unsigned int offset:27; /* KW: check me! */
} cc;
};
 
 
struct gen4_polygon_stipple_offset
{
struct header header;
 
struct {
unsigned int y_offset:5;
unsigned int pad:3;
unsigned int x_offset:5;
unsigned int pad0:19;
} bits0;
};
 
 
 
struct gen4_polygon_stipple
{
struct header header;
unsigned int stipple[32];
};
 
 
 
struct gen4_pipeline_select
{
struct
{
unsigned int pipeline_select:1;
unsigned int pad:15;
unsigned int opcode:16;
} header;
};
 
 
struct gen4_pipe_control
{
struct
{
unsigned int length:8;
unsigned int notify_enable:1;
unsigned int pad:2;
unsigned int instruction_state_cache_flush_enable:1;
unsigned int write_cache_flush_enable:1;
unsigned int depth_stall_enable:1;
unsigned int post_sync_operation:2;
 
unsigned int opcode:16;
} header;
 
struct
{
unsigned int pad:2;
unsigned int dest_addr_type:1;
unsigned int dest_addr:29;
} bits1;
 
unsigned int data0;
unsigned int data1;
};
 
 
struct gen4_urb_fence
{
struct
{
unsigned int length:8;
unsigned int vs_realloc:1;
unsigned int gs_realloc:1;
unsigned int clp_realloc:1;
unsigned int sf_realloc:1;
unsigned int vfe_realloc:1;
unsigned int cs_realloc:1;
unsigned int pad:2;
unsigned int opcode:16;
} header;
 
struct
{
unsigned int vs_fence:10;
unsigned int gs_fence:10;
unsigned int clp_fence:10;
unsigned int pad:2;
} bits0;
 
struct
{
unsigned int sf_fence:10;
unsigned int vf_fence:10;
unsigned int cs_fence:10;
unsigned int pad:2;
} bits1;
};
 
struct gen4_constant_buffer_state /* previously gen4_command_streamer */
{
struct header header;
 
struct
{
unsigned int nr_urb_entries:3;
unsigned int pad:1;
unsigned int urb_entry_size:5;
unsigned int pad0:23;
} bits0;
};
 
struct gen4_constant_buffer
{
struct
{
unsigned int length:8;
unsigned int valid:1;
unsigned int pad:7;
unsigned int opcode:16;
} header;
 
struct
{
unsigned int buffer_length:6;
unsigned int buffer_address:26;
} bits0;
};
 
struct gen4_state_base_address
{
struct header header;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int general_state_address:27;
} bits0;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int surface_state_address:27;
} bits1;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int indirect_object_state_address:27;
} bits2;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:11;
unsigned int general_state_upper_bound:20;
} bits3;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:11;
unsigned int indirect_object_state_upper_bound:20;
} bits4;
};
 
struct gen4_state_prefetch
{
struct header header;
 
struct
{
unsigned int prefetch_count:3;
unsigned int pad:3;
unsigned int prefetch_pointer:26;
} bits0;
};
 
struct gen4_system_instruction_pointer
{
struct header header;
 
struct
{
unsigned int pad:4;
unsigned int system_instruction_pointer:28;
} bits0;
};
 
 
 
 
/* State structs for the various fixed function units:
*/
 
 
struct thread0
{
unsigned int pad0:1;
unsigned int grf_reg_count:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer:26;
};
 
struct thread1
{
unsigned int ext_halt_exception_enable:1;
unsigned int sw_exception_enable:1;
unsigned int mask_stack_exception_enable:1;
unsigned int timeout_exception_enable:1;
unsigned int illegal_op_exception_enable:1;
unsigned int pad0:3;
unsigned int depth_coef_urb_read_offset:6; /* WM only */
unsigned int pad1:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int binding_table_entry_count:8;
unsigned int pad3:5;
unsigned int single_program_flow:1;
};
 
struct thread2
{
unsigned int per_thread_scratch_space:4;
unsigned int pad0:6;
unsigned int scratch_space_base_pointer:22;
};
 
struct thread3
{
unsigned int dispatch_grf_start_reg:4;
unsigned int urb_entry_read_offset:6;
unsigned int pad0:1;
unsigned int urb_entry_read_length:6;
unsigned int pad1:1;
unsigned int const_urb_entry_read_offset:6;
unsigned int pad2:1;
unsigned int const_urb_entry_read_length:6;
unsigned int pad3:1;
};
 
 
 
struct gen4_clip_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:9;
unsigned int gs_output_stats:1; /* not always */
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:6; /* may be less */
unsigned int pad3:1;
} thread4;
struct
{
unsigned int pad0:13;
unsigned int clip_mode:3;
unsigned int userclip_enable_flags:8;
unsigned int userclip_must_clip:1;
unsigned int pad1:1;
unsigned int guard_band_enable:1;
unsigned int viewport_z_clip_enable:1;
unsigned int viewport_xy_clip_enable:1;
unsigned int vertex_position_space:1;
unsigned int api_mode:1;
unsigned int pad2:1;
} clip5;
struct
{
unsigned int pad0:5;
unsigned int clipper_viewport_state_ptr:27;
} clip6;
 
float viewport_xmin;
float viewport_xmax;
float viewport_ymin;
float viewport_ymax;
};
 
 
 
struct gen4_cc_unit_state
{
struct
{
unsigned int pad0:3;
unsigned int bf_stencil_pass_depth_pass_op:3;
unsigned int bf_stencil_pass_depth_fail_op:3;
unsigned int bf_stencil_fail_op:3;
unsigned int bf_stencil_func:3;
unsigned int bf_stencil_enable:1;
unsigned int pad1:2;
unsigned int stencil_write_enable:1;
unsigned int stencil_pass_depth_pass_op:3;
unsigned int stencil_pass_depth_fail_op:3;
unsigned int stencil_fail_op:3;
unsigned int stencil_func:3;
unsigned int stencil_enable:1;
} cc0;
 
struct
{
unsigned int bf_stencil_ref:8;
unsigned int stencil_write_mask:8;
unsigned int stencil_test_mask:8;
unsigned int stencil_ref:8;
} cc1;
 
struct
{
unsigned int logicop_enable:1;
unsigned int pad0:10;
unsigned int depth_write_enable:1;
unsigned int depth_test_function:3;
unsigned int depth_test:1;
unsigned int bf_stencil_write_mask:8;
unsigned int bf_stencil_test_mask:8;
} cc2;
 
struct
{
unsigned int pad0:8;
unsigned int alpha_test_func:3;
unsigned int alpha_test:1;
unsigned int blend_enable:1;
unsigned int ia_blend_enable:1;
unsigned int pad1:1;
unsigned int alpha_test_format:1;
unsigned int pad2:16;
} cc3;
struct
{
unsigned int pad0:5;
unsigned int cc_viewport_state_offset:27;
} cc4;
struct
{
unsigned int pad0:2;
unsigned int ia_dest_blend_factor:5;
unsigned int ia_src_blend_factor:5;
unsigned int ia_blend_function:3;
unsigned int statistics_enable:1;
unsigned int logicop_func:4;
unsigned int pad1:11;
unsigned int dither_enable:1;
} cc5;
 
struct
{
unsigned int clamp_post_alpha_blend:1;
unsigned int clamp_pre_alpha_blend:1;
unsigned int clamp_range:2;
unsigned int pad0:11;
unsigned int y_dither_offset:2;
unsigned int x_dither_offset:2;
unsigned int dest_blend_factor:5;
unsigned int src_blend_factor:5;
unsigned int blend_function:3;
} cc6;
 
struct {
union {
float f;
unsigned char ub[4];
} alpha_ref;
} cc7;
};
 
 
 
struct gen4_sf_unit_state
{
struct thread0 thread0;
struct {
unsigned int pad0:7;
unsigned int sw_exception_enable:1;
unsigned int pad1:3;
unsigned int mask_stack_exception_enable:1;
unsigned int pad2:1;
unsigned int illegal_op_exception_enable:1;
unsigned int pad3:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int binding_table_entry_count:8;
unsigned int pad4:5;
unsigned int single_program_flow:1;
} sf1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:6;
unsigned int pad3:1;
} thread4;
 
struct
{
unsigned int front_winding:1;
unsigned int viewport_transform:1;
unsigned int pad0:3;
unsigned int sf_viewport_state_offset:27;
} sf5;
struct
{
unsigned int pad0:9;
unsigned int dest_org_vbias:4;
unsigned int dest_org_hbias:4;
unsigned int scissor:1;
unsigned int disable_2x2_trifilter:1;
unsigned int disable_zero_pix_trifilter:1;
unsigned int point_rast_rule:2;
unsigned int line_endcap_aa_region_width:2;
unsigned int line_width:4;
unsigned int fast_scissor_disable:1;
unsigned int cull_mode:2;
unsigned int aa_enable:1;
} sf6;
 
struct
{
unsigned int point_size:11;
unsigned int use_point_size_state:1;
unsigned int subpixel_precision:1;
unsigned int sprite_point:1;
unsigned int pad0:11;
unsigned int trifan_pv:2;
unsigned int linestrip_pv:2;
unsigned int tristrip_pv:2;
unsigned int line_last_pixel_enable:1;
} sf7;
 
};
 
 
struct gen4_gs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:1;
unsigned int pad3:6;
} thread4;
struct
{
unsigned int sampler_count:3;
unsigned int pad0:2;
unsigned int sampler_state_pointer:27;
} gs5;
 
struct
{
unsigned int max_vp_index:4;
unsigned int pad0:26;
unsigned int reorder_enable:1;
unsigned int pad1:1;
} gs6;
};
 
 
struct gen4_vs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:4;
unsigned int pad3:3;
} thread4;
 
struct
{
unsigned int sampler_count:3;
unsigned int pad0:2;
unsigned int sampler_state_pointer:27;
} vs5;
 
struct
{
unsigned int vs_enable:1;
unsigned int vert_cache_disable:1;
unsigned int pad0:30;
} vs6;
};
 
 
struct gen4_wm_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct {
unsigned int stats_enable:1;
unsigned int pad0:1;
unsigned int sampler_count:3;
unsigned int sampler_state_pointer:27;
} wm4;
struct
{
unsigned int enable_8_pix:1;
unsigned int enable_16_pix:1;
unsigned int enable_32_pix:1;
unsigned int pad0:7;
unsigned int legacy_global_depth_bias:1;
unsigned int line_stipple:1;
unsigned int depth_offset:1;
unsigned int polygon_stipple:1;
unsigned int line_aa_region_width:2;
unsigned int line_endcap_aa_region_width:2;
unsigned int early_depth_test:1;
unsigned int thread_dispatch_enable:1;
unsigned int program_uses_depth:1;
unsigned int program_computes_depth:1;
unsigned int program_uses_killpixel:1;
unsigned int legacy_line_rast: 1;
unsigned int transposed_urb_read:1;
unsigned int max_threads:7;
} wm5;
float global_depth_offset_constant;
float global_depth_offset_scale;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_1:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_1:26;
} wm8;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_2:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_2:26;
} wm9;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_3:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_3:26;
} wm10;
};
 
struct gen4_wm_unit_state_padded {
struct gen4_wm_unit_state state;
char pad[64 - sizeof(struct gen4_wm_unit_state)];
};
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
struct gen4_sampler_default_border_color {
float color[4];
};
 
struct gen4_sampler_legacy_border_color {
uint8_t color[4];
};
 
struct gen4_sampler_state
{
struct
{
unsigned int shadow_function:3;
unsigned int lod_bias:11;
unsigned int min_filter:3;
unsigned int mag_filter:3;
unsigned int mip_filter:2;
unsigned int base_level:5;
unsigned int pad:1;
unsigned int lod_preclamp:1;
unsigned int border_color_mode:1;
unsigned int pad0:1;
unsigned int disable:1;
} ss0;
 
struct
{
unsigned int r_wrap_mode:3;
unsigned int t_wrap_mode:3;
unsigned int s_wrap_mode:3;
unsigned int pad:3;
unsigned int max_lod:10;
unsigned int min_lod:10;
} ss1;
 
struct
{
unsigned int pad:5;
unsigned int border_color_pointer:27;
} ss2;
struct
{
unsigned int pad:19;
unsigned int max_aniso:3;
unsigned int chroma_key_mode:1;
unsigned int chroma_key_index:2;
unsigned int chroma_key_enable:1;
unsigned int monochrome_filter_width:3;
unsigned int monochrome_filter_height:3;
} ss3;
};
 
 
struct gen4_clipper_viewport
{
float xmin;
float xmax;
float ymin;
float ymax;
};
 
struct gen4_cc_viewport
{
float min_depth;
float max_depth;
};
 
struct gen4_sf_viewport
{
struct {
float m00;
float m11;
float m22;
float m30;
float m31;
float m32;
} viewport;
 
struct {
short xmin;
short ymin;
short xmax;
short ymax;
} scissor;
};
 
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct gen4_surface_state
{
struct {
unsigned int cube_pos_z:1;
unsigned int cube_neg_z:1;
unsigned int cube_pos_y:1;
unsigned int cube_neg_y:1;
unsigned int cube_pos_x:1;
unsigned int cube_neg_x:1;
unsigned int pad:3;
unsigned int render_cache_read_mode:1;
unsigned int mipmap_layout_mode:1;
unsigned int vert_line_stride_ofs:1;
unsigned int vert_line_stride:1;
unsigned int color_blend:1;
unsigned int writedisable_blue:1;
unsigned int writedisable_green:1;
unsigned int writedisable_red:1;
unsigned int writedisable_alpha:1;
unsigned int surface_format:9;
unsigned int data_return_format:1;
unsigned int pad0:1;
unsigned int surface_type:3;
} ss0;
struct {
unsigned int base_addr;
} ss1;
struct {
unsigned int render_target_rotation:2;
unsigned int mip_count:4;
unsigned int width:13;
unsigned int height:13;
} ss2;
 
struct {
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int pad:1;
unsigned int pitch:18;
unsigned int depth:11;
} ss3;
struct {
unsigned int pad:19;
unsigned int min_array_elt:9;
unsigned int min_lod:4;
} ss4;
 
struct {
unsigned int pad:20;
unsigned int y_offset:4;
unsigned int pad2:1;
unsigned int x_offset:7;
} ss5;
};
 
/* Surface state DW0 */
#define GEN4_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN4_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN4_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN4_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN4_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN4_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN4_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN4_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN4_SURFACE_FORMAT_SHIFT 18
#define GEN4_SURFACE_FORMAT_MASK _MASK(26, 18)
 
#define GEN4_SURFACE_TYPE_SHIFT 29
#define GEN4_SURFACE_TYPE_MASK _MASK(31, 29)
#define GEN4_SURFACE_1D 0
#define GEN4_SURFACE_2D 1
#define GEN4_SURFACE_3D 2
#define GEN4_SURFACE_CUBE 3
#define GEN4_SURFACE_BUFFER 4
#define GEN4_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN4_SURFACE_HEIGHT_SHIFT 19
#define GEN4_SURFACE_HEIGHT_MASK _MASK(31, 19)
#define GEN4_SURFACE_WIDTH_SHIFT 6
#define GEN4_SURFACE_WIDTH_MASK _MASK(18, 6)
#define GEN4_SURFACE_LOD_SHIFT 2
#define GEN4_SURFACE_LOD_MASK _MASK(5, 2)
 
/* Surface state DW3 */
#define GEN4_SURFACE_DEPTH_SHIFT 21
#define GEN4_SURFACE_DEPTH_MASK _MASK(31, 21)
#define GEN4_SURFACE_PITCH_SHIFT 3
#define GEN4_SURFACE_PITCH_MASK _MASK(19, 3)
#define GEN4_SURFACE_TILED (1 << 1)
#define GEN4_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN4_SURFACE_MIN_LOD_SHIFT 28
#define GEN4_SURFACE_MIN_LOD_MASK _MASK(31, 28)
 
/* Surface state DW5 */
#define GEN4_SURFACE_X_OFFSET_SHIFT 25
#define GEN4_SURFACE_X_OFFSET_MASK _MASK(31, 25)
#define GEN4_SURFACE_Y_OFFSET_SHIFT 20
#define GEN4_SURFACE_Y_OFFSET_MASK _MASK(23, 20)
 
 
struct gen4_vertex_buffer_state
{
struct {
unsigned int pitch:11;
unsigned int pad:15;
unsigned int access_type:1;
unsigned int vb_index:5;
} vb0;
unsigned int start_addr;
unsigned int max_index;
#if 1
unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
#endif
};
 
#define GEN4_VBP_MAX 17
 
struct gen4_vb_array_state {
struct header header;
struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
};
 
 
struct gen4_vertex_element_state
{
struct
{
unsigned int src_offset:11;
unsigned int pad:5;
unsigned int src_format:9;
unsigned int pad0:1;
unsigned int valid:1;
unsigned int vertex_buffer_index:5;
} ve0;
struct
{
unsigned int dst_offset:8;
unsigned int pad:8;
unsigned int vfcomponent3:4;
unsigned int vfcomponent2:4;
unsigned int vfcomponent1:4;
unsigned int vfcomponent0:4;
} ve1;
};
 
#define GEN4_VEP_MAX 18
 
struct gen4_vertex_element_packet {
struct header header;
struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
};
 
 
struct gen4_urb_immediate {
unsigned int opcode:4;
unsigned int offset:6;
unsigned int swizzle_control:2;
unsigned int pad:1;
unsigned int allocate:1;
unsigned int used:1;
unsigned int complete:1;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
};
 
/* Instruction format for the execution units:
*/
struct gen4_instruction
{
struct
{
unsigned int opcode:7;
unsigned int pad:1;
unsigned int access_mode:1;
unsigned int mask_control:1;
unsigned int dependency_control:2;
unsigned int compression_control:2;
unsigned int thread_control:2;
unsigned int predicate_control:4;
unsigned int predicate_inverse:1;
unsigned int execution_size:3;
unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
unsigned int pad0:2;
unsigned int debug_control:1;
unsigned int saturate:1;
} header;
 
union {
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad:1;
unsigned int dest_subreg_nr:5;
unsigned int dest_reg_nr:8;
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} da1;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad:6;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} ia1;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad0:1;
unsigned int dest_writemask:4;
unsigned int dest_subreg_nr:1;
unsigned int dest_reg_nr:8;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} da16;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad0:6;
unsigned int dest_writemask:4;
int dest_indirect_offset:6;
unsigned int dest_subreg_nr:3;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} ia16;
} bits1;
 
 
union {
struct
{
unsigned int src0_subreg_nr:5;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} da1;
 
struct
{
int src0_indirect_offset:10;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} ia1;
 
struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
unsigned int src0_subreg_nr:1;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} da16;
 
struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
int src0_indirect_offset:6;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia16;
 
} bits2;
 
union
{
struct
{
unsigned int src1_subreg_nr:5;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int pad0:7;
} da1;
 
struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
unsigned int src1_subreg_nr:1;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int pad2:7;
} da16;
 
struct
{
int src1_indirect_offset:10;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia1;
 
struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
int src1_indirect_offset:6;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad2:6;
} ia16;
 
 
struct
{
int jump_count:16; /* note: signed */
unsigned int pop_count:4;
unsigned int pad0:12;
} if_else;
 
struct {
unsigned int function:4;
unsigned int int_type:1;
unsigned int precision:1;
unsigned int saturate:1;
unsigned int data_type:1;
unsigned int pad0:8;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} math;
 
struct {
unsigned int binding_table_index:8;
unsigned int sampler:4;
unsigned int return_format:2;
unsigned int msg_type:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} sampler;
 
struct gen4_urb_immediate urb;
 
struct {
unsigned int binding_table_index:8;
unsigned int msg_control:4;
unsigned int msg_type:2;
unsigned int target_cache:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_read;
 
struct {
unsigned int binding_table_index:8;
unsigned int msg_control:3;
unsigned int pixel_scoreboard_clear:1;
unsigned int msg_type:3;
unsigned int send_commit_msg:1;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_write;
 
struct {
unsigned int pad:16;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} generic;
 
unsigned int ud;
} bits3;
};
 
/* media pipeline */
 
struct gen4_vfe_state {
struct {
unsigned int per_thread_scratch_space:4;
unsigned int pad3:3;
unsigned int extend_vfe_state_present:1;
unsigned int pad2:2;
unsigned int scratch_base:22;
} vfe0;
 
struct {
unsigned int debug_counter_control:2;
unsigned int children_present:1;
unsigned int vfe_mode:4;
unsigned int pad2:2;
unsigned int num_urb_entries:7;
unsigned int urb_entry_alloc_size:9;
unsigned int max_threads:7;
} vfe1;
 
struct {
unsigned int pad4:4;
unsigned int interface_descriptor_base:28;
} vfe2;
};
 
struct gen4_vld_state {
struct {
unsigned int pad6:6;
unsigned int scan_order:1;
unsigned int intra_vlc_format:1;
unsigned int quantizer_scale_type:1;
unsigned int concealment_motion_vector:1;
unsigned int frame_predict_frame_dct:1;
unsigned int top_field_first:1;
unsigned int picture_structure:2;
unsigned int intra_dc_precision:2;
unsigned int f_code_0_0:4;
unsigned int f_code_0_1:4;
unsigned int f_code_1_0:4;
unsigned int f_code_1_1:4;
} vld0;
 
struct {
unsigned int pad2:9;
unsigned int picture_coding_type:2;
unsigned int pad:21;
} vld1;
 
struct {
unsigned int index_0:4;
unsigned int index_1:4;
unsigned int index_2:4;
unsigned int index_3:4;
unsigned int index_4:4;
unsigned int index_5:4;
unsigned int index_6:4;
unsigned int index_7:4;
} desc_remap_table0;
 
struct {
unsigned int index_8:4;
unsigned int index_9:4;
unsigned int index_10:4;
unsigned int index_11:4;
unsigned int index_12:4;
unsigned int index_13:4;
unsigned int index_14:4;
unsigned int index_15:4;
} desc_remap_table1;
};
 
struct gen4_interface_descriptor {
struct {
unsigned int grf_reg_blocks:4;
unsigned int pad:2;
unsigned int kernel_start_pointer:26;
} desc0;
 
struct {
unsigned int pad:7;
unsigned int software_exception:1;
unsigned int pad2:3;
unsigned int maskstack_exception:1;
unsigned int pad3:1;
unsigned int illegal_opcode_exception:1;
unsigned int pad4:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int single_program_flow:1;
unsigned int pad5:1;
unsigned int const_urb_entry_read_offset:6;
unsigned int const_urb_entry_read_len:6;
} desc1;
 
struct {
unsigned int pad:2;
unsigned int sampler_count:3;
unsigned int sampler_state_pointer:27;
} desc2;
 
struct {
unsigned int binding_table_entry_count:5;
unsigned int binding_table_pointer:27;
} desc3;
};
 
struct gen6_blend_state
{
struct {
unsigned int dest_blend_factor:5;
unsigned int source_blend_factor:5;
unsigned int pad3:1;
unsigned int blend_func:3;
unsigned int pad2:1;
unsigned int ia_dest_blend_factor:5;
unsigned int ia_source_blend_factor:5;
unsigned int pad1:1;
unsigned int ia_blend_func:3;
unsigned int pad0:1;
unsigned int ia_blend_enable:1;
unsigned int blend_enable:1;
} blend0;
 
struct {
unsigned int post_blend_clamp_enable:1;
unsigned int pre_blend_clamp_enable:1;
unsigned int clamp_range:2;
unsigned int pad0:4;
unsigned int x_dither_offset:2;
unsigned int y_dither_offset:2;
unsigned int dither_enable:1;
unsigned int alpha_test_func:3;
unsigned int alpha_test_enable:1;
unsigned int pad1:1;
unsigned int logic_op_func:4;
unsigned int logic_op_enable:1;
unsigned int pad2:1;
unsigned int write_disable_b:1;
unsigned int write_disable_g:1;
unsigned int write_disable_r:1;
unsigned int write_disable_a:1;
unsigned int pad3:1;
unsigned int alpha_to_coverage_dither:1;
unsigned int alpha_to_one:1;
unsigned int alpha_to_coverage:1;
} blend1;
};
 
struct gen6_color_calc_state
{
struct {
unsigned int alpha_test_format:1;
unsigned int pad0:14;
unsigned int round_disable:1;
unsigned int bf_stencil_ref:8;
unsigned int stencil_ref:8;
} cc0;
 
union {
float alpha_ref_f;
struct {
unsigned int ui:8;
unsigned int pad0:24;
} alpha_ref_fi;
} cc1;
 
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
struct gen6_depth_stencil_state
{
struct {
unsigned int pad0:3;
unsigned int bf_stencil_pass_depth_pass_op:3;
unsigned int bf_stencil_pass_depth_fail_op:3;
unsigned int bf_stencil_fail_op:3;
unsigned int bf_stencil_func:3;
unsigned int bf_stencil_enable:1;
unsigned int pad1:2;
unsigned int stencil_write_enable:1;
unsigned int stencil_pass_depth_pass_op:3;
unsigned int stencil_pass_depth_fail_op:3;
unsigned int stencil_fail_op:3;
unsigned int stencil_func:3;
unsigned int stencil_enable:1;
} ds0;
 
struct {
unsigned int bf_stencil_write_mask:8;
unsigned int bf_stencil_test_mask:8;
unsigned int stencil_write_mask:8;
unsigned int stencil_test_mask:8;
} ds1;
 
struct {
unsigned int pad0:26;
unsigned int depth_write_enable:1;
unsigned int depth_test_func:3;
unsigned int pad1:1;
unsigned int depth_test_enable:1;
} ds2;
};
 
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
typedef enum {
WM_KERNEL = 0,
WM_KERNEL_P,
 
WM_KERNEL_MASK,
WM_KERNEL_MASK_P,
 
WM_KERNEL_MASKCA,
WM_KERNEL_MASKCA_P,
 
WM_KERNEL_MASKSA,
WM_KERNEL_MASKSA_P,
 
WM_KERNEL_OPACITY,
WM_KERNEL_OPACITY_P,
 
WM_KERNEL_VIDEO_PLANAR,
WM_KERNEL_VIDEO_PACKED,
KERNEL_COUNT
} wm_kernel_t;
 
#endif
/drivers/video/Intel-2D/i915_drm.h
0,0 → 1,954
/*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef _I915_DRM_H_
#define _I915_DRM_H_
 
#include "drm.h"
 
/* Please note that modifications to all structs defined here are
* subject to backwards-compatibility constraints.
*/
 
 
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
* of chars for next/prev indices */
#define I915_LOG_MIN_TEX_REGION_SIZE 14
 
typedef struct _drm_i915_init {
enum {
I915_INIT_DMA = 0x01,
I915_CLEANUP_DMA = 0x02,
I915_RESUME_DMA = 0x03
} func;
unsigned int mmio_offset;
int sarea_priv_offset;
unsigned int ring_start;
unsigned int ring_end;
unsigned int ring_size;
unsigned int front_offset;
unsigned int back_offset;
unsigned int depth_offset;
unsigned int w;
unsigned int h;
unsigned int pitch;
unsigned int pitch_bits;
unsigned int back_pitch;
unsigned int depth_pitch;
unsigned int cpp;
unsigned int chipset;
} drm_i915_init_t;
 
typedef struct _drm_i915_sarea {
struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1];
int last_upload; /* last time texture was uploaded */
int last_enqueue; /* last time a buffer was enqueued */
int last_dispatch; /* age of the most recently dispatched buffer */
int ctxOwner; /* last context to upload state */
int texAge;
int pf_enabled; /* is pageflipping allowed? */
int pf_active;
int pf_current_page; /* which buffer is being displayed? */
int perf_boxes; /* performance boxes to be displayed */
int width, height; /* screen size in pixels */
 
drm_handle_t front_handle;
int front_offset;
int front_size;
 
drm_handle_t back_handle;
int back_offset;
int back_size;
 
drm_handle_t depth_handle;
int depth_offset;
int depth_size;
 
drm_handle_t tex_handle;
int tex_offset;
int tex_size;
int log_tex_granularity;
int pitch;
int rotation; /* 0, 90, 180 or 270 */
int rotated_offset;
int rotated_size;
int rotated_pitch;
int virtualX, virtualY;
 
unsigned int front_tiled;
unsigned int back_tiled;
unsigned int depth_tiled;
unsigned int rotated_tiled;
unsigned int rotated2_tiled;
 
int pipeA_x;
int pipeA_y;
int pipeA_w;
int pipeA_h;
int pipeB_x;
int pipeB_y;
int pipeB_w;
int pipeB_h;
 
/* fill out some space for old userspace triple buffer */
drm_handle_t unused_handle;
__u32 unused1, unused2, unused3;
 
/* buffer object handles for static buffers. May change
* over the lifetime of the client.
*/
__u32 front_bo_handle;
__u32 back_bo_handle;
__u32 unused_bo_handle;
__u32 depth_bo_handle;
 
} drm_i915_sarea_t;
 
/* due to userspace building against these headers we need some compat here */
#define planeA_x pipeA_x
#define planeA_y pipeA_y
#define planeA_w pipeA_w
#define planeA_h pipeA_h
#define planeB_x pipeB_x
#define planeB_y pipeB_y
#define planeB_w pipeB_w
#define planeB_h pipeB_h
 
/* Flags for perf_boxes
*/
#define I915_BOX_RING_EMPTY 0x1
#define I915_BOX_FLIP 0x2
#define I915_BOX_WAIT 0x4
#define I915_BOX_TEXTURE_LOAD 0x8
#define I915_BOX_LOST_CONTEXT 0x10
 
/* I915 specific ioctls
* The device specific ioctl range is 0x40 to 0x79.
*/
#define DRM_I915_INIT 0x00
#define DRM_I915_FLUSH 0x01
#define DRM_I915_FLIP 0x02
#define DRM_I915_BATCHBUFFER 0x03
#define DRM_I915_IRQ_EMIT 0x04
#define DRM_I915_IRQ_WAIT 0x05
#define DRM_I915_GETPARAM 0x06
#define DRM_I915_SETPARAM 0x07
#define DRM_I915_ALLOC 0x08
#define DRM_I915_FREE 0x09
#define DRM_I915_INIT_HEAP 0x0a
#define DRM_I915_CMDBUFFER 0x0b
#define DRM_I915_DESTROY_HEAP 0x0c
#define DRM_I915_SET_VBLANK_PIPE 0x0d
#define DRM_I915_GET_VBLANK_PIPE 0x0e
#define DRM_I915_VBLANK_SWAP 0x0f
#define DRM_I915_HWS_ADDR 0x11
#define DRM_I915_GEM_INIT 0x13
#define DRM_I915_GEM_EXECBUFFER 0x14
#define DRM_I915_GEM_PIN 0x15
#define DRM_I915_GEM_UNPIN 0x16
#define DRM_I915_GEM_BUSY 0x17
#define DRM_I915_GEM_THROTTLE 0x18
#define DRM_I915_GEM_ENTERVT 0x19
#define DRM_I915_GEM_LEAVEVT 0x1a
#define DRM_I915_GEM_CREATE 0x1b
#define DRM_I915_GEM_PREAD 0x1c
#define DRM_I915_GEM_PWRITE 0x1d
#define DRM_I915_GEM_MMAP 0x1e
#define DRM_I915_GEM_SET_DOMAIN 0x1f
#define DRM_I915_GEM_SW_FINISH 0x20
#define DRM_I915_GEM_SET_TILING 0x21
#define DRM_I915_GEM_GET_TILING 0x22
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
#define DRM_I915_GEM_MADVISE 0x26
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c
#define DRM_I915_GEM_CONTEXT_CREATE 0x2d
#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e
#define DRM_I915_GEM_SET_CACHEING 0x2f
#define DRM_I915_GEM_GET_CACHEING 0x30
#define DRM_I915_REG_READ 0x31
 
#define DRM_IOCTL_I915_INIT
#define DRM_IOCTL_I915_FLUSH
#define DRM_IOCTL_I915_FLIP
#define DRM_IOCTL_I915_BATCHBUFFER
#define DRM_IOCTL_I915_IRQ_EMIT
#define DRM_IOCTL_I915_IRQ_WAIT
#define DRM_IOCTL_I915_GETPARAM SRV_GET_PARAM
#define DRM_IOCTL_I915_SETPARAM
#define DRM_IOCTL_I915_ALLOC
#define DRM_IOCTL_I915_FREE
#define DRM_IOCTL_I915_INIT_HEAP
#define DRM_IOCTL_I915_CMDBUFFER
#define DRM_IOCTL_I915_DESTROY_HEAP
#define DRM_IOCTL_I915_SET_VBLANK_PIPE
#define DRM_IOCTL_I915_GET_VBLANK_PIPE
#define DRM_IOCTL_I915_VBLANK_SWAP
#define DRM_IOCTL_I915_HWS_ADDR
#define DRM_IOCTL_I915_GEM_INIT
#define DRM_IOCTL_I915_GEM_EXECBUFFER
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 SRV_I915_GEM_EXECBUFFER2
#define DRM_IOCTL_I915_GEM_PIN SRV_I915_GEM_PIN
#define DRM_IOCTL_I915_GEM_UNPIN
#define DRM_IOCTL_I915_GEM_BUSY SRV_I915_GEM_BUSY
#define DRM_IOCTL_I915_GEM_SET_CACHEING SRV_I915_GEM_SET_CACHEING
#define DRM_IOCTL_I915_GEM_GET_CACHEING
#define DRM_IOCTL_I915_GEM_THROTTLE SRV_I915_GEM_THROTTLE
#define DRM_IOCTL_I915_GEM_ENTERVT
#define DRM_IOCTL_I915_GEM_LEAVEVT
#define DRM_IOCTL_I915_GEM_CREATE SRV_I915_GEM_CREATE
#define DRM_IOCTL_I915_GEM_PREAD
#define DRM_IOCTL_I915_GEM_PWRITE SRV_I915_GEM_PWRITE
#define DRM_IOCTL_I915_GEM_MMAP SRV_I915_GEM_MMAP
#define DRM_IOCTL_I915_GEM_MMAP_GTT SRV_I915_GEM_MMAP_GTT
#define DRM_IOCTL_I915_GEM_SET_DOMAIN SRV_I915_GEM_SET_DOMAIN
#define DRM_IOCTL_I915_GEM_SW_FINISH
#define DRM_IOCTL_I915_GEM_SET_TILING
#define DRM_IOCTL_I915_GEM_GET_TILING
#define DRM_IOCTL_I915_GEM_GET_APERTURE SRV_I915_GEM_GET_APERTURE
#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID
#define DRM_IOCTL_I915_GEM_MADVISE
#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE
#define DRM_IOCTL_I915_OVERLAY_ATTRS
#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY
#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY
#define DRM_IOCTL_I915_GEM_WAIT
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE
#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY
#define DRM_IOCTL_I915_REG_READ
 
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
*/
typedef struct drm_i915_batchbuffer {
int start; /* agp offset */
int used; /* nr bytes in use */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */
} drm_i915_batchbuffer_t;
 
/* As above, but pass a pointer to userspace buffer which can be
* validated by the kernel prior to sending to hardware.
*/
typedef struct _drm_i915_cmdbuffer {
char *buf; /* pointer to userspace command buffer */
int sz; /* nr bytes in buf */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */
} drm_i915_cmdbuffer_t;
 
/* Userspace can request & wait on irq's:
*/
typedef struct drm_i915_irq_emit {
int *irq_seq;
} drm_i915_irq_emit_t;
 
typedef struct drm_i915_irq_wait {
int irq_seq;
} drm_i915_irq_wait_t;
 
/* Ioctl to query kernel params:
*/
#define I915_PARAM_IRQ_ACTIVE 1
#define I915_PARAM_ALLOW_BATCHBUFFER 2
#define I915_PARAM_LAST_DISPATCH 3
#define I915_PARAM_CHIPSET_ID 4
#define I915_PARAM_HAS_GEM 5
#define I915_PARAM_NUM_FENCES_AVAIL 6
#define I915_PARAM_HAS_OVERLAY 7
#define I915_PARAM_HAS_PAGEFLIPPING 8
#define I915_PARAM_HAS_EXECBUF2 9
#define I915_PARAM_HAS_BSD 10
#define I915_PARAM_HAS_BLT 11
#define I915_PARAM_HAS_RELAXED_FENCING 12
#define I915_PARAM_HAS_COHERENT_RINGS 13
#define I915_PARAM_HAS_EXEC_CONSTANTS 14
#define I915_PARAM_HAS_RELAXED_DELTA 15
#define I915_PARAM_HAS_GEN7_SOL_RESET 16
#define I915_PARAM_HAS_LLC 17
#define I915_PARAM_HAS_ALIASING_PPGTT 18
#define I915_PARAM_HAS_WAIT_TIMEOUT 19
 
typedef struct drm_i915_getparam {
int param;
int *value;
} drm_i915_getparam_t;
 
/* Ioctl to set kernel params:
*/
#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
#define I915_SETPARAM_ALLOW_BATCHBUFFER 3
#define I915_SETPARAM_NUM_USED_FENCES 4
 
typedef struct drm_i915_setparam {
int param;
int value;
} drm_i915_setparam_t;
 
/* A memory manager for regions of shared memory:
*/
#define I915_MEM_REGION_AGP 1
 
typedef struct drm_i915_mem_alloc {
int region;
int alignment;
int size;
int *region_offset; /* offset from start of fb or agp */
} drm_i915_mem_alloc_t;
 
typedef struct drm_i915_mem_free {
int region;
int region_offset;
} drm_i915_mem_free_t;
 
typedef struct drm_i915_mem_init_heap {
int region;
int size;
int start;
} drm_i915_mem_init_heap_t;
 
/* Allow memory manager to be torn down and re-initialized (eg on
* rotate):
*/
typedef struct drm_i915_mem_destroy_heap {
int region;
} drm_i915_mem_destroy_heap_t;
 
/* Allow X server to configure which pipes to monitor for vblank signals
*/
#define DRM_I915_VBLANK_PIPE_A 1
#define DRM_I915_VBLANK_PIPE_B 2
 
typedef struct drm_i915_vblank_pipe {
int pipe;
} drm_i915_vblank_pipe_t;
 
/* Schedule buffer swap at given vertical blank:
*/
typedef struct drm_i915_vblank_swap {
drm_drawable_t drawable;
enum drm_vblank_seq_type seqtype;
unsigned int sequence;
} drm_i915_vblank_swap_t;
 
typedef struct drm_i915_hws_addr {
__u64 addr;
} drm_i915_hws_addr_t;
 
struct drm_i915_gem_init {
/**
* Beginning offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_start;
/**
* Ending offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_end;
};
 
struct drm_i915_gem_create {
/**
* Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*/
__u64 size;
/**
* Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
__u32 pad;
};
 
struct drm_i915_gem_pread {
/** Handle for the object being read. */
__u32 handle;
__u32 pad;
/** Offset into the object to read from */
__u64 offset;
/** Length of data to read */
__u64 size;
/**
* Pointer to write the data into.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
 
struct drm_i915_gem_pwrite {
/** Handle for the object being written to. */
__u32 handle;
__u32 pad;
/** Offset into the object to write to */
__u64 offset;
/** Length of data to write */
__u64 size;
/**
* Pointer to read the data from.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
 
struct drm_i915_gem_mmap {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/** Offset in the object to map. */
__u64 offset;
/**
* Length of data to map.
*
* The value will be page-aligned.
*/
__u64 size;
/**
* Returned pointer the data was mapped at.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 addr_ptr;
};
 
struct drm_i915_gem_mmap_gtt {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/**
* Fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
};
 
struct drm_i915_gem_set_domain {
/** Handle for the object */
__u32 handle;
 
/** New read domains */
__u32 read_domains;
 
/** New write domain */
__u32 write_domain;
};
 
struct drm_i915_gem_sw_finish {
/** Handle for the object */
__u32 handle;
};
 
struct drm_i915_gem_relocation_entry {
/**
* Handle of the buffer being pointed to by this relocation entry.
*
* It's appealing to make this be an index into the mm_validate_entry
* list to refer to the buffer, but this allows the driver to create
* a relocation list for state buffers and not re-write it per
* exec using the buffer.
*/
__u32 target_handle;
 
/**
* Value to be added to the offset of the target buffer to make up
* the relocation entry.
*/
__u32 delta;
 
/** Offset in the buffer the relocation entry will be written into */
__u64 offset;
 
/**
* Offset value of the target buffer that the relocation entry was last
* written as.
*
* If the buffer has the same offset as last time, we can skip syncing
* and writing the relocation. This value is written back out by
* the execbuffer ioctl when the relocation is written.
*/
__u64 presumed_offset;
 
/**
* Target memory domains read by this operation.
*/
__u32 read_domains;
 
/**
* Target memory domains written by this operation.
*
* Note that only one domain may be written by the whole
* execbuffer operation, so that where there are conflicts,
* the application will get -EINVAL back.
*/
__u32 write_domain;
};
 
/** @{
* Intel memory domains
*
* Most of these just align with the various caches in
* the system and are used to flush and invalidate as
* objects end up cached in different domains.
*/
/** CPU cache */
#define I915_GEM_DOMAIN_CPU 0x00000001
/** Render cache, used by 2D and 3D drawing */
#define I915_GEM_DOMAIN_RENDER 0x00000002
/** Sampler cache, used by texture engine */
#define I915_GEM_DOMAIN_SAMPLER 0x00000004
/** Command queue, used to load batch buffers */
#define I915_GEM_DOMAIN_COMMAND 0x00000008
/** Instruction cache, used by shader programs */
#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010
/** Vertex address cache */
#define I915_GEM_DOMAIN_VERTEX 0x00000020
/** GTT domain - aperture and scanout */
#define I915_GEM_DOMAIN_GTT 0x00000040
/** @} */
 
struct drm_i915_gem_exec_object {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
 
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
 
/** Required alignment in graphics aperture */
__u64 alignment;
 
/**
* Returned value of the updated offset of the object, for future
* presumed_offset writes.
*/
__u64 offset;
};
 
struct drm_i915_gem_execbuffer {
/**
* List of buffers to be validated with their relocations to be
* performend on them.
*
* This is a pointer to an array of struct drm_i915_gem_validate_entry.
*
* These buffers must be listed in an order such that all relocations
* a buffer is performing refer to buffers that have already appeared
* in the validate list.
*/
__u64 buffers_ptr;
__u32 buffer_count;
 
/** Offset in the batchbuffer to start execution from. */
__u32 batch_start_offset;
/** Bytes used in batchbuffer from batch_start_offset */
__u32 batch_len;
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
};
 
struct drm_i915_gem_exec_object2 {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
 
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
 
/** Required alignment in graphics aperture */
__u64 alignment;
 
/**
* Returned value of the updated offset of the object, for future
* presumed_offset writes.
*/
__u64 offset;
 
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
__u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
 
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
*/
__u64 buffers_ptr;
__u32 buffer_count;
 
/** Offset in the batchbuffer to start execution from. */
__u32 batch_start_offset;
/** Bytes used in batchbuffer from batch_start_offset */
__u32 batch_len;
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (7<<0)
#define I915_EXEC_DEFAULT (0<<0)
#define I915_EXEC_RENDER (1<<0)
#define I915_EXEC_BSD (2<<0)
#define I915_EXEC_BLT (3<<0)
 
/* Used for switching the constants addressing mode on gen4+ RENDER ring.
* Gen6+ only supports relative addressing to dynamic state (default) and
* absolute addressing.
*
* These flags are ignored for the BSD and BLT rings.
*/
#define I915_EXEC_CONSTANTS_MASK (3<<6)
#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
__u64 flags;
__u64 rsvd1; /* now used for context info */
__u64 rsvd2;
};
 
/** Resets the SO write offset registers for transform feedback on gen7. */
#define I915_EXEC_GEN7_SOL_RESET (1<<8)
 
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
__u32 pad;
 
/** alignment required within the aperture */
__u64 alignment;
 
/** Returned GTT offset of the buffer. */
__u64 offset;
};
 
struct drm_i915_gem_unpin {
/** Handle of the buffer to be unpinned. */
__u32 handle;
__u32 pad;
};
 
struct drm_i915_gem_busy {
/** Handle of the buffer to check for busy */
__u32 handle;
 
/** Return busy status (1 if busy, 0 if idle).
* The high word is used to indicate on which rings the object
* currently resides:
* 16:31 - busy (r or r/w) rings (16 render, 17 bsd, 18 blt, etc)
*/
__u32 busy;
};
 
#define I915_CACHEING_NONE 0
#define I915_CACHEING_CACHED 1
 
struct drm_i915_gem_cacheing {
/**
* Handle of the buffer to set/get the cacheing level of. */
__u32 handle;
 
/**
* Cacheing level to apply or return value
*
* bits0-15 are for generic cacheing control (i.e. the above defined
* values). bits16-31 are reserved for platform-specific variations
* (e.g. l3$ caching on gen7). */
__u32 cacheing;
};
 
#define I915_TILING_NONE 0
#define I915_TILING_X 1
#define I915_TILING_Y 2
 
#define I915_BIT_6_SWIZZLE_NONE 0
#define I915_BIT_6_SWIZZLE_9 1
#define I915_BIT_6_SWIZZLE_9_10 2
#define I915_BIT_6_SWIZZLE_9_11 3
#define I915_BIT_6_SWIZZLE_9_10_11 4
/* Not seen by userland */
#define I915_BIT_6_SWIZZLE_UNKNOWN 5
/* Seen by userland. */
#define I915_BIT_6_SWIZZLE_9_17 6
#define I915_BIT_6_SWIZZLE_9_10_17 7
 
struct drm_i915_gem_set_tiling {
/** Handle of the buffer to have its tiling state updated */
__u32 handle;
 
/**
* Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*
* This value is to be set on request, and will be updated by the
* kernel on successful return with the actual chosen tiling layout.
*
* The tiling mode may be demoted to I915_TILING_NONE when the system
* has bit 6 swizzling that can't be managed correctly by GEM.
*
* Buffer contents become undefined when changing tiling_mode.
*/
__u32 tiling_mode;
 
/**
* Stride in bytes for the object when in I915_TILING_X or
* I915_TILING_Y.
*/
__u32 stride;
 
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
};
 
struct drm_i915_gem_get_tiling {
/** Handle of the buffer to get tiling state for. */
__u32 handle;
 
/**
* Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*/
__u32 tiling_mode;
 
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
};
 
struct drm_i915_gem_get_aperture {
/** Total size of the aperture used by i915_gem_execbuffer, in bytes */
__u64 aper_size;
 
/**
* Available space in the aperture used by i915_gem_execbuffer, in
* bytes
*/
__u64 aper_available_size;
};
 
struct drm_i915_get_pipe_from_crtc_id {
/** ID of CRTC being requested **/
__u32 crtc_id;
 
/** pipe of requested CRTC **/
__u32 pipe;
};
 
#define I915_MADV_WILLNEED 0
#define I915_MADV_DONTNEED 1
#define __I915_MADV_PURGED 2 /* internal state */
 
struct drm_i915_gem_madvise {
/** Handle of the buffer to change the backing store advice */
__u32 handle;
 
/* Advice: either the buffer will be needed again in the near future,
* or wont be and could be discarded under memory pressure.
*/
__u32 madv;
 
/** Whether the backing store still exists. */
__u32 retained;
};
 
/* flags */
#define I915_OVERLAY_TYPE_MASK 0xff
#define I915_OVERLAY_YUV_PLANAR 0x01
#define I915_OVERLAY_YUV_PACKED 0x02
#define I915_OVERLAY_RGB 0x03
 
#define I915_OVERLAY_DEPTH_MASK 0xff00
#define I915_OVERLAY_RGB24 0x1000
#define I915_OVERLAY_RGB16 0x2000
#define I915_OVERLAY_RGB15 0x3000
#define I915_OVERLAY_YUV422 0x0100
#define I915_OVERLAY_YUV411 0x0200
#define I915_OVERLAY_YUV420 0x0300
#define I915_OVERLAY_YUV410 0x0400
 
#define I915_OVERLAY_SWAP_MASK 0xff0000
#define I915_OVERLAY_NO_SWAP 0x000000
#define I915_OVERLAY_UV_SWAP 0x010000
#define I915_OVERLAY_Y_SWAP 0x020000
#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000
 
#define I915_OVERLAY_FLAGS_MASK 0xff000000
#define I915_OVERLAY_ENABLE 0x01000000
 
struct drm_intel_overlay_put_image {
/* various flags and src format description */
__u32 flags;
/* source picture description */
__u32 bo_handle;
/* stride values and offsets are in bytes, buffer relative */
__u16 stride_Y; /* stride for packed formats */
__u16 stride_UV;
__u32 offset_Y; /* offset for packet formats */
__u32 offset_U;
__u32 offset_V;
/* in pixels */
__u16 src_width;
__u16 src_height;
/* to compensate the scaling factors for partially covered surfaces */
__u16 src_scan_width;
__u16 src_scan_height;
/* output crtc description */
__u32 crtc_id;
__u16 dst_x;
__u16 dst_y;
__u16 dst_width;
__u16 dst_height;
};
 
/* flags */
#define I915_OVERLAY_UPDATE_ATTRS (1<<0)
#define I915_OVERLAY_UPDATE_GAMMA (1<<1)
struct drm_intel_overlay_attrs {
__u32 flags;
__u32 color_key;
__s32 brightness;
__u32 contrast;
__u32 saturation;
__u32 gamma0;
__u32 gamma1;
__u32 gamma2;
__u32 gamma3;
__u32 gamma4;
__u32 gamma5;
};
 
/*
* Intel sprite handling
*
* Color keying works with a min/mask/max tuple. Both source and destination
* color keying is allowed.
*
* Source keying:
* Sprite pixels within the min & max values, masked against the color channels
* specified in the mask field, will be transparent. All other pixels will
* be displayed on top of the primary plane. For RGB surfaces, only the min
* and mask fields will be used; ranged compares are not allowed.
*
* Destination keying:
* Primary plane pixels that match the min value, masked against the color
* channels specified in the mask field, will be replaced by corresponding
* pixels from the sprite plane.
*
* Note that source & destination keying are exclusive; only one can be
* active on a given plane.
*/
 
#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */
#define I915_SET_COLORKEY_DESTINATION (1<<1)
#define I915_SET_COLORKEY_SOURCE (1<<2)
struct drm_intel_sprite_colorkey {
__u32 plane_id;
__u32 min_value;
__u32 channel_mask;
__u32 max_value;
__u32 flags;
};
 
struct drm_i915_gem_wait {
/** Handle of BO we shall wait on */
__u32 bo_handle;
__u32 flags;
/** Number of nanoseconds to wait, Returns time remaining. */
__s64 timeout_ns;
};
 
struct drm_i915_gem_context_create {
/* output: id of new context*/
__u32 ctx_id;
__u32 pad;
};
 
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
__u32 pad;
};
 
struct drm_i915_reg_read {
__u64 offset;
__u64 val; /* Return value */
};
 
struct drm_i915_mask_update {
__u32 handle;
__u32 width;
__u32 height;
__u32 bo_size;
__u32 bo_pitch;
__u32 bo_map;
};
 
#endif /* _I915_DRM_H_ */
/drivers/video/Intel-2D/sna_reg.h
0,0 → 1,82
#ifndef SNA_REG_H
#define SNA_REG_H
 
/* Flush */
#define MI_FLUSH (0x04<<23)
#define MI_FLUSH_DW (0x26<<23)
 
#define MI_WRITE_DIRTY_STATE (1<<4)
#define MI_END_SCENE (1<<3)
#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2)
#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1)
#define MI_INVALIDATE_MAP_CACHE (1<<0)
/* broadwater flush bits */
#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3)
 
#define MI_BATCH_BUFFER_END (0xA << 23)
 
/* Noop */
#define MI_NOOP 0x00
#define MI_NOOP_WRITE_ID (1<<22)
#define MI_NOOP_ID_MASK (1<<22 - 1)
 
/* Wait for Events */
#define MI_WAIT_FOR_EVENT (0x03<<23)
#define MI_WAIT_FOR_PIPEB_SVBLANK (1<<18)
#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17)
#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16)
#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7)
#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5)
#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3)
#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1)
 
/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
#define MI_LOAD_SCAN_LINES_INCL (0x12<<23)
#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0)
#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20)
 
/* BLT commands */
#define BLT_WRITE_ALPHA (1<<21)
#define BLT_WRITE_RGB (1<<20)
#define BLT_SRC_TILED (1<<15)
#define BLT_DST_TILED (1<<11)
 
#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4))
#define XY_SETUP_BLT ((2<<29)|(1<<22)|6)
#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7)
#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1)
#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1)
#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16))
#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4)
#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
 
/* FLUSH commands */
#define BRW_3D(Pipeline,Opcode,Subopcode) \
((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
#define PIPE_CONTROL BRW_3D(3, 2, 0)
#define PIPE_CONTROL_NOWRITE (0 << 14)
#define PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define PIPE_CONTROL_WRITE_TIME (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define PIPE_CONTROL_WC_FLUSH (1 << 12)
#define PIPE_CONTROL_IS_FLUSH (1 << 11)
#define PIPE_CONTROL_TC_FLUSH (1 << 10)
#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
#endif
/drivers/video/Intel-2D/sna_render_inline.h
0,0 → 1,71
#ifndef SNA_RENDER_INLINE_H
#define SNA_RENDER_INLINE_H
 
static inline bool need_tiling(struct sna *sna, int16_t width, int16_t height)
{
/* Is the damage area too large to fit in 3D pipeline,
* and so do we need to split the operation up into tiles?
*/
return (width > sna->render.max_3d_size ||
height > sna->render.max_3d_size);
}
 
static inline bool need_redirect(struct sna *sna, PixmapPtr dst)
{
/* Is the pixmap too large to render to? */
return (dst->drawable.width > sna->render.max_3d_size ||
dst->drawable.height > sna->render.max_3d_size);
}
 
static inline float pack_2s(int16_t x, int16_t y)
{
union {
struct sna_coordinate p;
float f;
} u;
u.p.x = x;
u.p.y = y;
return u.f;
}
 
static inline int vertex_space(struct sna *sna)
{
return sna->render.vertex_size - sna->render.vertex_used;
}
static inline void vertex_emit(struct sna *sna, float v)
{
assert(sna->render.vertex_used < sna->render.vertex_size);
sna->render.vertices[sna->render.vertex_used++] = v;
}
static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
vertex_emit(sna, pack_2s(x, y));
}
 
static inline int batch_space(struct sna *sna)
{
assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);
return sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED;
}
 
static inline void batch_emit(struct sna *sna, uint32_t dword)
{
assert(sna->kgem.mode != KGEM_NONE);
assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);
sna->kgem.batch[sna->kgem.nbatch++] = dword;
}
 
static inline void batch_emit_float(struct sna *sna, float f)
{
union {
uint32_t dw;
float f;
} u;
u.f = f;
batch_emit(sna, u.dw);
}
 
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/sna_stream.c
0,0 → 1,140
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#include "sna.h"
#include "sna_render.h"
#include "brw/brw.h"
 
int sna_static_stream_init(struct sna_static_stream *stream)
{
stream->used = 0;
stream->size = 64*1024;
 
stream->data = malloc(stream->size);
return stream->data != NULL;
}
 
static uint32_t sna_static_stream_alloc(struct sna_static_stream *stream,
uint32_t len, uint32_t align)
{
uint32_t offset = ALIGN(stream->used, align);
uint32_t size = offset + len;
 
if (size > stream->size) {
do
stream->size *= 2;
while (stream->size < size);
 
stream->data = realloc(stream->data, stream->size);
}
 
stream->used = size;
return offset;
}
 
uint32_t sna_static_stream_add(struct sna_static_stream *stream,
const void *data, uint32_t len, uint32_t align)
{
uint32_t offset = sna_static_stream_alloc(stream, len, align);
memcpy(stream->data + offset, data, len);
return offset;
}
 
void *sna_static_stream_map(struct sna_static_stream *stream,
uint32_t len, uint32_t align)
{
uint32_t offset = sna_static_stream_alloc(stream, len, align);
return memset(stream->data + offset, 0, len);
}
 
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr)
{
return (uint8_t *)ptr - stream->data;
}
 
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream)
{
struct kgem_bo *bo;
 
DBG(("uploaded %d bytes of static state\n", stream->used));
 
bo = kgem_create_linear(&sna->kgem, stream->used, 0);
if (bo && !kgem_bo_write(&sna->kgem, bo, stream->data, stream->used)) {
kgem_bo_destroy(&sna->kgem, bo);
return NULL;
}
 
free(stream->data);
 
return bo;
}
 
unsigned
sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *))
{
struct brw_compile p;
 
brw_compile_init(&p, sna->kgem.gen,
sna_static_stream_map(stream,
64*sizeof(uint32_t), 64));
 
if (!compile(&p)) {
stream->used -= 64*sizeof(uint32_t);
return 0;
}
 
assert(p.nr_insn*sizeof(struct brw_instruction) <= 64*sizeof(uint32_t));
 
stream->used -= 64*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
return sna_static_stream_offsetof(stream, p.store);
}
 
unsigned
sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *, int),
int dispatch_width)
{
struct brw_compile p;
 
brw_compile_init(&p, sna->kgem.gen,
sna_static_stream_map(stream,
256*sizeof(uint32_t), 64));
 
if (!compile(&p, dispatch_width)) {
stream->used -= 256*sizeof(uint32_t);
return 0;
}
 
assert(p.nr_insn*sizeof(struct brw_instruction) <= 256*sizeof(uint32_t));
 
stream->used -= 256*sizeof(uint32_t) - p.nr_insn*sizeof(struct brw_instruction);
return sna_static_stream_offsetof(stream, p.store);
}
/drivers/video/Intel-2D/gen4_source.h
0,0 → 1,22
#ifndef GEN4_SOURCE_H
#define GEN4_SOURCE_H
 
#include "compiler.h"
 
#include "sna.h"
#include "sna_render.h"
 
bool
gen4_channel_init_solid(struct sna *sna,
struct sna_composite_channel *channel,
uint32_t color);
 
bool
gen4_channel_init_linear(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y);
 
#endif /* GEN4_SOURCE_H */
/drivers/video/Intel-2D/gen4_vertex.c
0,0 → 1,304
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "gen4_vertex.h"
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
 
assert(sna->render.vertex_offset);
assert(sna->render.vertex_index > sna->render.vertex_start);
 
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render.vertex_offset = 0;
}
 
int gen4_vertex_finish(struct sna *sna)
{
struct kgem_bo *bo;
unsigned int i;
unsigned hint, size;
 
DBG(("%s: used=%d / %d\n", __FUNCTION__,
sna->render.vertex_used, sna->render.vertex_size));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
 
sna_vertex_wait__locked(&sna->render);
 
/* Note: we only need dword alignment (currently) */
 
bo = sna->render.vbo;
if (bo) {
for (i = 0; i < sna->render.nvertex_reloc; i++) {
DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
i, sna->render.vertex_reloc[i]));
 
sna->kgem.batch[sna->render.vertex_reloc[i]] =
kgem_add_reloc(&sna->kgem,
sna->render.vertex_reloc[i], bo,
I915_GEM_DOMAIN_VERTEX << 16,
0);
}
 
assert(!sna->render.active);
sna->render.nvertex_reloc = 0;
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
sna->render.vbo = NULL;
sna->render.vb_id = 0;
 
kgem_bo_destroy(&sna->kgem, bo);
}
 
hint = CREATE_GTT_MAP;
if (bo)
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
 
size = 256*1024;
assert(!sna->render.active);
sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
while (sna->render.vbo == NULL && size > 16*1024) {
size /= 2;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
}
if (sna->render.vbo == NULL)
sna->render.vbo = kgem_create_linear(&sna->kgem,
256*1024, CREATE_GTT_MAP);
if (sna->render.vbo)
sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
if (sna->render.vbo) {
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
}
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
return 0;
}
 
if (sna->render.vertex_used) {
DBG(("%s: copying initial buffer x %d to handle=%d\n",
__FUNCTION__,
sna->render.vertex_used,
sna->render.vbo->handle));
assert(sizeof(float)*sna->render.vertex_used <=
__kgem_bo_size(sna->render.vbo));
memcpy(sna->render.vertices,
sna->render.vertex_data,
sizeof(float)*sna->render.vertex_used);
}
 
size = __kgem_bo_size(sna->render.vbo)/4;
if (size >= UINT16_MAX)
size = UINT16_MAX - 1;
 
DBG(("%s: create vbo handle=%d, size=%d\n",
__FUNCTION__, sna->render.vbo->handle, size));
 
sna->render.vertex_size = size;
return sna->render.vertex_size - sna->render.vertex_used;
}
 
void gen4_vertex_close(struct sna *sna)
{
struct kgem_bo *bo, *free_bo = NULL;
unsigned int i, delta = 0;
 
assert(sna->render.vertex_offset == 0);
if (!sna->render.vb_id)
return;
 
DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
sna->render.vb_id, sna->render.nvertex_reloc));
 
assert(!sna->render.active);
 
bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) {
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices =
kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
}
 
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
memcpy(sna->kgem.batch + sna->kgem.nbatch,
sna->render.vertex_data,
sna->render.vertex_used * 4);
delta = sna->kgem.nbatch * 4;
bo = NULL;
sna->kgem.nbatch += sna->render.vertex_used;
} else {
bo = kgem_create_linear(&sna->kgem,
4*sna->render.vertex_used,
CREATE_NO_THROTTLE);
if (bo && !kgem_bo_write(&sna->kgem, bo,
sna->render.vertex_data,
4*sna->render.vertex_used)) {
kgem_bo_destroy(&sna->kgem, bo);
bo = NULL;
}
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
free_bo = bo;
}
}
 
assert(sna->render.nvertex_reloc);
for (i = 0; i < sna->render.nvertex_reloc; i++) {
DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
i, sna->render.vertex_reloc[i]));
 
sna->kgem.batch[sna->render.vertex_reloc[i]] =
kgem_add_reloc(&sna->kgem,
sna->render.vertex_reloc[i], bo,
I915_GEM_DOMAIN_VERTEX << 16,
delta);
}
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
 
if (sna->render.vbo == NULL) {
assert(!sna->render.active);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
}
 
if (free_bo)
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float src_x, src_y;
float msk_x, msk_y;
float w, h;
float *v;
 
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = (src_x + w) * op->src.scale[0];
v[2] = (src_y + h) * op->src.scale[1];
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = src_x * op->src.scale[0];
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = src_y * op->src.scale[1];
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp)
{
unsigned vb;
 
if (tmp->mask.bo) {
if (tmp->mask.transform == NULL) {
if (tmp->src.is_solid) {
DBG(("%s: solid, identity mask\n", __FUNCTION__));
} else if (tmp->src.is_linear) {
DBG(("%s: linear, identity mask\n", __FUNCTION__));
} else if (tmp->src.transform == NULL) {
DBG(("%s: identity source, identity mask\n", __FUNCTION__));
tmp->prim_emit = emit_primitive_identity_source_mask;
tmp->floats_per_vertex = 5;
vb = 2 << 2 | 2;
} else if (tmp->src.is_affine) {
DBG(("%s: simple src, identity mask\n", __FUNCTION__));
} else {
DBG(("%s: projective source, identity mask\n", __FUNCTION__));
}
} else {
DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
__FUNCTION__,tmp->floats_per_vertex, vb));
}
} else {
}
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
 
return vb;
}
/drivers/video/Intel-2D/gen5_render.h
0,0 → 1,2772
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef GEN5_RENDER_H
#define GEN5_RENDER_H
 
#define GEN5_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN5_URB_FENCE GEN5_3D(0, 0, 0)
#define GEN5_CS_URB_STATE GEN5_3D(0, 0, 1)
#define GEN5_CONSTANT_BUFFER GEN5_3D(0, 0, 2)
#define GEN5_STATE_PREFETCH GEN5_3D(0, 0, 3)
 
#define GEN5_STATE_BASE_ADDRESS GEN5_3D(0, 1, 1)
#define GEN5_STATE_SIP GEN5_3D(0, 1, 2)
 
#define GEN5_PIPELINE_SELECT GEN5_3D(1, 1, 4)
 
#define GEN5_MEDIA_STATE_POINTERS GEN5_3D(2, 0, 0)
#define GEN5_MEDIA_OBJECT GEN5_3D(2, 1, 0)
 
#define GEN5_3DSTATE_PIPELINED_POINTERS GEN5_3D(3, 0, 0)
#define GEN5_3DSTATE_BINDING_TABLE_POINTERS GEN5_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN5_3DSTATE_VERTEX_BUFFERS GEN5_3D(3, 0, 8)
#define GEN5_3DSTATE_VERTEX_ELEMENTS GEN5_3D(3, 0, 9)
#define GEN5_3DSTATE_INDEX_BUFFER GEN5_3D(3, 0, 0xa)
#define GEN5_3DSTATE_VF_STATISTICS GEN5_3D(3, 0, 0xb)
 
#define GEN5_3DSTATE_DRAWING_RECTANGLE GEN5_3D(3, 1, 0)
#define GEN5_3DSTATE_CONSTANT_COLOR GEN5_3D(3, 1, 1)
#define GEN5_3DSTATE_SAMPLER_PALETTE_LOAD GEN5_3D(3, 1, 2)
#define GEN5_3DSTATE_CHROMA_KEY GEN5_3D(3, 1, 4)
#define GEN5_3DSTATE_DEPTH_BUFFER GEN5_3D(3, 1, 5)
# define GEN5_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN5_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN5_3DSTATE_POLY_STIPPLE_OFFSET GEN5_3D(3, 1, 6)
#define GEN5_3DSTATE_POLY_STIPPLE_PATTERN GEN5_3D(3, 1, 7)
#define GEN5_3DSTATE_LINE_STIPPLE GEN5_3D(3, 1, 8)
#define GEN5_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN5_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN5_3DSTATE_AA_LINE_PARAMS GEN5_3D(3, 1, 0xa)
#define GEN5_3DSTATE_GS_SVB_INDEX GEN5_3D(3, 1, 0xb)
 
#define GEN5_PIPE_CONTROL GEN5_3D(3, 2, 0)
 
#define GEN5_3DPRIMITIVE GEN5_3D(3, 3, 0)
 
#define GEN5_3DSTATE_CLEAR_PARAMS GEN5_3D(3, 1, 0x10)
/* DW1 */
# define GEN5_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN5_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN5_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN5_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN5_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN5_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN5_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN5_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN5_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
 
#define GEN6_3DSTATE_WM GEN5_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN5_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN5_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN5_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN5_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN5_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN5_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN5_3DSTATE_PIPELINED_POINTERS */
#define GEN5_GS_DISABLE 0
#define GEN5_GS_ENABLE 1
#define GEN5_CLIP_DISABLE 0
#define GEN5_CLIP_ENABLE 1
 
/* for GEN5_PIPE_CONTROL */
#define GEN5_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN5_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN5_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN5_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN5_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN5_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN5_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN5_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN5_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN5_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN5_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN5_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define GEN6_VB0_VERTEXDATA (0 << 20)
#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN5_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen5_defines.h */
#define GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN5_SVG_CTL 0x7400
 
#define GEN5_SVG_CTL_GS_BA (0 << 8)
#define GEN5_SVG_CTL_SS_BA (1 << 8)
#define GEN5_SVG_CTL_IO_BA (2 << 8)
#define GEN5_SVG_CTL_GS_AUB (3 << 8)
#define GEN5_SVG_CTL_IO_AUB (4 << 8)
#define GEN5_SVG_CTL_SIP (5 << 8)
 
#define GEN5_SVG_RDATA 0x7404
#define GEN5_SVG_WORK_CTL 0x7408
 
#define GEN5_VF_CTL 0x7500
 
#define GEN5_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN5_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN5_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN5_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN5_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_VF_STRG_VAL 0x7504
#define GEN5_VF_STR_VL_OVR 0x7508
#define GEN5_VF_VC_OVR 0x750c
#define GEN5_VF_STR_PSKIP 0x7510
#define GEN5_VF_MAX_PRIM 0x7514
#define GEN5_VF_RDATA 0x7518
 
#define GEN5_VS_CTL 0x7600
#define GEN5_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN5_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_VS_STRG_VAL 0x7604
#define GEN5_VS_RDATA 0x7608
 
#define GEN5_SF_CTL 0x7b00
#define GEN5_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN5_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN5_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN5_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_SF_STRG_VAL 0x7b04
#define GEN5_SF_RDATA 0x7b18
 
#define GEN5_WIZ_CTL 0x7c00
#define GEN5_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN5_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN5_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN5_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN5_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN5_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN5_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_WIZ_STRG_VAL 0x7c04
#define GEN5_WIZ_RDATA 0x7c18
 
#define GEN5_TS_CTL 0x7e00
#define GEN5_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN5_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN5_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN5_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN5_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN5_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_TS_STRG_VAL 0x7e04
#define GEN5_TS_RDATA 0x7e08
 
#define GEN5_TD_CTL 0x8000
#define GEN5_TD_CTL_MUX_SHIFT 8
#define GEN5_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN5_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN5_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN5_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN5_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN5_TD_CTL2 0x8004
#define GEN5_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN5_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN5_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN5_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN5_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN5_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN5_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN5_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN5_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN5_TD_VF_VS_EMSK 0x8008
#define GEN5_TD_GS_EMSK 0x800c
#define GEN5_TD_CLIP_EMSK 0x8010
#define GEN5_TD_SF_EMSK 0x8014
#define GEN5_TD_WIZ_EMSK 0x8018
#define GEN5_TD_0_6_EHTRG_VAL 0x801c
#define GEN5_TD_0_7_EHTRG_VAL 0x8020
#define GEN5_TD_0_6_EHTRG_MSK 0x8024
#define GEN5_TD_0_7_EHTRG_MSK 0x8028
#define GEN5_TD_RDATA 0x802c
#define GEN5_TD_TS_EMSK 0x8030
 
#define GEN5_EU_CTL 0x8800
#define GEN5_EU_CTL_SELECT_SHIFT 16
#define GEN5_EU_CTL_DATA_MUX_SHIFT 8
#define GEN5_EU_ATT_0 0x8810
#define GEN5_EU_ATT_1 0x8814
#define GEN5_EU_ATT_DATA_0 0x8820
#define GEN5_EU_ATT_DATA_1 0x8824
#define GEN5_EU_ATT_CLR_0 0x8830
#define GEN5_EU_ATT_CLR_1 0x8834
#define GEN5_EU_RDATA 0x8840
 
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
 
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN5_ANISORATIO_2 0
#define GEN5_ANISORATIO_4 1
#define GEN5_ANISORATIO_6 2
#define GEN5_ANISORATIO_8 3
#define GEN5_ANISORATIO_10 4
#define GEN5_ANISORATIO_12 5
#define GEN5_ANISORATIO_14 6
#define GEN5_ANISORATIO_16 7
 
#define GEN5_BLENDFACTOR_ONE 0x1
#define GEN5_BLENDFACTOR_SRC_COLOR 0x2
#define GEN5_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN5_BLENDFACTOR_DST_ALPHA 0x4
#define GEN5_BLENDFACTOR_DST_COLOR 0x5
#define GEN5_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN5_BLENDFACTOR_CONST_COLOR 0x7
#define GEN5_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN5_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN5_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN5_BLENDFACTOR_ZERO 0x11
#define GEN5_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN5_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN5_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN5_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN5_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN5_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN5_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN5_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN5_BLENDFUNCTION_ADD 0
#define GEN5_BLENDFUNCTION_SUBTRACT 1
#define GEN5_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN5_BLENDFUNCTION_MIN 3
#define GEN5_BLENDFUNCTION_MAX 4
 
#define GEN5_ALPHATEST_FORMAT_UNORM8 0
#define GEN5_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN5_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN5_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN5_CLIP_API_OGL 0
#define GEN5_CLIP_API_DX 1
 
#define GEN5_CLIPMODE_NORMAL 0
#define GEN5_CLIPMODE_CLIP_ALL 1
#define GEN5_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN5_CLIPMODE_REJECT_ALL 3
#define GEN5_CLIPMODE_ACCEPT_ALL 4
 
#define GEN5_CLIP_NDCSPACE 0
#define GEN5_CLIP_SCREENSPACE 1
 
#define GEN5_COMPAREFUNCTION_ALWAYS 0
#define GEN5_COMPAREFUNCTION_NEVER 1
#define GEN5_COMPAREFUNCTION_LESS 2
#define GEN5_COMPAREFUNCTION_EQUAL 3
#define GEN5_COMPAREFUNCTION_LEQUAL 4
#define GEN5_COMPAREFUNCTION_GREATER 5
#define GEN5_COMPAREFUNCTION_NOTEQUAL 6
#define GEN5_COMPAREFUNCTION_GEQUAL 7
 
#define GEN5_COVERAGE_PIXELS_HALF 0
#define GEN5_COVERAGE_PIXELS_1 1
#define GEN5_COVERAGE_PIXELS_2 2
#define GEN5_COVERAGE_PIXELS_4 3
 
#define GEN5_CULLMODE_BOTH 0
#define GEN5_CULLMODE_NONE 1
#define GEN5_CULLMODE_FRONT 2
#define GEN5_CULLMODE_BACK 3
 
#define GEN5_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN5_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN5_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN5_DEPTHFORMAT_D32_FLOAT 1
#define GEN5_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN5_DEPTHFORMAT_D16_UNORM 5
 
#define GEN5_FLOATING_POINT_IEEE_754 0
#define GEN5_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN5_FRONTWINDING_CW 0
#define GEN5_FRONTWINDING_CCW 1
 
#define GEN5_INDEX_BYTE 0
#define GEN5_INDEX_WORD 1
#define GEN5_INDEX_DWORD 2
 
#define GEN5_LOGICOPFUNCTION_CLEAR 0
#define GEN5_LOGICOPFUNCTION_NOR 1
#define GEN5_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN5_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN5_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN5_LOGICOPFUNCTION_INVERT 5
#define GEN5_LOGICOPFUNCTION_XOR 6
#define GEN5_LOGICOPFUNCTION_NAND 7
#define GEN5_LOGICOPFUNCTION_AND 8
#define GEN5_LOGICOPFUNCTION_EQUIV 9
#define GEN5_LOGICOPFUNCTION_NOOP 10
#define GEN5_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN5_LOGICOPFUNCTION_COPY 12
#define GEN5_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN5_LOGICOPFUNCTION_OR 14
#define GEN5_LOGICOPFUNCTION_SET 15
 
#define GEN5_MAPFILTER_NEAREST 0x0
#define GEN5_MAPFILTER_LINEAR 0x1
#define GEN5_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN5_MIPFILTER_NONE 0
#define GEN5_MIPFILTER_NEAREST 1
#define GEN5_MIPFILTER_LINEAR 3
 
#define GEN5_POLYGON_FRONT_FACING 0
#define GEN5_POLYGON_BACK_FACING 1
 
#define GEN5_PREFILTER_ALWAYS 0x0
#define GEN5_PREFILTER_NEVER 0x1
#define GEN5_PREFILTER_LESS 0x2
#define GEN5_PREFILTER_EQUAL 0x3
#define GEN5_PREFILTER_LEQUAL 0x4
#define GEN5_PREFILTER_GREATER 0x5
#define GEN5_PREFILTER_NOTEQUAL 0x6
#define GEN5_PREFILTER_GEQUAL 0x7
 
#define GEN5_PROVOKING_VERTEX_0 0
#define GEN5_PROVOKING_VERTEX_1 1
#define GEN5_PROVOKING_VERTEX_2 2
 
#define GEN5_RASTRULE_UPPER_LEFT 0
#define GEN5_RASTRULE_UPPER_RIGHT 1
 
#define GEN5_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN5_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN5_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN5_STENCILOP_KEEP 0
#define GEN5_STENCILOP_ZERO 1
#define GEN5_STENCILOP_REPLACE 2
#define GEN5_STENCILOP_INCRSAT 3
#define GEN5_STENCILOP_DECRSAT 4
#define GEN5_STENCILOP_INCR 5
#define GEN5_STENCILOP_DECR 6
#define GEN5_STENCILOP_INVERT 7
 
#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN5_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN5_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN5_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN5_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN5_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN5_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN5_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN5_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN5_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN5_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN5_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN5_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN5_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN5_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN5_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN5_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN5_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN5_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN5_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN5_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN5_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN5_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN5_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN5_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN5_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN5_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN5_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN5_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN5_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN5_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN5_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN5_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN5_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN5_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN5_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN5_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN5_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN5_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN5_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN5_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN5_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN5_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN5_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN5_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN5_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN5_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN5_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN5_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN5_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN5_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN5_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN5_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN5_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN5_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN5_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN5_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN5_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN5_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN5_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN5_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN5_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN5_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN5_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN5_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN5_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN5_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN5_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN5_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN5_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN5_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN5_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN5_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN5_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN5_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN5_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN5_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN5_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN5_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN5_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN5_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN5_SURFACEFORMAT_R16_SINT 0x10C
#define GEN5_SURFACEFORMAT_R16_UINT 0x10D
#define GEN5_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN5_SURFACEFORMAT_I16_UNORM 0x111
#define GEN5_SURFACEFORMAT_L16_UNORM 0x112
#define GEN5_SURFACEFORMAT_A16_UNORM 0x113
#define GEN5_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN5_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN5_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN5_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN5_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN5_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN5_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN5_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN5_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN5_SURFACEFORMAT_R8_UNORM 0x140
#define GEN5_SURFACEFORMAT_R8_SNORM 0x141
#define GEN5_SURFACEFORMAT_R8_SINT 0x142
#define GEN5_SURFACEFORMAT_R8_UINT 0x143
#define GEN5_SURFACEFORMAT_A8_UNORM 0x144
#define GEN5_SURFACEFORMAT_I8_UNORM 0x145
#define GEN5_SURFACEFORMAT_L8_UNORM 0x146
#define GEN5_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN5_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN5_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN5_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN5_SURFACEFORMAT_R1_UINT 0x181
#define GEN5_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN5_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN5_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN5_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN5_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN5_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN5_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN5_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN5_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN5_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN5_SURFACEFORMAT_MONO8 0x18E
#define GEN5_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN5_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN5_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN5_SURFACEFORMAT_FXT1 0x192
#define GEN5_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN5_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN5_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN5_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN5_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN5_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN5_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN5_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN5_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN5_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN5_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN5_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN5_SURFACERETURNFORMAT_FLOAT32 0
#define GEN5_SURFACERETURNFORMAT_S1 1
 
#define GEN5_SURFACE_1D 0
#define GEN5_SURFACE_2D 1
#define GEN5_SURFACE_3D 2
#define GEN5_SURFACE_CUBE 3
#define GEN5_SURFACE_BUFFER 4
#define GEN5_SURFACE_NULL 7
 
#define GEN5_BORDER_COLOR_MODE_DEFAULT 0
#define GEN5_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN5_TEXCOORDMODE_WRAP 0
#define GEN5_TEXCOORDMODE_MIRROR 1
#define GEN5_TEXCOORDMODE_CLAMP 2
#define GEN5_TEXCOORDMODE_CUBE 3
#define GEN5_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN5_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN5_THREAD_PRIORITY_NORMAL 0
#define GEN5_THREAD_PRIORITY_HIGH 1
 
#define GEN5_TILEWALK_XMAJOR 0
#define GEN5_TILEWALK_YMAJOR 1
 
#define GEN5_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN5_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define VFCOMPONENT_NOSTORE 0
#define VFCOMPONENT_STORE_SRC 1
#define VFCOMPONENT_STORE_0 2
#define VFCOMPONENT_STORE_1_FLT 3
#define VFCOMPONENT_STORE_1_INT 4
#define VFCOMPONENT_STORE_VID 5
#define VFCOMPONENT_STORE_IID 6
#define VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN5_ALIGN_1 0
#define GEN5_ALIGN_16 1
 
#define GEN5_ADDRESS_DIRECT 0
#define GEN5_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN5_CHANNEL_X 0
#define GEN5_CHANNEL_Y 1
#define GEN5_CHANNEL_Z 2
#define GEN5_CHANNEL_W 3
 
#define GEN5_COMPRESSION_NONE 0
#define GEN5_COMPRESSION_2NDHALF 1
#define GEN5_COMPRESSION_COMPRESSED 2
 
#define GEN5_CONDITIONAL_NONE 0
#define GEN5_CONDITIONAL_Z 1
#define GEN5_CONDITIONAL_NZ 2
#define GEN5_CONDITIONAL_EQ 1 /* Z */
#define GEN5_CONDITIONAL_NEQ 2 /* NZ */
#define GEN5_CONDITIONAL_G 3
#define GEN5_CONDITIONAL_GE 4
#define GEN5_CONDITIONAL_L 5
#define GEN5_CONDITIONAL_LE 6
#define GEN5_CONDITIONAL_C 7
#define GEN5_CONDITIONAL_O 8
 
#define GEN5_DEBUG_NONE 0
#define GEN5_DEBUG_BREAKPOINT 1
 
#define GEN5_DEPENDENCY_NORMAL 0
#define GEN5_DEPENDENCY_NOTCLEARED 1
#define GEN5_DEPENDENCY_NOTCHECKED 2
#define GEN5_DEPENDENCY_DISABLE 3
 
#define GEN5_EXECUTE_1 0
#define GEN5_EXECUTE_2 1
#define GEN5_EXECUTE_4 2
#define GEN5_EXECUTE_8 3
#define GEN5_EXECUTE_16 4
#define GEN5_EXECUTE_32 5
 
#define GEN5_HORIZONTAL_STRIDE_0 0
#define GEN5_HORIZONTAL_STRIDE_1 1
#define GEN5_HORIZONTAL_STRIDE_2 2
#define GEN5_HORIZONTAL_STRIDE_4 3
 
#define GEN5_INSTRUCTION_NORMAL 0
#define GEN5_INSTRUCTION_SATURATE 1
 
#define GEN5_MASK_ENABLE 0
#define GEN5_MASK_DISABLE 1
 
#define GEN5_OPCODE_MOV 1
#define GEN5_OPCODE_SEL 2
#define GEN5_OPCODE_NOT 4
#define GEN5_OPCODE_AND 5
#define GEN5_OPCODE_OR 6
#define GEN5_OPCODE_XOR 7
#define GEN5_OPCODE_SHR 8
#define GEN5_OPCODE_SHL 9
#define GEN5_OPCODE_RSR 10
#define GEN5_OPCODE_RSL 11
#define GEN5_OPCODE_ASR 12
#define GEN5_OPCODE_CMP 16
#define GEN5_OPCODE_JMPI 32
#define GEN5_OPCODE_IF 34
#define GEN5_OPCODE_IFF 35
#define GEN5_OPCODE_ELSE 36
#define GEN5_OPCODE_ENDIF 37
#define GEN5_OPCODE_DO 38
#define GEN5_OPCODE_WHILE 39
#define GEN5_OPCODE_BREAK 40
#define GEN5_OPCODE_CONTINUE 41
#define GEN5_OPCODE_HALT 42
#define GEN5_OPCODE_MSAVE 44
#define GEN5_OPCODE_MRESTORE 45
#define GEN5_OPCODE_PUSH 46
#define GEN5_OPCODE_POP 47
#define GEN5_OPCODE_WAIT 48
#define GEN5_OPCODE_SEND 49
#define GEN5_OPCODE_ADD 64
#define GEN5_OPCODE_MUL 65
#define GEN5_OPCODE_AVG 66
#define GEN5_OPCODE_FRC 67
#define GEN5_OPCODE_RNDU 68
#define GEN5_OPCODE_RNDD 69
#define GEN5_OPCODE_RNDE 70
#define GEN5_OPCODE_RNDZ 71
#define GEN5_OPCODE_MAC 72
#define GEN5_OPCODE_MACH 73
#define GEN5_OPCODE_LZD 74
#define GEN5_OPCODE_SAD2 80
#define GEN5_OPCODE_SADA2 81
#define GEN5_OPCODE_DP4 84
#define GEN5_OPCODE_DPH 85
#define GEN5_OPCODE_DP3 86
#define GEN5_OPCODE_DP2 87
#define GEN5_OPCODE_DPA2 88
#define GEN5_OPCODE_LINE 89
#define GEN5_OPCODE_NOP 126
 
#define GEN5_PREDICATE_NONE 0
#define GEN5_PREDICATE_NORMAL 1
#define GEN5_PREDICATE_ALIGN1_ANYV 2
#define GEN5_PREDICATE_ALIGN1_ALLV 3
#define GEN5_PREDICATE_ALIGN1_ANY2H 4
#define GEN5_PREDICATE_ALIGN1_ALL2H 5
#define GEN5_PREDICATE_ALIGN1_ANY4H 6
#define GEN5_PREDICATE_ALIGN1_ALL4H 7
#define GEN5_PREDICATE_ALIGN1_ANY8H 8
#define GEN5_PREDICATE_ALIGN1_ALL8H 9
#define GEN5_PREDICATE_ALIGN1_ANY16H 10
#define GEN5_PREDICATE_ALIGN1_ALL16H 11
#define GEN5_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN5_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN5_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN5_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN5_PREDICATE_ALIGN16_ANY4H 6
#define GEN5_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN5_ARCHITECTURE_REGISTER_FILE 0
#define GEN5_GENERAL_REGISTER_FILE 1
#define GEN5_MESSAGE_REGISTER_FILE 2
#define GEN5_IMMEDIATE_VALUE 3
 
#define GEN5_REGISTER_TYPE_UD 0
#define GEN5_REGISTER_TYPE_D 1
#define GEN5_REGISTER_TYPE_UW 2
#define GEN5_REGISTER_TYPE_W 3
#define GEN5_REGISTER_TYPE_UB 4
#define GEN5_REGISTER_TYPE_B 5
#define GEN5_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN5_REGISTER_TYPE_HF 6
#define GEN5_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN5_REGISTER_TYPE_F 7
 
#define GEN5_ARF_NULL 0x00
#define GEN5_ARF_ADDRESS 0x10
#define GEN5_ARF_ACCUMULATOR 0x20
#define GEN5_ARF_FLAG 0x30
#define GEN5_ARF_MASK 0x40
#define GEN5_ARF_MASK_STACK 0x50
#define GEN5_ARF_MASK_STACK_DEPTH 0x60
#define GEN5_ARF_STATE 0x70
#define GEN5_ARF_CONTROL 0x80
#define GEN5_ARF_NOTIFICATION_COUNT 0x90
#define GEN5_ARF_IP 0xA0
 
#define GEN5_AMASK 0
#define GEN5_IMASK 1
#define GEN5_LMASK 2
#define GEN5_CMASK 3
 
 
 
#define GEN5_THREAD_NORMAL 0
#define GEN5_THREAD_ATOMIC 1
#define GEN5_THREAD_SWITCH 2
 
#define GEN5_VERTICAL_STRIDE_0 0
#define GEN5_VERTICAL_STRIDE_1 1
#define GEN5_VERTICAL_STRIDE_2 2
#define GEN5_VERTICAL_STRIDE_4 3
#define GEN5_VERTICAL_STRIDE_8 4
#define GEN5_VERTICAL_STRIDE_16 5
#define GEN5_VERTICAL_STRIDE_32 6
#define GEN5_VERTICAL_STRIDE_64 7
#define GEN5_VERTICAL_STRIDE_128 8
#define GEN5_VERTICAL_STRIDE_256 9
#define GEN5_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN5_WIDTH_1 0
#define GEN5_WIDTH_2 1
#define GEN5_WIDTH_4 2
#define GEN5_WIDTH_8 3
#define GEN5_WIDTH_16 4
 
#define GEN5_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN5_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN5_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN5_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN5_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN5_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN5_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN5_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN5_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN5_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN5_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN5_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN5_POLYGON_FACING_FRONT 0
#define GEN5_POLYGON_FACING_BACK 1
 
#define GEN5_MESSAGE_TARGET_NULL 0
#define GEN5_MESSAGE_TARGET_MATH 1
#define GEN5_MESSAGE_TARGET_SAMPLER 2
#define GEN5_MESSAGE_TARGET_GATEWAY 3
#define GEN5_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN5_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN5_MESSAGE_TARGET_URB 6
#define GEN5_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN5_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN5_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN5_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN5_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN5_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN5_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN5_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN5_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN5_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN5_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN5_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN5_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN5_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
#define GEN5_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN5_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN5_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN5_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN5_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN5_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
#define GEN5_MATH_FUNCTION_INV 1
#define GEN5_MATH_FUNCTION_LOG 2
#define GEN5_MATH_FUNCTION_EXP 3
#define GEN5_MATH_FUNCTION_SQRT 4
#define GEN5_MATH_FUNCTION_RSQ 5
#define GEN5_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN5_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN5_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN5_MATH_FUNCTION_TAN 9
#define GEN5_MATH_FUNCTION_POW 10
#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN5_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define GEN5_MATH_INTEGER_UNSIGNED 0
#define GEN5_MATH_INTEGER_SIGNED 1
 
#define GEN5_MATH_PRECISION_FULL 0
#define GEN5_MATH_PRECISION_PARTIAL 1
 
#define GEN5_MATH_SATURATE_NONE 0
#define GEN5_MATH_SATURATE_SATURATE 1
 
#define GEN5_MATH_DATA_VECTOR 0
#define GEN5_MATH_DATA_SCALAR 1
 
#define GEN5_URB_OPCODE_WRITE 0
 
#define GEN5_URB_SWIZZLE_NONE 0
#define GEN5_URB_SWIZZLE_INTERLEAVE 1
#define GEN5_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN5_SCRATCH_SPACE_SIZE_1K 0
#define GEN5_SCRATCH_SPACE_SIZE_2K 1
#define GEN5_SCRATCH_SPACE_SIZE_4K 2
#define GEN5_SCRATCH_SPACE_SIZE_8K 3
#define GEN5_SCRATCH_SPACE_SIZE_16K 4
#define GEN5_SCRATCH_SPACE_SIZE_32K 5
#define GEN5_SCRATCH_SPACE_SIZE_64K 6
#define GEN5_SCRATCH_SPACE_SIZE_128K 7
#define GEN5_SCRATCH_SPACE_SIZE_256K 8
#define GEN5_SCRATCH_SPACE_SIZE_512K 9
#define GEN5_SCRATCH_SPACE_SIZE_1M 10
#define GEN5_SCRATCH_SPACE_SIZE_2M 11
 
 
 
 
#define CMD_URB_FENCE 0x6000
#define CMD_CONST_BUFFER_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002
 
#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_INSN_POINTER 0x6102
#define CMD_PIPELINE_SELECT 0x6104
 
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
#define CMD_VERTEX_BUFFER 0x7808
#define CMD_VERTEX_ELEMENT 0x7809
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS 0x780b
 
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
#define CMD_CHROMA_KEY 0x7904
#define CMD_DEPTH_BUFFER 0x7905
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CMD_POLY_STIPPLE_PATTERN 0x7907
#define CMD_LINE_STIPPLE_PATTERN 0x7908
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908
 
#define CMD_PIPE_CONTROL 0x7a00
 
#define CMD_3D_PRIM 0x7b00
 
#define CMD_MI_FLUSH 0x0200
 
 
/* Various values from the R0 vertex header:
*/
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
 
/* media pipeline */
 
#define GEN5_VFE_MODE_GENERIC 0x0
#define GEN5_VFE_MODE_VLD_MPEG2 0x1
#define GEN5_VFE_MODE_IS 0x2
#define GEN5_VFE_MODE_AVC_MC 0x4
#define GEN5_VFE_MODE_AVC_IT 0x7
#define GEN5_VFE_MODE_VC1_IT 0xB
 
#define GEN5_VFE_DEBUG_COUNTER_FREE 0
#define GEN5_VFE_DEBUG_COUNTER_FROZEN 1
#define GEN5_VFE_DEBUG_COUNTER_ONCE 2
#define GEN5_VFE_DEBUG_COUNTER_ALWAYS 3
 
/* VLD_STATE */
#define GEN5_MPEG_TOP_FIELD 1
#define GEN5_MPEG_BOTTOM_FIELD 2
#define GEN5_MPEG_FRAME 3
#define GEN5_MPEG_QSCALE_LINEAR 0
#define GEN5_MPEG_QSCALE_NONLINEAR 1
#define GEN5_MPEG_ZIGZAG_SCAN 0
#define GEN5_MPEG_ALTER_VERTICAL_SCAN 1
#define GEN5_MPEG_I_PICTURE 1
#define GEN5_MPEG_P_PICTURE 2
#define GEN5_MPEG_B_PICTURE 3
 
/* Command packets:
*/
struct header
{
unsigned int length:16;
unsigned int opcode:16;
};
 
 
union header_union
{
struct header bits;
unsigned int dword;
};
 
struct gen5_3d_control
{
struct
{
unsigned int length:8;
unsigned int notify_enable:1;
unsigned int pad:3;
unsigned int wc_flush_enable:1;
unsigned int depth_stall_enable:1;
unsigned int operation:2;
unsigned int opcode:16;
} header;
struct
{
unsigned int pad:2;
unsigned int dest_addr_type:1;
unsigned int dest_addr:29;
} dest;
unsigned int dword2;
unsigned int dword3;
};
 
 
struct gen5_3d_primitive
{
struct
{
unsigned int length:8;
unsigned int pad:2;
unsigned int topology:5;
unsigned int indexed:1;
unsigned int opcode:16;
} header;
 
unsigned int verts_per_instance;
unsigned int start_vert_location;
unsigned int instance_count;
unsigned int start_instance_location;
unsigned int base_vert_location;
};
 
/* These seem to be passed around as function args, so it works out
* better to keep them as #defines:
*/
#define GEN5_FLUSH_READ_CACHE 0x1
#define GEN5_FLUSH_STATE_CACHE 0x2
#define GEN5_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define GEN5_FLUSH_SNAPSHOT_COUNTERS 0x8
 
struct gen5_mi_flush
{
unsigned int flags:4;
unsigned int pad:12;
unsigned int opcode:16;
};
 
struct gen5_vf_statistics
{
unsigned int statistics_enable:1;
unsigned int pad:15;
unsigned int opcode:16;
};
 
 
 
struct gen5_binding_table_pointers
{
struct header header;
unsigned int vs;
unsigned int gs;
unsigned int clp;
unsigned int sf;
unsigned int wm;
};
 
 
struct gen5_blend_constant_color
{
struct header header;
float blend_constant_color[4];
};
 
 
struct gen5_depthbuffer
{
union header_union header;
union {
struct {
unsigned int pitch:18;
unsigned int format:3;
unsigned int pad:4;
unsigned int depth_offset_disable:1;
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int pad2:1;
unsigned int surface_type:3;
} bits;
unsigned int dword;
} dword1;
unsigned int dword2_base_addr;
union {
struct {
unsigned int pad:1;
unsigned int mipmap_layout:1;
unsigned int lod:4;
unsigned int width:13;
unsigned int height:13;
} bits;
unsigned int dword;
} dword3;
 
union {
struct {
unsigned int pad:12;
unsigned int min_array_element:9;
unsigned int depth:11;
} bits;
unsigned int dword;
} dword4;
};
 
struct gen5_drawrect
{
struct header header;
unsigned int xmin:16;
unsigned int ymin:16;
unsigned int xmax:16;
unsigned int ymax:16;
unsigned int xorg:16;
unsigned int yorg:16;
};
 
 
 
 
struct gen5_global_depth_offset_clamp
{
struct header header;
float depth_offset_clamp;
};
 
struct gen5_indexbuffer
{
union {
struct
{
unsigned int length:8;
unsigned int index_format:2;
unsigned int cut_index_enable:1;
unsigned int pad:5;
unsigned int opcode:16;
} bits;
unsigned int dword;
 
} header;
 
unsigned int buffer_start;
unsigned int buffer_end;
};
 
 
struct gen5_line_stipple
{
struct header header;
struct
{
unsigned int pattern:16;
unsigned int pad:16;
} bits0;
struct
{
unsigned int repeat_count:9;
unsigned int pad:7;
unsigned int inverse_repeat_count:16;
} bits1;
};
 
 
struct gen5_pipelined_state_pointers
{
struct header header;
struct {
unsigned int pad:5;
unsigned int offset:27;
} vs;
struct
{
unsigned int enable:1;
unsigned int pad:4;
unsigned int offset:27;
} gs;
struct
{
unsigned int enable:1;
unsigned int pad:4;
unsigned int offset:27;
} clp;
struct
{
unsigned int pad:5;
unsigned int offset:27;
} sf;
 
struct
{
unsigned int pad:5;
unsigned int offset:27;
} wm;
struct
{
unsigned int pad:5;
unsigned int offset:27; /* KW: check me! */
} cc;
};
 
 
struct gen5_polygon_stipple_offset
{
struct header header;
 
struct {
unsigned int y_offset:5;
unsigned int pad:3;
unsigned int x_offset:5;
unsigned int pad0:19;
} bits0;
};
 
 
 
struct gen5_polygon_stipple
{
struct header header;
unsigned int stipple[32];
};
 
 
 
struct gen5_pipeline_select
{
struct
{
unsigned int pipeline_select:1;
unsigned int pad:15;
unsigned int opcode:16;
} header;
};
 
 
struct gen5_pipe_control
{
struct
{
unsigned int length:8;
unsigned int notify_enable:1;
unsigned int pad:2;
unsigned int instruction_state_cache_flush_enable:1;
unsigned int write_cache_flush_enable:1;
unsigned int depth_stall_enable:1;
unsigned int post_sync_operation:2;
 
unsigned int opcode:16;
} header;
 
struct
{
unsigned int pad:2;
unsigned int dest_addr_type:1;
unsigned int dest_addr:29;
} bits1;
 
unsigned int data0;
unsigned int data1;
};
 
 
struct gen5_urb_fence
{
struct
{
unsigned int length:8;
unsigned int vs_realloc:1;
unsigned int gs_realloc:1;
unsigned int clp_realloc:1;
unsigned int sf_realloc:1;
unsigned int vfe_realloc:1;
unsigned int cs_realloc:1;
unsigned int pad:2;
unsigned int opcode:16;
} header;
 
struct
{
unsigned int vs_fence:10;
unsigned int gs_fence:10;
unsigned int clp_fence:10;
unsigned int pad:2;
} bits0;
 
struct
{
unsigned int sf_fence:10;
unsigned int vf_fence:10;
unsigned int cs_fence:10;
unsigned int pad:2;
} bits1;
};
 
struct gen5_constant_buffer_state /* previously gen5_command_streamer */
{
struct header header;
 
struct
{
unsigned int nr_urb_entries:3;
unsigned int pad:1;
unsigned int urb_entry_size:5;
unsigned int pad0:23;
} bits0;
};
 
struct gen5_constant_buffer
{
struct
{
unsigned int length:8;
unsigned int valid:1;
unsigned int pad:7;
unsigned int opcode:16;
} header;
 
struct
{
unsigned int buffer_length:6;
unsigned int buffer_address:26;
} bits0;
};
 
struct gen5_state_base_address
{
struct header header;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int general_state_address:27;
} bits0;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int surface_state_address:27;
} bits1;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:4;
unsigned int indirect_object_state_address:27;
} bits2;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:11;
unsigned int general_state_upper_bound:20;
} bits3;
 
struct
{
unsigned int modify_enable:1;
unsigned int pad:11;
unsigned int indirect_object_state_upper_bound:20;
} bits4;
};
 
struct gen5_state_prefetch
{
struct header header;
 
struct
{
unsigned int prefetch_count:3;
unsigned int pad:3;
unsigned int prefetch_pointer:26;
} bits0;
};
 
struct gen5_system_instruction_pointer
{
struct header header;
 
struct
{
unsigned int pad:4;
unsigned int system_instruction_pointer:28;
} bits0;
};
 
 
 
 
/* State structs for the various fixed function units:
*/
 
 
struct thread0
{
unsigned int pad0:1;
unsigned int grf_reg_count:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer:26;
};
 
struct thread1
{
unsigned int ext_halt_exception_enable:1;
unsigned int sw_exception_enable:1;
unsigned int mask_stack_exception_enable:1;
unsigned int timeout_exception_enable:1;
unsigned int illegal_op_exception_enable:1;
unsigned int pad0:3;
unsigned int depth_coef_urb_read_offset:6; /* WM only */
unsigned int pad1:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int binding_table_entry_count:8;
unsigned int pad3:5;
unsigned int single_program_flow:1;
};
 
struct thread2
{
unsigned int per_thread_scratch_space:4;
unsigned int pad0:6;
unsigned int scratch_space_base_pointer:22;
};
 
struct thread3
{
unsigned int dispatch_grf_start_reg:4;
unsigned int urb_entry_read_offset:6;
unsigned int pad0:1;
unsigned int urb_entry_read_length:6;
unsigned int pad1:1;
unsigned int const_urb_entry_read_offset:6;
unsigned int pad2:1;
unsigned int const_urb_entry_read_length:6;
unsigned int pad3:1;
};
 
 
 
struct gen5_clip_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:9;
unsigned int gs_output_stats:1; /* not always */
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:6; /* may be less */
unsigned int pad3:1;
} thread4;
struct
{
unsigned int pad0:13;
unsigned int clip_mode:3;
unsigned int userclip_enable_flags:8;
unsigned int userclip_must_clip:1;
unsigned int pad1:1;
unsigned int guard_band_enable:1;
unsigned int viewport_z_clip_enable:1;
unsigned int viewport_xy_clip_enable:1;
unsigned int vertex_position_space:1;
unsigned int api_mode:1;
unsigned int pad2:1;
} clip5;
struct
{
unsigned int pad0:5;
unsigned int clipper_viewport_state_ptr:27;
} clip6;
 
float viewport_xmin;
float viewport_xmax;
float viewport_ymin;
float viewport_ymax;
};
 
 
 
struct gen5_cc_unit_state
{
struct
{
unsigned int pad0:3;
unsigned int bf_stencil_pass_depth_pass_op:3;
unsigned int bf_stencil_pass_depth_fail_op:3;
unsigned int bf_stencil_fail_op:3;
unsigned int bf_stencil_func:3;
unsigned int bf_stencil_enable:1;
unsigned int pad1:2;
unsigned int stencil_write_enable:1;
unsigned int stencil_pass_depth_pass_op:3;
unsigned int stencil_pass_depth_fail_op:3;
unsigned int stencil_fail_op:3;
unsigned int stencil_func:3;
unsigned int stencil_enable:1;
} cc0;
 
struct
{
unsigned int bf_stencil_ref:8;
unsigned int stencil_write_mask:8;
unsigned int stencil_test_mask:8;
unsigned int stencil_ref:8;
} cc1;
 
struct
{
unsigned int logicop_enable:1;
unsigned int pad0:10;
unsigned int depth_write_enable:1;
unsigned int depth_test_function:3;
unsigned int depth_test:1;
unsigned int bf_stencil_write_mask:8;
unsigned int bf_stencil_test_mask:8;
} cc2;
 
struct
{
unsigned int pad0:8;
unsigned int alpha_test_func:3;
unsigned int alpha_test:1;
unsigned int blend_enable:1;
unsigned int ia_blend_enable:1;
unsigned int pad1:1;
unsigned int alpha_test_format:1;
unsigned int pad2:16;
} cc3;
struct
{
unsigned int pad0:5;
unsigned int cc_viewport_state_offset:27;
} cc4;
struct
{
unsigned int pad0:2;
unsigned int ia_dest_blend_factor:5;
unsigned int ia_src_blend_factor:5;
unsigned int ia_blend_function:3;
unsigned int statistics_enable:1;
unsigned int logicop_func:4;
unsigned int pad1:11;
unsigned int dither_enable:1;
} cc5;
 
struct
{
unsigned int clamp_post_alpha_blend:1;
unsigned int clamp_pre_alpha_blend:1;
unsigned int clamp_range:2;
unsigned int pad0:11;
unsigned int y_dither_offset:2;
unsigned int x_dither_offset:2;
unsigned int dest_blend_factor:5;
unsigned int src_blend_factor:5;
unsigned int blend_function:3;
} cc6;
 
struct {
union {
float f;
unsigned char ub[4];
} alpha_ref;
} cc7;
};
 
 
 
struct gen5_sf_unit_state
{
struct thread0 thread0;
struct {
unsigned int pad0:7;
unsigned int sw_exception_enable:1;
unsigned int pad1:3;
unsigned int mask_stack_exception_enable:1;
unsigned int pad2:1;
unsigned int illegal_op_exception_enable:1;
unsigned int pad3:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int binding_table_entry_count:8;
unsigned int pad4:5;
unsigned int single_program_flow:1;
} sf1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:6;
unsigned int pad3:1;
} thread4;
 
struct
{
unsigned int front_winding:1;
unsigned int viewport_transform:1;
unsigned int pad0:3;
unsigned int sf_viewport_state_offset:27;
} sf5;
struct
{
unsigned int pad0:9;
unsigned int dest_org_vbias:4;
unsigned int dest_org_hbias:4;
unsigned int scissor:1;
unsigned int disable_2x2_trifilter:1;
unsigned int disable_zero_pix_trifilter:1;
unsigned int point_rast_rule:2;
unsigned int line_endcap_aa_region_width:2;
unsigned int line_width:4;
unsigned int fast_scissor_disable:1;
unsigned int cull_mode:2;
unsigned int aa_enable:1;
} sf6;
 
struct
{
unsigned int point_size:11;
unsigned int use_point_size_state:1;
unsigned int subpixel_precision:1;
unsigned int sprite_point:1;
unsigned int pad0:11;
unsigned int trifan_pv:2;
unsigned int linestrip_pv:2;
unsigned int tristrip_pv:2;
unsigned int line_last_pixel_enable:1;
} sf7;
 
};
 
 
struct gen5_gs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
 
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:1;
unsigned int pad3:6;
} thread4;
struct
{
unsigned int sampler_count:3;
unsigned int pad0:2;
unsigned int sampler_state_pointer:27;
} gs5;
 
struct
{
unsigned int max_vp_index:4;
unsigned int pad0:26;
unsigned int reorder_enable:1;
unsigned int pad1:1;
} gs6;
};
 
 
struct gen5_vs_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct
{
unsigned int pad0:10;
unsigned int stats_enable:1;
unsigned int nr_urb_entries:7;
unsigned int pad1:1;
unsigned int urb_entry_allocation_size:5;
unsigned int pad2:1;
unsigned int max_threads:4;
unsigned int pad3:3;
} thread4;
 
struct
{
unsigned int sampler_count:3;
unsigned int pad0:2;
unsigned int sampler_state_pointer:27;
} vs5;
 
struct
{
unsigned int vs_enable:1;
unsigned int vert_cache_disable:1;
unsigned int pad0:30;
} vs6;
};
 
 
struct gen5_wm_unit_state
{
struct thread0 thread0;
struct thread1 thread1;
struct thread2 thread2;
struct thread3 thread3;
struct {
unsigned int stats_enable:1;
unsigned int pad0:1;
unsigned int sampler_count:3;
unsigned int sampler_state_pointer:27;
} wm4;
struct
{
unsigned int enable_8_pix:1;
unsigned int enable_16_pix:1;
unsigned int enable_32_pix:1;
unsigned int pad0:7;
unsigned int legacy_global_depth_bias:1;
unsigned int line_stipple:1;
unsigned int depth_offset:1;
unsigned int polygon_stipple:1;
unsigned int line_aa_region_width:2;
unsigned int line_endcap_aa_region_width:2;
unsigned int early_depth_test:1;
unsigned int thread_dispatch_enable:1;
unsigned int program_uses_depth:1;
unsigned int program_computes_depth:1;
unsigned int program_uses_killpixel:1;
unsigned int legacy_line_rast: 1;
unsigned int transposed_urb_read:1;
unsigned int max_threads:7;
} wm5;
float global_depth_offset_constant;
float global_depth_offset_scale;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_1:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_1:26;
} wm8;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_2:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_2:26;
} wm9;
 
struct {
unsigned int pad0:1;
unsigned int grf_reg_count_3:3;
unsigned int pad1:2;
unsigned int kernel_start_pointer_3:26;
} wm10;
};
 
struct gen5_wm_unit_state_padded {
struct gen5_wm_unit_state state;
char pad[64 - sizeof(struct gen5_wm_unit_state)];
};
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
struct gen5_sampler_default_border_color {
float color[4];
};
 
struct gen5_sampler_legacy_border_color {
uint8_t color[4];
};
 
struct gen5_sampler_state {
struct {
unsigned int shadow_function:3;
unsigned int lod_bias:11;
unsigned int min_filter:3;
unsigned int mag_filter:3;
unsigned int mip_filter:2;
unsigned int base_level:5;
unsigned int pad:1;
unsigned int lod_preclamp:1;
unsigned int border_color_mode:1;
unsigned int pad0:1;
unsigned int disable:1;
} ss0;
 
struct {
unsigned int r_wrap_mode:3;
unsigned int t_wrap_mode:3;
unsigned int s_wrap_mode:3;
unsigned int pad:3;
unsigned int max_lod:10;
unsigned int min_lod:10;
} ss1;
 
struct {
unsigned int pad:5;
unsigned int border_color_pointer:27;
} ss2;
 
struct {
uint32_t pad:13;
uint32_t address_round:6;
uint32_t max_aniso:3;
uint32_t chroma_key_mode:1;
uint32_t chroma_key_index:2;
uint32_t chroma_key_enable:1;
uint32_t mbz:6;
} ss3;
};
 
 
struct gen5_clipper_viewport
{
float xmin;
float xmax;
float ymin;
float ymax;
};
 
struct gen5_cc_viewport
{
float min_depth;
float max_depth;
};
 
struct gen5_sf_viewport
{
struct {
float m00;
float m11;
float m22;
float m30;
float m31;
float m32;
} viewport;
 
struct {
short xmin;
short ymin;
short xmax;
short ymax;
} scissor;
};
 
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
struct gen5_surface_state
{
struct {
unsigned int cube_pos_z:1;
unsigned int cube_neg_z:1;
unsigned int cube_pos_y:1;
unsigned int cube_neg_y:1;
unsigned int cube_pos_x:1;
unsigned int cube_neg_x:1;
unsigned int pad:3;
unsigned int render_cache_read_mode:1;
unsigned int mipmap_layout_mode:1;
unsigned int vert_line_stride_ofs:1;
unsigned int vert_line_stride:1;
unsigned int color_blend:1;
unsigned int writedisable_blue:1;
unsigned int writedisable_green:1;
unsigned int writedisable_red:1;
unsigned int writedisable_alpha:1;
unsigned int surface_format:9;
unsigned int data_return_format:1;
unsigned int pad0:1;
unsigned int surface_type:3;
} ss0;
struct {
unsigned int base_addr;
} ss1;
struct {
unsigned int render_target_rotation:2;
unsigned int mip_count:4;
unsigned int width:13;
unsigned int height:13;
} ss2;
 
struct {
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int pad:1;
unsigned int pitch:18;
unsigned int depth:11;
} ss3;
struct {
unsigned int pad:19;
unsigned int min_array_elt:9;
unsigned int min_lod:4;
} ss4;
 
struct {
unsigned int pad:20;
unsigned int y_offset:4;
unsigned int pad2:1;
unsigned int x_offset:7;
} ss5;
};
 
/* Surface state DW0 */
#define GEN5_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN5_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN5_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN5_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN5_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN5_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN5_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN5_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN5_SURFACE_FORMAT_SHIFT 18
#define GEN5_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define GEN5_SURFACE_TYPE_SHIFT 29
#define GEN5_SURFACE_TYPE_MASK GEN5_MASK(31, 29)
#define GEN5_SURFACE_1D 0
#define GEN5_SURFACE_2D 1
#define GEN5_SURFACE_3D 2
#define GEN5_SURFACE_CUBE 3
#define GEN5_SURFACE_BUFFER 4
#define GEN5_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN5_SURFACE_HEIGHT_SHIFT 19
#define GEN5_SURFACE_HEIGHT_MASK GEN5_MASK(31, 19)
#define GEN5_SURFACE_WIDTH_SHIFT 6
#define GEN5_SURFACE_WIDTH_MASK GEN5_MASK(18, 6)
#define GEN5_SURFACE_LOD_SHIFT 2
#define GEN5_SURFACE_LOD_MASK GEN5_MASK(5, 2)
 
/* Surface state DW3 */
#define GEN5_SURFACE_DEPTH_SHIFT 21
#define GEN5_SURFACE_DEPTH_MASK GEN5_MASK(31, 21)
#define GEN5_SURFACE_PITCH_SHIFT 3
#define GEN5_SURFACE_PITCH_MASK GEN5_MASK(19, 3)
#define GEN5_SURFACE_TILED (1 << 1)
#define GEN5_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN5_SURFACE_MIN_LOD_SHIFT 28
#define GEN5_SURFACE_MIN_LOD_MASK GEN5_MASK(31, 28)
 
/* Surface state DW5 */
#define GEN5_SURFACE_X_OFFSET_SHIFT 25
#define GEN5_SURFACE_X_OFFSET_MASK GEN5_MASK(31, 25)
#define GEN5_SURFACE_Y_OFFSET_SHIFT 20
#define GEN5_SURFACE_Y_OFFSET_MASK GEN5_MASK(23, 20)
 
struct gen5_vertex_buffer_state
{
struct {
unsigned int pitch:11;
unsigned int pad:15;
unsigned int access_type:1;
unsigned int vb_index:5;
} vb0;
unsigned int start_addr;
unsigned int max_index;
#if 1
unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
#endif
};
 
#define GEN5_VBP_MAX 17
 
struct gen5_vb_array_state {
struct header header;
struct gen5_vertex_buffer_state vb[GEN5_VBP_MAX];
};
 
 
struct gen5_vertex_element_state
{
struct
{
unsigned int src_offset:11;
unsigned int pad:5;
unsigned int src_format:9;
unsigned int pad0:1;
unsigned int valid:1;
unsigned int vertex_buffer_index:5;
} ve0;
struct
{
unsigned int dst_offset:8;
unsigned int pad:8;
unsigned int vfcomponent3:4;
unsigned int vfcomponent2:4;
unsigned int vfcomponent1:4;
unsigned int vfcomponent0:4;
} ve1;
};
 
#define GEN5_VEP_MAX 18
 
struct gen5_vertex_element_packet {
struct header header;
struct gen5_vertex_element_state ve[GEN5_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
};
 
 
struct gen5_urb_immediate {
unsigned int opcode:4;
unsigned int offset:6;
unsigned int swizzle_control:2;
unsigned int pad:1;
unsigned int allocate:1;
unsigned int used:1;
unsigned int complete:1;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
};
 
/* Instruction format for the execution units:
*/
struct gen5_instruction
{
struct
{
unsigned int opcode:7;
unsigned int pad:1;
unsigned int access_mode:1;
unsigned int mask_control:1;
unsigned int dependency_control:2;
unsigned int compression_control:2;
unsigned int thread_control:2;
unsigned int predicate_control:4;
unsigned int predicate_inverse:1;
unsigned int execution_size:3;
unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
unsigned int pad0:2;
unsigned int debug_control:1;
unsigned int saturate:1;
} header;
 
union {
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad:1;
unsigned int dest_subreg_nr:5;
unsigned int dest_reg_nr:8;
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} da1;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad:6;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} ia1;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad0:1;
unsigned int dest_writemask:4;
unsigned int dest_subreg_nr:1;
unsigned int dest_reg_nr:8;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} da16;
 
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad0:6;
unsigned int dest_writemask:4;
int dest_indirect_offset:6;
unsigned int dest_subreg_nr:3;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} ia16;
} bits1;
 
 
union {
struct
{
unsigned int src0_subreg_nr:5;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} da1;
 
struct
{
int src0_indirect_offset:10;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} ia1;
 
struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
unsigned int src0_subreg_nr:1;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} da16;
 
struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
int src0_indirect_offset:6;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia16;
 
} bits2;
 
union
{
struct
{
unsigned int src1_subreg_nr:5;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int pad0:7;
} da1;
 
struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
unsigned int src1_subreg_nr:1;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int pad2:7;
} da16;
 
struct
{
int src1_indirect_offset:10;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia1;
 
struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
int src1_indirect_offset:6;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad2:6;
} ia16;
 
 
struct
{
int jump_count:16; /* note: signed */
unsigned int pop_count:4;
unsigned int pad0:12;
} if_else;
 
struct {
unsigned int function:4;
unsigned int int_type:1;
unsigned int precision:1;
unsigned int saturate:1;
unsigned int data_type:1;
unsigned int pad0:8;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} math;
 
struct {
unsigned int binding_table_index:8;
unsigned int sampler:4;
unsigned int return_format:2;
unsigned int msg_type:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} sampler;
 
struct gen5_urb_immediate urb;
 
struct {
unsigned int binding_table_index:8;
unsigned int msg_control:4;
unsigned int msg_type:2;
unsigned int target_cache:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_read;
 
struct {
unsigned int binding_table_index:8;
unsigned int msg_control:3;
unsigned int pixel_scoreboard_clear:1;
unsigned int msg_type:3;
unsigned int send_commit_msg:1;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_write;
 
struct {
unsigned int pad:16;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} generic;
 
unsigned int ud;
} bits3;
};
 
/* media pipeline */
 
struct gen5_vfe_state {
struct {
unsigned int per_thread_scratch_space:4;
unsigned int pad3:3;
unsigned int extend_vfe_state_present:1;
unsigned int pad2:2;
unsigned int scratch_base:22;
} vfe0;
 
struct {
unsigned int debug_counter_control:2;
unsigned int children_present:1;
unsigned int vfe_mode:4;
unsigned int pad2:2;
unsigned int num_urb_entries:7;
unsigned int urb_entry_alloc_size:9;
unsigned int max_threads:7;
} vfe1;
 
struct {
unsigned int pad4:4;
unsigned int interface_descriptor_base:28;
} vfe2;
};
 
struct gen5_vld_state {
struct {
unsigned int pad6:6;
unsigned int scan_order:1;
unsigned int intra_vlc_format:1;
unsigned int quantizer_scale_type:1;
unsigned int concealment_motion_vector:1;
unsigned int frame_predict_frame_dct:1;
unsigned int top_field_first:1;
unsigned int picture_structure:2;
unsigned int intra_dc_precision:2;
unsigned int f_code_0_0:4;
unsigned int f_code_0_1:4;
unsigned int f_code_1_0:4;
unsigned int f_code_1_1:4;
} vld0;
 
struct {
unsigned int pad2:9;
unsigned int picture_coding_type:2;
unsigned int pad:21;
} vld1;
 
struct {
unsigned int index_0:4;
unsigned int index_1:4;
unsigned int index_2:4;
unsigned int index_3:4;
unsigned int index_4:4;
unsigned int index_5:4;
unsigned int index_6:4;
unsigned int index_7:4;
} desc_remap_table0;
 
struct {
unsigned int index_8:4;
unsigned int index_9:4;
unsigned int index_10:4;
unsigned int index_11:4;
unsigned int index_12:4;
unsigned int index_13:4;
unsigned int index_14:4;
unsigned int index_15:4;
} desc_remap_table1;
};
 
struct gen5_interface_descriptor {
struct {
unsigned int grf_reg_blocks:4;
unsigned int pad:2;
unsigned int kernel_start_pointer:26;
} desc0;
 
struct {
unsigned int pad:7;
unsigned int software_exception:1;
unsigned int pad2:3;
unsigned int maskstack_exception:1;
unsigned int pad3:1;
unsigned int illegal_opcode_exception:1;
unsigned int pad4:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int single_program_flow:1;
unsigned int pad5:1;
unsigned int const_urb_entry_read_offset:6;
unsigned int const_urb_entry_read_len:6;
} desc1;
 
struct {
unsigned int pad:2;
unsigned int sampler_count:3;
unsigned int sampler_state_pointer:27;
} desc2;
 
struct {
unsigned int binding_table_entry_count:5;
unsigned int binding_table_pointer:27;
} desc3;
};
 
struct gen6_blend_state
{
struct {
unsigned int dest_blend_factor:5;
unsigned int source_blend_factor:5;
unsigned int pad3:1;
unsigned int blend_func:3;
unsigned int pad2:1;
unsigned int ia_dest_blend_factor:5;
unsigned int ia_source_blend_factor:5;
unsigned int pad1:1;
unsigned int ia_blend_func:3;
unsigned int pad0:1;
unsigned int ia_blend_enable:1;
unsigned int blend_enable:1;
} blend0;
 
struct {
unsigned int post_blend_clamp_enable:1;
unsigned int pre_blend_clamp_enable:1;
unsigned int clamp_range:2;
unsigned int pad0:4;
unsigned int x_dither_offset:2;
unsigned int y_dither_offset:2;
unsigned int dither_enable:1;
unsigned int alpha_test_func:3;
unsigned int alpha_test_enable:1;
unsigned int pad1:1;
unsigned int logic_op_func:4;
unsigned int logic_op_enable:1;
unsigned int pad2:1;
unsigned int write_disable_b:1;
unsigned int write_disable_g:1;
unsigned int write_disable_r:1;
unsigned int write_disable_a:1;
unsigned int pad3:1;
unsigned int alpha_to_coverage_dither:1;
unsigned int alpha_to_one:1;
unsigned int alpha_to_coverage:1;
} blend1;
};
 
struct gen6_color_calc_state
{
struct {
unsigned int alpha_test_format:1;
unsigned int pad0:14;
unsigned int round_disable:1;
unsigned int bf_stencil_ref:8;
unsigned int stencil_ref:8;
} cc0;
 
union {
float alpha_ref_f;
struct {
unsigned int ui:8;
unsigned int pad0:24;
} alpha_ref_fi;
} cc1;
 
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
struct gen6_depth_stencil_state
{
struct {
unsigned int pad0:3;
unsigned int bf_stencil_pass_depth_pass_op:3;
unsigned int bf_stencil_pass_depth_fail_op:3;
unsigned int bf_stencil_fail_op:3;
unsigned int bf_stencil_func:3;
unsigned int bf_stencil_enable:1;
unsigned int pad1:2;
unsigned int stencil_write_enable:1;
unsigned int stencil_pass_depth_pass_op:3;
unsigned int stencil_pass_depth_fail_op:3;
unsigned int stencil_fail_op:3;
unsigned int stencil_func:3;
unsigned int stencil_enable:1;
} ds0;
 
struct {
unsigned int bf_stencil_write_mask:8;
unsigned int bf_stencil_test_mask:8;
unsigned int stencil_write_mask:8;
unsigned int stencil_test_mask:8;
} ds1;
 
struct {
unsigned int pad0:26;
unsigned int depth_write_enable:1;
unsigned int depth_test_func:3;
unsigned int pad1:1;
unsigned int depth_test_enable:1;
} ds2;
};
 
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
typedef enum {
WM_KERNEL = 0,
WM_KERNEL_P,
 
WM_KERNEL_MASK,
WM_KERNEL_MASK_P,
 
WM_KERNEL_MASKCA,
WM_KERNEL_MASKCA_P,
 
WM_KERNEL_MASKSA,
WM_KERNEL_MASKSA_P,
 
WM_KERNEL_OPACITY,
WM_KERNEL_OPACITY_P,
 
WM_KERNEL_VIDEO_PLANAR,
WM_KERNEL_VIDEO_PACKED,
KERNEL_COUNT
} wm_kernel_t;
#endif
/drivers/video/Intel-2D/gen7_render.h
0,0 → 1,1364
#ifndef GEN7_RENDER_H
#define GEN7_RENDER_H
 
#define INTEL_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
 
#define GEN7_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN7_STATE_BASE_ADDRESS GEN7_3D(0, 1, 1)
#define GEN7_STATE_SIP GEN7_3D(0, 1, 2)
 
#define GEN7_PIPELINE_SELECT GEN7_3D(1, 1, 4)
 
#define GEN7_MEDIA_STATE_POINTERS GEN7_3D(2, 0, 0)
#define GEN7_MEDIA_OBJECT GEN7_3D(2, 1, 0)
 
#define GEN7_3DSTATE_VERTEX_BUFFERS GEN7_3D(3, 0, 8)
#define GEN7_3DSTATE_VERTEX_ELEMENTS GEN7_3D(3, 0, 9)
#define GEN7_3DSTATE_INDEX_BUFFER GEN7_3D(3, 0, 0xa)
#define GEN7_3DSTATE_VF_STATISTICS GEN7_3D(3, 0, 0xb)
 
#define GEN7_3DSTATE_DRAWING_RECTANGLE GEN7_3D(3, 1, 0)
#define GEN7_3DSTATE_CONSTANT_COLOR GEN7_3D(3, 1, 1)
#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD GEN7_3D(3, 1, 2)
#define GEN7_3DSTATE_CHROMA_KEY GEN7_3D(3, 1, 4)
 
#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET GEN7_3D(3, 1, 6)
#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN GEN7_3D(3, 1, 7)
#define GEN7_3DSTATE_LINE_STIPPLE GEN7_3D(3, 1, 8)
#define GEN7_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN7_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN7_3DSTATE_AA_LINE_PARAMS GEN7_3D(3, 1, 0xa)
#define GEN7_3DSTATE_GS_SVB_INDEX GEN7_3D(3, 1, 0xb)
 
#define GEN7_3DPRIMITIVE GEN7_3D(3, 3, 0)
 
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS GEN7_3D(3, 0, 0x02)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN7_3DSTATE_URB GEN7_3D(3, 0, 0x05)
/* DW1 */
# define GEN7_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN7_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN7_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN7_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS GEN7_3D(3, 0, 0x0d)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN7_3DSTATE_CC_STATE_POINTERS GEN7_3D(3, 0, 0x0e)
 
#define GEN7_3DSTATE_VS GEN7_3D(3, 0, 0x10)
 
#define GEN7_3DSTATE_GS GEN7_3D(3, 0, 0x11)
/* DW4 */
# define GEN7_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN7_3DSTATE_CLIP GEN7_3D(3, 0, 0x12)
 
#define GEN7_3DSTATE_SF GEN7_3D(3, 0, 0x13)
/* DW1 */
# define GEN7_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN7_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN7_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN7_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN7_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN7_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN7_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN7_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
#define GEN7_3DSTATE_WM GEN7_3D(3, 0, 0x14)
/* DW1 */
# define GEN7_WM_STATISTICS_ENABLE (1 << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16)
# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15)
# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14)
# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13)
# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12)
# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
# define GEN7_WM_MSDISPMODE_PERPIXEL (1 << 31)
 
 
#define GEN7_3DSTATE_CONSTANT_VS GEN7_3D(3, 0, 0x15)
#define GEN7_3DSTATE_CONSTANT_GS GEN7_3D(3, 0, 0x16)
#define GEN7_3DSTATE_CONSTANT_PS GEN7_3D(3, 0, 0x17)
 
#define GEN7_3DSTATE_SAMPLE_MASK GEN7_3D(3, 0, 0x18)
 
#define GEN7_3DSTATE_MULTISAMPLE GEN7_3D(3, 1, 0x0d)
/* DW1 */
# define GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
/* for GEN7_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN7_PIPE_CONTROL */
#define GEN7_PIPE_CONTROL GEN7_3D(3, 2, 0)
#define GEN7_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN7_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN7_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN7_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN7_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN7_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN7_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN7_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN7_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN7_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN7_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define GEN7_VB0_BUFFER_INDEX_SHIFT 26
#define GEN7_VB0_VERTEXDATA (0 << 20)
#define GEN7_VB0_INSTANCEDATA (1 << 20)
#define GEN7_VB0_BUFFER_PITCH_SHIFT 0
#define GEN7_VB0_ADDRESS_MODIFY_ENABLE (1 << 14)
 
/* VERTEX_ELEMENT_STATE Structure */
#define GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT 26
#define GEN7_VE0_VALID (1 << 25)
#define GEN7_VE0_FORMAT_SHIFT 16
#define GEN7_VE0_OFFSET_SHIFT 0
#define GEN7_VE1_VFCOMPONENT_0_SHIFT 28
#define GEN7_VE1_VFCOMPONENT_1_SHIFT 24
#define GEN7_VE1_VFCOMPONENT_2_SHIFT 20
#define GEN7_VE1_VFCOMPONENT_3_SHIFT 16
#define GEN7_VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN7_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
 
#define GEN7_SVG_CTL 0x7400
 
#define GEN7_SVG_CTL_GS_BA (0 << 8)
#define GEN7_SVG_CTL_SS_BA (1 << 8)
#define GEN7_SVG_CTL_IO_BA (2 << 8)
#define GEN7_SVG_CTL_GS_AUB (3 << 8)
#define GEN7_SVG_CTL_IO_AUB (4 << 8)
#define GEN7_SVG_CTL_SIP (5 << 8)
 
#define GEN7_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN7_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN7_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN7_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN7_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN7_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN7_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN7_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN7_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_VF_STRG_VAL 0x7504
#define GEN7_VF_STR_VL_OVR 0x7508
#define GEN7_VF_VC_OVR 0x750c
#define GEN7_VF_STR_PSKIP 0x7510
#define GEN7_VF_MAX_PRIM 0x7514
#define GEN7_VF_RDATA 0x7518
 
#define GEN7_VS_CTL 0x7600
#define GEN7_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN7_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_VS_STRG_VAL 0x7604
#define GEN7_VS_RDATA 0x7608
 
#define GEN7_SF_CTL 0x7b00
#define GEN7_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN7_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN7_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN7_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_SF_STRG_VAL 0x7b04
#define GEN7_SF_RDATA 0x7b18
 
#define GEN7_WIZ_CTL 0x7c00
#define GEN7_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN7_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN7_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN7_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN7_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN7_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN7_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_WIZ_STRG_VAL 0x7c04
#define GEN7_WIZ_RDATA 0x7c18
 
#define GEN7_TS_CTL 0x7e00
#define GEN7_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN7_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN7_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN7_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN7_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN7_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_TS_STRG_VAL 0x7e04
#define GEN7_TS_RDATA 0x7e08
 
#define GEN7_TD_CTL 0x8000
#define GEN7_TD_CTL_MUX_SHIFT 8
#define GEN7_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN7_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN7_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN7_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN7_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN7_TD_CTL2 0x8004
#define GEN7_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN7_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN7_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN7_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN7_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN7_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN7_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN7_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN7_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN7_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN7_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN7_TD_VF_VS_EMSK 0x8008
#define GEN7_TD_GS_EMSK 0x800c
#define GEN7_TD_CLIP_EMSK 0x8010
#define GEN7_TD_SF_EMSK 0x8014
#define GEN7_TD_WIZ_EMSK 0x8018
#define GEN7_TD_0_6_EHTRG_VAL 0x801c
#define GEN7_TD_0_7_EHTRG_VAL 0x8020
#define GEN7_TD_0_6_EHTRG_MSK 0x8024
#define GEN7_TD_0_7_EHTRG_MSK 0x8028
#define GEN7_TD_RDATA 0x802c
#define GEN7_TD_TS_EMSK 0x8030
 
#define GEN7_EU_CTL 0x8800
#define GEN7_EU_CTL_SELECT_SHIFT 16
#define GEN7_EU_CTL_DATA_MUX_SHIFT 8
#define GEN7_EU_ATT_0 0x8810
#define GEN7_EU_ATT_1 0x8814
#define GEN7_EU_ATT_DATA_0 0x8820
#define GEN7_EU_ATT_DATA_1 0x8824
#define GEN7_EU_ATT_CLR_0 0x8830
#define GEN7_EU_ATT_CLR_1 0x8834
#define GEN7_EU_RDATA 0x8840
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN7_ANISORATIO_2 0
#define GEN7_ANISORATIO_4 1
#define GEN7_ANISORATIO_6 2
#define GEN7_ANISORATIO_8 3
#define GEN7_ANISORATIO_10 4
#define GEN7_ANISORATIO_12 5
#define GEN7_ANISORATIO_14 6
#define GEN7_ANISORATIO_16 7
 
#define GEN7_BLENDFACTOR_ONE 0x1
#define GEN7_BLENDFACTOR_SRC_COLOR 0x2
#define GEN7_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN7_BLENDFACTOR_DST_ALPHA 0x4
#define GEN7_BLENDFACTOR_DST_COLOR 0x5
#define GEN7_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN7_BLENDFACTOR_CONST_COLOR 0x7
#define GEN7_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN7_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN7_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN7_BLENDFACTOR_ZERO 0x11
#define GEN7_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN7_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN7_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN7_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN7_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN7_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN7_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN7_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN7_BLENDFUNCTION_ADD 0
#define GEN7_BLENDFUNCTION_SUBTRACT 1
#define GEN7_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN7_BLENDFUNCTION_MIN 3
#define GEN7_BLENDFUNCTION_MAX 4
 
#define GEN7_ALPHATEST_FORMAT_UNORM8 0
#define GEN7_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN7_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN7_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN7_CLIP_API_OGL 0
#define GEN7_CLIP_API_DX 1
 
#define GEN7_CLIPMODE_NORMAL 0
#define GEN7_CLIPMODE_CLIP_ALL 1
#define GEN7_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN7_CLIPMODE_REJECT_ALL 3
#define GEN7_CLIPMODE_ACCEPT_ALL 4
 
#define GEN7_CLIP_NDCSPACE 0
#define GEN7_CLIP_SCREENSPACE 1
 
#define GEN7_COMPAREFUNCTION_ALWAYS 0
#define GEN7_COMPAREFUNCTION_NEVER 1
#define GEN7_COMPAREFUNCTION_LESS 2
#define GEN7_COMPAREFUNCTION_EQUAL 3
#define GEN7_COMPAREFUNCTION_LEQUAL 4
#define GEN7_COMPAREFUNCTION_GREATER 5
#define GEN7_COMPAREFUNCTION_NOTEQUAL 6
#define GEN7_COMPAREFUNCTION_GEQUAL 7
 
#define GEN7_COVERAGE_PIXELS_HALF 0
#define GEN7_COVERAGE_PIXELS_1 1
#define GEN7_COVERAGE_PIXELS_2 2
#define GEN7_COVERAGE_PIXELS_4 3
 
#define GEN7_CULLMODE_BOTH 0
#define GEN7_CULLMODE_NONE 1
#define GEN7_CULLMODE_FRONT 2
#define GEN7_CULLMODE_BACK 3
 
#define GEN7_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN7_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN7_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN7_DEPTHFORMAT_D32_FLOAT 1
#define GEN7_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN7_DEPTHFORMAT_D16_UNORM 5
 
#define GEN7_FLOATING_POINT_IEEE_754 0
#define GEN7_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN7_FRONTWINDING_CW 0
#define GEN7_FRONTWINDING_CCW 1
 
#define GEN7_INDEX_BYTE 0
#define GEN7_INDEX_WORD 1
#define GEN7_INDEX_DWORD 2
 
#define GEN7_LOGICOPFUNCTION_CLEAR 0
#define GEN7_LOGICOPFUNCTION_NOR 1
#define GEN7_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN7_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN7_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN7_LOGICOPFUNCTION_INVERT 5
#define GEN7_LOGICOPFUNCTION_XOR 6
#define GEN7_LOGICOPFUNCTION_NAND 7
#define GEN7_LOGICOPFUNCTION_AND 8
#define GEN7_LOGICOPFUNCTION_EQUIV 9
#define GEN7_LOGICOPFUNCTION_NOOP 10
#define GEN7_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN7_LOGICOPFUNCTION_COPY 12
#define GEN7_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN7_LOGICOPFUNCTION_OR 14
#define GEN7_LOGICOPFUNCTION_SET 15
 
#define GEN7_MAPFILTER_NEAREST 0x0
#define GEN7_MAPFILTER_LINEAR 0x1
#define GEN7_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN7_MIPFILTER_NONE 0
#define GEN7_MIPFILTER_NEAREST 1
#define GEN7_MIPFILTER_LINEAR 3
 
#define GEN7_POLYGON_FRONT_FACING 0
#define GEN7_POLYGON_BACK_FACING 1
 
#define GEN7_PREFILTER_ALWAYS 0x0
#define GEN7_PREFILTER_NEVER 0x1
#define GEN7_PREFILTER_LESS 0x2
#define GEN7_PREFILTER_EQUAL 0x3
#define GEN7_PREFILTER_LEQUAL 0x4
#define GEN7_PREFILTER_GREATER 0x5
#define GEN7_PREFILTER_NOTEQUAL 0x6
#define GEN7_PREFILTER_GEQUAL 0x7
 
#define GEN7_PROVOKING_VERTEX_0 0
#define GEN7_PROVOKING_VERTEX_1 1
#define GEN7_PROVOKING_VERTEX_2 2
 
#define GEN7_RASTRULE_UPPER_LEFT 0
#define GEN7_RASTRULE_UPPER_RIGHT 1
 
#define GEN7_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN7_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN7_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN7_STENCILOP_KEEP 0
#define GEN7_STENCILOP_ZERO 1
#define GEN7_STENCILOP_REPLACE 2
#define GEN7_STENCILOP_INCRSAT 3
#define GEN7_STENCILOP_DECRSAT 4
#define GEN7_STENCILOP_INCR 5
#define GEN7_STENCILOP_DECR 6
#define GEN7_STENCILOP_INVERT 7
 
#define GEN7_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN7_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN7_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN7_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN7_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN7_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN7_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN7_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN7_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN7_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN7_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN7_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN7_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN7_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN7_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN7_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN7_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN7_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN7_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN7_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN7_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN7_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN7_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN7_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN7_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN7_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN7_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN7_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN7_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN7_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN7_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN7_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN7_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN7_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN7_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN7_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN7_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN7_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN7_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN7_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN7_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN7_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN7_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN7_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN7_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN7_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN7_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN7_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN7_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN7_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN7_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN7_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN7_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN7_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN7_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN7_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN7_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN7_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN7_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN7_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN7_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN7_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN7_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN7_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN7_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN7_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN7_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN7_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN7_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN7_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN7_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN7_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN7_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN7_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN7_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN7_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN7_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN7_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN7_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN7_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN7_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN7_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN7_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN7_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN7_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN7_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN7_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN7_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN7_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN7_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN7_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN7_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN7_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN7_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN7_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN7_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN7_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN7_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN7_SURFACEFORMAT_R16_SINT 0x10C
#define GEN7_SURFACEFORMAT_R16_UINT 0x10D
#define GEN7_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN7_SURFACEFORMAT_I16_UNORM 0x111
#define GEN7_SURFACEFORMAT_L16_UNORM 0x112
#define GEN7_SURFACEFORMAT_A16_UNORM 0x113
#define GEN7_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN7_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN7_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN7_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN7_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN7_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN7_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN7_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN7_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN7_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN7_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN7_SURFACEFORMAT_R8_UNORM 0x140
#define GEN7_SURFACEFORMAT_R8_SNORM 0x141
#define GEN7_SURFACEFORMAT_R8_SINT 0x142
#define GEN7_SURFACEFORMAT_R8_UINT 0x143
#define GEN7_SURFACEFORMAT_A8_UNORM 0x144
#define GEN7_SURFACEFORMAT_I8_UNORM 0x145
#define GEN7_SURFACEFORMAT_L8_UNORM 0x146
#define GEN7_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN7_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN7_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN7_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN7_SURFACEFORMAT_R1_UINT 0x181
#define GEN7_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN7_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN7_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN7_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN7_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN7_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN7_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN7_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN7_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN7_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN7_SURFACEFORMAT_MONO8 0x18E
#define GEN7_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN7_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN7_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN7_SURFACEFORMAT_FXT1 0x192
#define GEN7_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN7_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN7_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN7_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN7_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN7_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN7_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN7_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN7_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN7_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN7_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN7_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN7_SURFACERETURNFORMAT_FLOAT32 0
#define GEN7_SURFACERETURNFORMAT_S1 1
 
#define GEN7_SURFACE_1D 0
#define GEN7_SURFACE_2D 1
#define GEN7_SURFACE_3D 2
#define GEN7_SURFACE_CUBE 3
#define GEN7_SURFACE_BUFFER 4
#define GEN7_SURFACE_NULL 7
 
#define GEN7_BORDER_COLOR_MODE_DEFAULT 0
#define GEN7_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN7_TEXCOORDMODE_WRAP 0
#define GEN7_TEXCOORDMODE_MIRROR 1
#define GEN7_TEXCOORDMODE_CLAMP 2
#define GEN7_TEXCOORDMODE_CUBE 3
#define GEN7_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN7_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN7_THREAD_PRIORITY_NORMAL 0
#define GEN7_THREAD_PRIORITY_HIGH 1
 
#define GEN7_TILEWALK_XMAJOR 0
#define GEN7_TILEWALK_YMAJOR 1
 
#define GEN7_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN7_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN7_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN7_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define GEN7_VFCOMPONENT_NOSTORE 0
#define GEN7_VFCOMPONENT_STORE_SRC 1
#define GEN7_VFCOMPONENT_STORE_0 2
#define GEN7_VFCOMPONENT_STORE_1_FLT 3
#define GEN7_VFCOMPONENT_STORE_1_INT 4
#define GEN7_VFCOMPONENT_STORE_VID 5
#define GEN7_VFCOMPONENT_STORE_IID 6
#define GEN7_VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN7_ALIGN_1 0
#define GEN7_ALIGN_16 1
 
#define GEN7_ADDRESS_DIRECT 0
#define GEN7_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN7_CHANNEL_X 0
#define GEN7_CHANNEL_Y 1
#define GEN7_CHANNEL_Z 2
#define GEN7_CHANNEL_W 3
 
#define GEN7_COMPRESSION_NONE 0
#define GEN7_COMPRESSION_2NDHALF 1
#define GEN7_COMPRESSION_COMPRESSED 2
 
#define GEN7_CONDITIONAL_NONE 0
#define GEN7_CONDITIONAL_Z 1
#define GEN7_CONDITIONAL_NZ 2
#define GEN7_CONDITIONAL_EQ 1 /* Z */
#define GEN7_CONDITIONAL_NEQ 2 /* NZ */
#define GEN7_CONDITIONAL_G 3
#define GEN7_CONDITIONAL_GE 4
#define GEN7_CONDITIONAL_L 5
#define GEN7_CONDITIONAL_LE 6
#define GEN7_CONDITIONAL_C 7
#define GEN7_CONDITIONAL_O 8
 
#define GEN7_DEBUG_NONE 0
#define GEN7_DEBUG_BREAKPOINT 1
 
#define GEN7_DEPENDENCY_NORMAL 0
#define GEN7_DEPENDENCY_NOTCLEARED 1
#define GEN7_DEPENDENCY_NOTCHECKED 2
#define GEN7_DEPENDENCY_DISABLE 3
 
#define GEN7_EXECUTE_1 0
#define GEN7_EXECUTE_2 1
#define GEN7_EXECUTE_4 2
#define GEN7_EXECUTE_8 3
#define GEN7_EXECUTE_16 4
#define GEN7_EXECUTE_32 5
 
#define GEN7_HORIZONTAL_STRIDE_0 0
#define GEN7_HORIZONTAL_STRIDE_1 1
#define GEN7_HORIZONTAL_STRIDE_2 2
#define GEN7_HORIZONTAL_STRIDE_4 3
 
#define GEN7_INSTRUCTION_NORMAL 0
#define GEN7_INSTRUCTION_SATURATE 1
 
#define INTEL_MASK_ENABLE 0
#define INTEL_MASK_DISABLE 1
 
#define GEN7_OPCODE_MOV 1
#define GEN7_OPCODE_SEL 2
#define GEN7_OPCODE_NOT 4
#define GEN7_OPCODE_AND 5
#define GEN7_OPCODE_OR 6
#define GEN7_OPCODE_XOR 7
#define GEN7_OPCODE_SHR 8
#define GEN7_OPCODE_SHL 9
#define GEN7_OPCODE_RSR 10
#define GEN7_OPCODE_RSL 11
#define GEN7_OPCODE_ASR 12
#define GEN7_OPCODE_CMP 16
#define GEN7_OPCODE_JMPI 32
#define GEN7_OPCODE_IF 34
#define GEN7_OPCODE_IFF 35
#define GEN7_OPCODE_ELSE 36
#define GEN7_OPCODE_ENDIF 37
#define GEN7_OPCODE_DO 38
#define GEN7_OPCODE_WHILE 39
#define GEN7_OPCODE_BREAK 40
#define GEN7_OPCODE_CONTINUE 41
#define GEN7_OPCODE_HALT 42
#define GEN7_OPCODE_MSAVE 44
#define GEN7_OPCODE_MRESTORE 45
#define GEN7_OPCODE_PUSH 46
#define GEN7_OPCODE_POP 47
#define GEN7_OPCODE_WAIT 48
#define GEN7_OPCODE_SEND 49
#define GEN7_OPCODE_ADD 64
#define GEN7_OPCODE_MUL 65
#define GEN7_OPCODE_AVG 66
#define GEN7_OPCODE_FRC 67
#define GEN7_OPCODE_RNDU 68
#define GEN7_OPCODE_RNDD 69
#define GEN7_OPCODE_RNDE 70
#define GEN7_OPCODE_RNDZ 71
#define GEN7_OPCODE_MAC 72
#define GEN7_OPCODE_MACH 73
#define GEN7_OPCODE_LZD 74
#define GEN7_OPCODE_SAD2 80
#define GEN7_OPCODE_SADA2 81
#define GEN7_OPCODE_DP4 84
#define GEN7_OPCODE_DPH 85
#define GEN7_OPCODE_DP3 86
#define GEN7_OPCODE_DP2 87
#define GEN7_OPCODE_DPA2 88
#define GEN7_OPCODE_LINE 89
#define GEN7_OPCODE_NOP 126
 
#define GEN7_PREDICATE_NONE 0
#define GEN7_PREDICATE_NORMAL 1
#define GEN7_PREDICATE_ALIGN1_ANYV 2
#define GEN7_PREDICATE_ALIGN1_ALLV 3
#define GEN7_PREDICATE_ALIGN1_ANY2H 4
#define GEN7_PREDICATE_ALIGN1_ALL2H 5
#define GEN7_PREDICATE_ALIGN1_ANY4H 6
#define GEN7_PREDICATE_ALIGN1_ALL4H 7
#define GEN7_PREDICATE_ALIGN1_ANY8H 8
#define GEN7_PREDICATE_ALIGN1_ALL8H 9
#define GEN7_PREDICATE_ALIGN1_ANY16H 10
#define GEN7_PREDICATE_ALIGN1_ALL16H 11
#define GEN7_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN7_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN7_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN7_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN7_PREDICATE_ALIGN16_ANY4H 6
#define GEN7_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN7_ARCHITECTURE_REGISTER_FILE 0
#define GEN7_GENERAL_REGISTER_FILE 1
#define GEN7_MESSAGE_REGISTER_FILE 2
#define GEN7_IMMEDIATE_VALUE 3
 
#define GEN7_REGISTER_TYPE_UD 0
#define GEN7_REGISTER_TYPE_D 1
#define GEN7_REGISTER_TYPE_UW 2
#define GEN7_REGISTER_TYPE_W 3
#define GEN7_REGISTER_TYPE_UB 4
#define GEN7_REGISTER_TYPE_B 5
#define GEN7_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN7_REGISTER_TYPE_HF 6
#define GEN7_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN7_REGISTER_TYPE_F 7
 
#define GEN7_ARF_NULL 0x00
#define GEN7_ARF_ADDRESS 0x10
#define GEN7_ARF_ACCUMULATOR 0x20
#define GEN7_ARF_FLAG 0x30
#define GEN7_ARF_MASK 0x40
#define GEN7_ARF_MASK_STACK 0x50
#define GEN7_ARF_MASK_STACK_DEPTH 0x60
#define GEN7_ARF_STATE 0x70
#define GEN7_ARF_CONTROL 0x80
#define GEN7_ARF_NOTIFICATION_COUNT 0x90
#define GEN7_ARF_IP 0xA0
 
#define GEN7_AMASK 0
#define GEN7_IMASK 1
#define GEN7_LMASK 2
#define GEN7_CMASK 3
 
#define GEN7_THREAD_NORMAL 0
#define GEN7_THREAD_ATOMIC 1
#define GEN7_THREAD_SWITCH 2
 
#define GEN7_VERTICAL_STRIDE_0 0
#define GEN7_VERTICAL_STRIDE_1 1
#define GEN7_VERTICAL_STRIDE_2 2
#define GEN7_VERTICAL_STRIDE_4 3
#define GEN7_VERTICAL_STRIDE_8 4
#define GEN7_VERTICAL_STRIDE_16 5
#define GEN7_VERTICAL_STRIDE_32 6
#define GEN7_VERTICAL_STRIDE_64 7
#define GEN7_VERTICAL_STRIDE_128 8
#define GEN7_VERTICAL_STRIDE_256 9
#define GEN7_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN7_WIDTH_1 0
#define GEN7_WIDTH_2 1
#define GEN7_WIDTH_4 2
#define GEN7_WIDTH_8 3
#define GEN7_WIDTH_16 4
 
#define GEN7_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN7_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN7_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN7_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN7_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN7_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN7_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN7_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN7_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN7_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN7_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN7_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN7_POLYGON_FACING_FRONT 0
#define GEN7_POLYGON_FACING_BACK 1
 
#define GEN7_MESSAGE_TARGET_NULL 0
#define GEN7_MESSAGE_TARGET_MATH 1
#define GEN7_MESSAGE_TARGET_SAMPLER 2
#define GEN7_MESSAGE_TARGET_GATEWAY 3
#define GEN7_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN7_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN7_MESSAGE_TARGET_URB 6
#define GEN7_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN7_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN7_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN7_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN7_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN7_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN7_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN7_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN7_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN7_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN7_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN7_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN7_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN7_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN7_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN7_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN7_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN7_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN7_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN7_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN7_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN7_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
#define GEN7_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN7_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN7_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN7_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN7_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN7_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN7_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN7_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
#define GEN7_MATH_FUNCTION_INV 1
#define GEN7_MATH_FUNCTION_LOG 2
#define GEN7_MATH_FUNCTION_EXP 3
#define GEN7_MATH_FUNCTION_SQRT 4
#define GEN7_MATH_FUNCTION_RSQ 5
#define GEN7_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN7_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN7_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN7_MATH_FUNCTION_TAN 9
#define GEN7_MATH_FUNCTION_POW 10
#define GEN7_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN7_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN7_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define GEN7_MATH_INTEGER_UNSIGNED 0
#define GEN7_MATH_INTEGER_SIGNED 1
 
#define GEN7_MATH_PRECISION_FULL 0
#define GEN7_MATH_PRECISION_PARTIAL 1
 
#define GEN7_MATH_SATURATE_NONE 0
#define GEN7_MATH_SATURATE_SATURATE 1
 
#define GEN7_MATH_DATA_VECTOR 0
#define GEN7_MATH_DATA_SCALAR 1
 
#define GEN7_URB_OPCODE_WRITE 0
 
#define GEN7_URB_SWIZZLE_NONE 0
#define GEN7_URB_SWIZZLE_INTERLEAVE 1
#define GEN7_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN7_SCRATCH_SPACE_SIZE_1K 0
#define GEN7_SCRATCH_SPACE_SIZE_2K 1
#define GEN7_SCRATCH_SPACE_SIZE_4K 2
#define GEN7_SCRATCH_SPACE_SIZE_8K 3
#define GEN7_SCRATCH_SPACE_SIZE_16K 4
#define GEN7_SCRATCH_SPACE_SIZE_32K 5
#define GEN7_SCRATCH_SPACE_SIZE_64K 6
#define GEN7_SCRATCH_SPACE_SIZE_128K 7
#define GEN7_SCRATCH_SPACE_SIZE_256K 8
#define GEN7_SCRATCH_SPACE_SIZE_512K 9
#define GEN7_SCRATCH_SPACE_SIZE_1M 10
#define GEN7_SCRATCH_SPACE_SIZE_2M 11
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
struct gen7_sampler_default_border_color {
float color[4];
};
 
struct gen7_sampler_legacy_border_color {
uint8_t color[4];
};
 
struct gen7_blend_state {
struct {
uint32_t dest_blend_factor:5;
uint32_t source_blend_factor:5;
uint32_t pad3:1;
uint32_t blend_func:3;
uint32_t pad2:1;
uint32_t ia_dest_blend_factor:5;
uint32_t ia_source_blend_factor:5;
uint32_t pad1:1;
uint32_t ia_blend_func:3;
uint32_t pad0:1;
uint32_t ia_blend_enable:1;
uint32_t blend_enable:1;
} blend0;
 
struct {
uint32_t post_blend_clamp_enable:1;
uint32_t pre_blend_clamp_enable:1;
uint32_t clamp_range:2;
uint32_t pad0:4;
uint32_t x_dither_offset:2;
uint32_t y_dither_offset:2;
uint32_t dither_enable:1;
uint32_t alpha_test_func:3;
uint32_t alpha_test_enable:1;
uint32_t pad1:1;
uint32_t logic_op_func:4;
uint32_t logic_op_enable:1;
uint32_t pad2:1;
uint32_t write_disable_b:1;
uint32_t write_disable_g:1;
uint32_t write_disable_r:1;
uint32_t write_disable_a:1;
uint32_t pad3:1;
uint32_t alpha_to_coverage_dither:1;
uint32_t alpha_to_one:1;
uint32_t alpha_to_coverage:1;
} blend1;
};
 
struct gen7_color_calc_state {
struct {
uint32_t alpha_test_format:1;
uint32_t pad0:14;
uint32_t round_disable:1;
uint32_t bf_stencil_ref:8;
uint32_t stencil_ref:8;
} cc0;
 
union {
float alpha_ref_f;
struct {
uint32_t ui:8;
uint32_t pad0:24;
} alpha_ref_fi;
} cc1;
 
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
struct gen7_depth_stencil_state {
struct {
uint32_t pad0:3;
uint32_t bf_stencil_pass_depth_pass_op:3;
uint32_t bf_stencil_pass_depth_fail_op:3;
uint32_t bf_stencil_fail_op:3;
uint32_t bf_stencil_func:3;
uint32_t bf_stencil_enable:1;
uint32_t pad1:2;
uint32_t stencil_write_enable:1;
uint32_t stencil_pass_depth_pass_op:3;
uint32_t stencil_pass_depth_fail_op:3;
uint32_t stencil_fail_op:3;
uint32_t stencil_func:3;
uint32_t stencil_enable:1;
} ds0;
 
struct {
uint32_t bf_stencil_write_mask:8;
uint32_t bf_stencil_test_mask:8;
uint32_t stencil_write_mask:8;
uint32_t stencil_test_mask:8;
} ds1;
 
struct {
uint32_t pad0:26;
uint32_t depth_write_enable:1;
uint32_t depth_test_func:3;
uint32_t pad1:1;
uint32_t depth_test_enable:1;
} ds2;
};
 
struct gen7_surface_state {
struct {
unsigned int cube_pos_z:1;
unsigned int cube_neg_z:1;
unsigned int cube_pos_y:1;
unsigned int cube_neg_y:1;
unsigned int cube_pos_x:1;
unsigned int cube_neg_x:1;
unsigned int pad2:2;
unsigned int render_cache_read_write:1;
unsigned int pad1:1;
unsigned int surface_array_spacing:1;
unsigned int vert_line_stride_ofs:1;
unsigned int vert_line_stride:1;
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int horizontal_alignment:1;
unsigned int vertical_alignment:2;
unsigned int surface_format:9; /**< BRW_SURFACEFORMAT_x */
unsigned int pad0:1;
unsigned int is_array:1;
unsigned int surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */
} ss0;
 
struct {
unsigned int base_addr;
} ss1;
 
struct {
unsigned int width:14;
unsigned int pad1:2;
unsigned int height:14;
unsigned int pad0:2;
} ss2;
 
struct {
unsigned int pitch:18;
unsigned int pad:3;
unsigned int depth:11;
} ss3;
 
struct {
unsigned int multisample_position_palette_index:3;
unsigned int num_multisamples:3;
unsigned int multisampled_surface_storage_format:1;
unsigned int render_target_view_extent:11;
unsigned int min_array_elt:11;
unsigned int rotation:2;
unsigned int pad0:1;
} ss4;
 
struct {
unsigned int mip_count:4;
unsigned int min_lod:4;
unsigned int pad1:12;
unsigned int y_offset:4;
unsigned int pad0:1;
unsigned int x_offset:7;
} ss5;
 
struct {
unsigned int pad; /* Multisample Control Surface stuff */
} ss6;
 
struct {
unsigned int resource_min_lod:12;
unsigned int pad0:16;
unsigned int alpha_clear_color:1;
unsigned int blue_clear_color:1;
unsigned int green_clear_color:1;
unsigned int red_clear_color:1;
} ss7;
};
 
struct gen7_sampler_state {
struct {
unsigned int aniso_algorithm:1;
unsigned int lod_bias:13;
unsigned int min_filter:3;
unsigned int mag_filter:3;
unsigned int mip_filter:2;
unsigned int base_level:5;
unsigned int pad1:1;
unsigned int lod_preclamp:1;
unsigned int default_color_mode:1;
unsigned int pad0:1;
unsigned int disable:1;
} ss0;
 
struct {
unsigned int cube_control_mode:1;
unsigned int shadow_function:3;
unsigned int pad:4;
unsigned int max_lod:12;
unsigned int min_lod:12;
} ss1;
 
struct {
unsigned int pad:5;
unsigned int default_color_pointer:27;
} ss2;
 
struct {
unsigned int r_wrap_mode:3;
unsigned int t_wrap_mode:3;
unsigned int s_wrap_mode:3;
unsigned int pad:1;
unsigned int non_normalized_coord:1;
unsigned int trilinear_quality:2;
unsigned int address_round:6;
unsigned int max_aniso:3;
unsigned int chroma_key_mode:1;
unsigned int chroma_key_index:2;
unsigned int chroma_key_enable:1;
unsigned int pad0:6;
} ss3;
};
 
/* Surface state DW0 */
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN7_SURFACE_TILED (1 << 14)
#define GEN7_SURFACE_TILED_Y (1 << 13)
#define GEN7_SURFACE_FORMAT_SHIFT 18
#define GEN7_SURFACE_TYPE_SHIFT 29
 
/* Surface state DW2 */
#define GEN7_SURFACE_HEIGHT_SHIFT 16
#define GEN7_SURFACE_WIDTH_SHIFT 0
 
/* Surface state DW3 */
#define GEN7_SURFACE_DEPTH_SHIFT 21
#define GEN7_SURFACE_PITCH_SHIFT 0
 
#define HSW_SWIZZLE_ZERO 0
#define HSW_SWIZZLE_ONE 1
#define HSW_SWIZZLE_RED 4
#define HSW_SWIZZLE_GREEN 5
#define HSW_SWIZZLE_BLUE 6
#define HSW_SWIZZLE_ALPHA 7
#define __HSW_SURFACE_SWIZZLE(r,g,b,a) \
((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25)
#define HSW_SURFACE_SWIZZLE(r,g,b,a) \
__HSW_SURFACE_SWIZZLE(HSW_SWIZZLE_##r, HSW_SWIZZLE_##g, HSW_SWIZZLE_##b, HSW_SWIZZLE_##a)
 
/* _3DSTATE_VERTEX_BUFFERS on GEN7*/
/* DW1 */
#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
 
/* _3DPRIMITIVE on GEN7 */
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
 
#define GEN7_3DSTATE_CLEAR_PARAMS GEN7_3D(3, 0, 0x04)
#define GEN7_3DSTATE_DEPTH_BUFFER GEN7_3D(3, 0, 0x05)
# define GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
/* DW1 */
# define GEN7_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define GEN7_3DSTATE_CONSTANT_HS GEN7_3D(3, 0, 0x19)
#define GEN7_3DSTATE_CONSTANT_DS GEN7_3D(3, 0, 0x1a)
 
#define GEN7_3DSTATE_HS GEN7_3D(3, 0, 0x1b)
#define GEN7_3DSTATE_TE GEN7_3D(3, 0, 0x1c)
#define GEN7_3DSTATE_DS GEN7_3D(3, 0, 0x1d)
#define GEN7_3DSTATE_STREAMOUT GEN7_3D(3, 0, 0x1e)
#define GEN7_3DSTATE_SBE GEN7_3D(3, 0, 0x1f)
 
/* DW1 */
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
 
#define GEN7_3DSTATE_PS GEN7_3D(3, 0, 0x20)
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1 << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
# define IVB_PS_MAX_THREADS_SHIFT 24
# define HSW_PS_MAX_THREADS_SHIFT 23
# define HSW_PS_SAMPLE_MASK_SHIFT 12
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
 
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL GEN7_3D(3, 0, 0x21)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN7_3D(3, 0, 0x23)
 
#define GEN7_3DSTATE_BLEND_STATE_POINTERS GEN7_3D(3, 0, 0x24)
#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS GEN7_3D(3, 0, 0x25)
 
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS GEN7_3D(3, 0, 0x26)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS GEN7_3D(3, 0, 0x27)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS GEN7_3D(3, 0, 0x28)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS GEN7_3D(3, 0, 0x29)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS GEN7_3D(3, 0, 0x2a)
 
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS GEN7_3D(3, 0, 0x2b)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN7_3D(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN7_3D(3, 0, 0x2f)
 
#define GEN7_3DSTATE_URB_VS GEN7_3D(3, 0, 0x30)
#define GEN7_3DSTATE_URB_HS GEN7_3D(3, 0, 0x31)
#define GEN7_3DSTATE_URB_DS GEN7_3D(3, 0, 0x32)
#define GEN7_3DSTATE_URB_GS GEN7_3D(3, 0, 0x33)
/* DW1 */
# define GEN7_URB_ENTRY_NUMBER_SHIFT 0
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
 
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN7_3D(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS GEN7_3D(3, 1, 0x16)
/* DW1 */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
struct gen7_cc_viewport {
float min_depth;
float max_depth;
};
 
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
#endif
/drivers/video/Intel-2D/utils.c
0,0 → 1,150
 
#include <stdint.h>
#include <stdio.h>
 
 
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx)
: "memory");
}
 
/* Some CPUID calls want 'count' to be placed in ecx */
static inline void cpuid_count(unsigned int op, int count,
unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
*eax = op;
*ecx = count;
native_cpuid(eax, ebx, ecx, edx);
}
 
 
enum _cache_type {
CACHE_TYPE_NULL = 0,
CACHE_TYPE_DATA = 1,
CACHE_TYPE_INST = 2,
CACHE_TYPE_UNIFIED = 3
};
 
 
union _cpuid4_leaf_eax {
struct {
enum _cache_type type:5;
unsigned int level:3;
unsigned int is_self_initializing:1;
unsigned int is_fully_associative:1;
unsigned int reserved:4;
unsigned int num_threads_sharing:12;
unsigned int num_cores_on_die:6;
} split;
uint32_t full;
};
 
union _cpuid4_leaf_ebx {
struct {
unsigned int coherency_line_size:12;
unsigned int physical_line_partition:10;
unsigned int ways_of_associativity:10;
} split;
uint32_t full;
};
 
union _cpuid4_leaf_ecx {
struct {
unsigned int number_of_sets:32;
} split;
uint32_t full;
};
 
struct _cpuid4_info_regs {
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
};
 
static int
cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned edx;
 
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 
if (eax.split.type == CACHE_TYPE_NULL)
return -1; /* better error ? */
 
this_leaf->eax = eax;
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
(ebx.split.coherency_line_size + 1) *
(ebx.split.physical_line_partition + 1) *
(ebx.split.ways_of_associativity + 1);
return 0;
}
 
static int find_num_cache_leaves()
{
unsigned int eax, ebx, ecx, edx, op;
union _cpuid4_leaf_eax cache_eax;
int i = -1;
 
do {
++i;
/* Do cpuid(op) loop to find out num_cache_leaves */
cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
} while (cache_eax.split.type != CACHE_TYPE_NULL);
return i;
};
 
unsigned int cpu_cache_size()
{
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int num_cache_leaves;
 
num_cache_leaves = find_num_cache_leaves();
 
for (i = 0; i < num_cache_leaves; i++)
{
struct _cpuid4_info_regs this_leaf;
int retval;
 
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
if (retval >= 0) {
switch (this_leaf.eax.split.level)
{
case 1:
if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
new_l1d = this_leaf.size;
else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
new_l1i = this_leaf.size;
break;
case 2:
new_l2 = this_leaf.size;
break;
case 3:
new_l3 = this_leaf.size;
break;
default:
break;
}
}
}
printf("l2 cache %d l3 cache %d\n", new_l2, new_l3);
 
return new_l3 != 0 ? new_l3 : new_l2;
};
/drivers/video/Intel-2D/drm.h
0,0 → 1,825
/**
* \file drm.h
* Header for the Direct Rendering Manager
*
* \author Rickard E. (Rik) Faith <faith@valinux.com>
*
* \par Acknowledgments:
* Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
*/
 
/*
* Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef _DRM_H_
#define _DRM_H_
 
#include <stddef.h>
//#include <asm/ioctl.h>
 
typedef int8_t __s8;
typedef uint8_t __u8;
typedef int16_t __s16;
typedef uint16_t __u16;
typedef int32_t __s32;
typedef uint32_t __u32;
typedef int64_t __s64;
typedef uint64_t __u64;
typedef unsigned int drm_handle_t;
 
 
#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */
#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */
#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */
#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */
 
#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */
#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */
#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD)
#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT)
#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
 
typedef unsigned int drm_context_t;
typedef unsigned int drm_drawable_t;
typedef unsigned int drm_magic_t;
 
/**
* Cliprect.
*
* \warning: If you change this structure, make sure you change
* XF86DRIClipRectRec in the server as well
*
* \note KW: Actually it's illegal to change either for
* backwards-compatibility reasons.
*/
struct drm_clip_rect {
unsigned short x1;
unsigned short y1;
unsigned short x2;
unsigned short y2;
};
 
/**
* Drawable information.
*/
struct drm_drawable_info {
unsigned int num_rects;
struct drm_clip_rect *rects;
};
 
/**
* Texture region,
*/
struct drm_tex_region {
unsigned char next;
unsigned char prev;
unsigned char in_use;
unsigned char padding;
unsigned int age;
};
 
/**
* Hardware lock.
*
* The lock structure is a simple cache-line aligned integer. To avoid
* processor bus contention on a multiprocessor system, there should not be any
* other data stored in the same cache line.
*/
struct drm_hw_lock {
__volatile__ unsigned int lock; /**< lock variable */
char padding[60]; /**< Pad to cache line */
};
 
/**
* DRM_IOCTL_VERSION ioctl argument type.
*
* \sa drmGetVersion().
*/
struct drm_version {
int version_major; /**< Major version */
int version_minor; /**< Minor version */
int version_patchlevel; /**< Patch level */
size_t name_len; /**< Length of name buffer */
char *name; /**< Name of driver */
size_t date_len; /**< Length of date buffer */
char *date; /**< User-space buffer to hold date */
size_t desc_len; /**< Length of desc buffer */
char *desc; /**< User-space buffer to hold desc */
};
 
/**
* DRM_IOCTL_GET_UNIQUE ioctl argument type.
*
* \sa drmGetBusid() and drmSetBusId().
*/
struct drm_unique {
size_t unique_len; /**< Length of unique */
char *unique; /**< Unique name for driver instantiation */
};
 
struct drm_list {
int count; /**< Length of user-space structures */
struct drm_version *version;
};
 
struct drm_block {
int unused;
};
 
/**
* DRM_IOCTL_CONTROL ioctl argument type.
*
* \sa drmCtlInstHandler() and drmCtlUninstHandler().
*/
struct drm_control {
enum {
DRM_ADD_COMMAND,
DRM_RM_COMMAND,
DRM_INST_HANDLER,
DRM_UNINST_HANDLER
} func;
int irq;
};
 
/**
* Type of memory to map.
*/
enum drm_map_type {
_DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */
_DRM_REGISTERS = 1, /**< no caching, no core dump */
_DRM_SHM = 2, /**< shared, cached */
_DRM_AGP = 3, /**< AGP/GART */
_DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */
_DRM_CONSISTENT = 5, /**< Consistent memory for PCI DMA */
_DRM_GEM = 6, /**< GEM object */
};
 
/**
* Memory mapping flags.
*/
enum drm_map_flags {
_DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */
_DRM_READ_ONLY = 0x02,
_DRM_LOCKED = 0x04, /**< shared, cached, locked */
_DRM_KERNEL = 0x08, /**< kernel requires access */
_DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */
_DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */
_DRM_REMOVABLE = 0x40, /**< Removable mapping */
_DRM_DRIVER = 0x80 /**< Managed by driver */
};
 
struct drm_ctx_priv_map {
unsigned int ctx_id; /**< Context requesting private mapping */
void *handle; /**< Handle of map */
};
 
/**
* DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
* argument type.
*
* \sa drmAddMap().
*/
struct drm_map {
unsigned long offset; /**< Requested physical address (0 for SAREA)*/
unsigned long size; /**< Requested physical size (bytes) */
enum drm_map_type type; /**< Type of memory to map */
enum drm_map_flags flags; /**< Flags */
void *handle; /**< User-space: "Handle" to pass to mmap() */
/**< Kernel-space: kernel-virtual address */
int mtrr; /**< MTRR slot used */
/* Private data */
};
 
/**
* DRM_IOCTL_GET_CLIENT ioctl argument type.
*/
struct drm_client {
int idx; /**< Which client desired? */
int auth; /**< Is client authenticated? */
unsigned long pid; /**< Process ID */
unsigned long uid; /**< User ID */
unsigned long magic; /**< Magic */
unsigned long iocs; /**< Ioctl count */
};
 
enum drm_stat_type {
_DRM_STAT_LOCK,
_DRM_STAT_OPENS,
_DRM_STAT_CLOSES,
_DRM_STAT_IOCTLS,
_DRM_STAT_LOCKS,
_DRM_STAT_UNLOCKS,
_DRM_STAT_VALUE, /**< Generic value */
_DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */
_DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */
 
_DRM_STAT_IRQ, /**< IRQ */
_DRM_STAT_PRIMARY, /**< Primary DMA bytes */
_DRM_STAT_SECONDARY, /**< Secondary DMA bytes */
_DRM_STAT_DMA, /**< DMA */
_DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */
_DRM_STAT_MISSED /**< Missed DMA opportunity */
/* Add to the *END* of the list */
};
 
/**
* DRM_IOCTL_GET_STATS ioctl argument type.
*/
struct drm_stats {
unsigned long count;
struct {
unsigned long value;
enum drm_stat_type type;
} data[15];
};
 
/**
* Hardware locking flags.
*/
enum drm_lock_flags {
_DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */
_DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */
_DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */
_DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */
/* These *HALT* flags aren't supported yet
-- they will be used to support the
full-screen DGA-like mode. */
_DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */
_DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */
};
 
/**
* DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
*
* \sa drmGetLock() and drmUnlock().
*/
struct drm_lock {
int context;
enum drm_lock_flags flags;
};
 
/**
* DMA flags
*
* \warning
* These values \e must match xf86drm.h.
*
* \sa drm_dma.
*/
enum drm_dma_flags {
/* Flags for DMA buffer dispatch */
_DRM_DMA_BLOCK = 0x01, /**<
* Block until buffer dispatched.
*
* \note The buffer may not yet have
* been processed by the hardware --
* getting a hardware lock with the
* hardware quiescent will ensure
* that the buffer has been
* processed.
*/
_DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */
_DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */
 
/* Flags for DMA buffer request */
_DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */
_DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */
_DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */
};
 
/**
* DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
*
* \sa drmAddBufs().
*/
struct drm_buf_desc {
int count; /**< Number of buffers of this size */
int size; /**< Size in bytes */
int low_mark; /**< Low water mark */
int high_mark; /**< High water mark */
enum {
_DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */
_DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */
_DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */
_DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */
_DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */
} flags;
unsigned long agp_start; /**<
* Start address of where the AGP buffers are
* in the AGP aperture
*/
};
 
/**
* DRM_IOCTL_INFO_BUFS ioctl argument type.
*/
struct drm_buf_info {
int count; /**< Entries in list */
struct drm_buf_desc *list;
};
 
/**
* DRM_IOCTL_FREE_BUFS ioctl argument type.
*/
struct drm_buf_free {
int count;
int *list;
};
 
/**
* Buffer information
*
* \sa drm_buf_map.
*/
struct drm_buf_pub {
int idx; /**< Index into the master buffer list */
int total; /**< Buffer size */
int used; /**< Amount of buffer in use (for DMA) */
void *address; /**< Address of buffer */
};
 
/**
* DRM_IOCTL_MAP_BUFS ioctl argument type.
*/
struct drm_buf_map {
int count; /**< Length of the buffer list */
#ifdef __cplusplus
void *virt;
#else
void *virtual; /**< Mmap'd area in user-virtual */
#endif
struct drm_buf_pub *list; /**< Buffer information */
};
 
/**
* DRM_IOCTL_DMA ioctl argument type.
*
* Indices here refer to the offset into the buffer list in drm_buf_get.
*
* \sa drmDMA().
*/
struct drm_dma {
int context; /**< Context handle */
int send_count; /**< Number of buffers to send */
int *send_indices; /**< List of handles to buffers */
int *send_sizes; /**< Lengths of data to send */
enum drm_dma_flags flags; /**< Flags */
int request_count; /**< Number of buffers requested */
int request_size; /**< Desired size for buffers */
int *request_indices; /**< Buffer information */
int *request_sizes;
int granted_count; /**< Number of buffers granted */
};
 
enum drm_ctx_flags {
_DRM_CONTEXT_PRESERVED = 0x01,
_DRM_CONTEXT_2DONLY = 0x02
};
 
/**
* DRM_IOCTL_ADD_CTX ioctl argument type.
*
* \sa drmCreateContext() and drmDestroyContext().
*/
struct drm_ctx {
drm_context_t handle;
enum drm_ctx_flags flags;
};
 
/**
* DRM_IOCTL_RES_CTX ioctl argument type.
*/
struct drm_ctx_res {
int count;
struct drm_ctx *contexts;
};
 
/**
* DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
*/
struct drm_draw {
drm_drawable_t handle;
};
 
/**
* DRM_IOCTL_UPDATE_DRAW ioctl argument type.
*/
typedef enum {
DRM_DRAWABLE_CLIPRECTS,
} drm_drawable_info_type_t;
 
struct drm_update_draw {
drm_drawable_t handle;
unsigned int type;
unsigned int num;
unsigned long long data;
};
 
/**
* DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
*/
struct drm_auth {
drm_magic_t magic;
};
 
/**
* DRM_IOCTL_IRQ_BUSID ioctl argument type.
*
* \sa drmGetInterruptFromBusID().
*/
struct drm_irq_busid {
int irq; /**< IRQ number */
int busnum; /**< bus number */
int devnum; /**< device number */
int funcnum; /**< function number */
};
 
enum drm_vblank_seq_type {
_DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */
_DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */
_DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */
_DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */
_DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */
_DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */
_DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */
};
 
#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
_DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS)
 
struct drm_wait_vblank_request {
enum drm_vblank_seq_type type;
unsigned int sequence;
unsigned long signal;
};
 
struct drm_wait_vblank_reply {
enum drm_vblank_seq_type type;
unsigned int sequence;
long tval_sec;
long tval_usec;
};
 
/**
* DRM_IOCTL_WAIT_VBLANK ioctl argument type.
*
* \sa drmWaitVBlank().
*/
union drm_wait_vblank {
struct drm_wait_vblank_request request;
struct drm_wait_vblank_reply reply;
};
 
#define _DRM_PRE_MODESET 1
#define _DRM_POST_MODESET 2
 
/**
* DRM_IOCTL_MODESET_CTL ioctl argument type
*
* \sa drmModesetCtl().
*/
struct drm_modeset_ctl {
__u32 crtc;
__u32 cmd;
};
 
/**
* DRM_IOCTL_AGP_ENABLE ioctl argument type.
*
* \sa drmAgpEnable().
*/
struct drm_agp_mode {
unsigned long mode; /**< AGP mode */
};
 
/**
* DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
*
* \sa drmAgpAlloc() and drmAgpFree().
*/
struct drm_agp_buffer {
unsigned long size; /**< In bytes -- will round to page boundary */
unsigned long handle; /**< Used for binding / unbinding */
unsigned long type; /**< Type of memory to allocate */
unsigned long physical; /**< Physical used by i810 */
};
 
/**
* DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
*
* \sa drmAgpBind() and drmAgpUnbind().
*/
struct drm_agp_binding {
unsigned long handle; /**< From drm_agp_buffer */
unsigned long offset; /**< In bytes -- will round to page boundary */
};
 
/**
* DRM_IOCTL_AGP_INFO ioctl argument type.
*
* \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
* drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(),
* drmAgpVendorId() and drmAgpDeviceId().
*/
struct drm_agp_info {
int agp_version_major;
int agp_version_minor;
unsigned long mode;
unsigned long aperture_base; /* physical address */
unsigned long aperture_size; /* bytes */
unsigned long memory_allowed; /* bytes */
unsigned long memory_used;
 
/* PCI information */
unsigned short id_vendor;
unsigned short id_device;
};
 
/**
* DRM_IOCTL_SG_ALLOC ioctl argument type.
*/
struct drm_scatter_gather {
unsigned long size; /**< In bytes -- will round to page boundary */
unsigned long handle; /**< Used for mapping / unmapping */
};
 
/**
* DRM_IOCTL_SET_VERSION ioctl argument type.
*/
struct drm_set_version {
int drm_di_major;
int drm_di_minor;
int drm_dd_major;
int drm_dd_minor;
};
 
/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
struct drm_gem_close {
/** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
 
/** DRM_IOCTL_GEM_FLINK ioctl argument type */
struct drm_gem_flink {
/** Handle for the object being named */
__u32 handle;
 
/** Returned global name */
__u32 name;
};
 
/** DRM_IOCTL_GEM_OPEN ioctl argument type */
struct drm_gem_open {
/** Name of object being opened */
__u32 name;
 
/** Returned handle for the object */
__u32 handle;
 
/** Returned size of the object */
__u64 size;
};
 
/** DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
__u64 capability;
__u64 value;
};
 
#define DRM_CLOEXEC O_CLOEXEC
struct drm_prime_handle {
__u32 handle;
 
/** Flags.. only applicable for handle->fd */
__u32 flags;
 
/** Returned dmabuf file descriptor */
__s32 fd;
};
 
//#include "drm_mode.h"
 
#if 0
 
#define DRM_IOCTL_BASE 'd'
#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr)
#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type)
#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type)
#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type)
 
#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version)
#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique)
#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth)
#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid)
#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map)
#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client)
#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats)
#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version)
#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl)
#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close)
#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink)
#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open)
#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap)
 
#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique)
#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth)
#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block)
#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block)
#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control)
#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map)
#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc)
#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc)
#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info)
#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map)
#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free)
 
#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map)
 
#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map)
#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map)
 
#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e)
#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f)
 
#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx)
#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx)
#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx)
#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx)
#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx)
#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx)
#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res)
#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw)
#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw)
#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma)
#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock)
#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
 
#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle)
#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle)
 
#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30)
#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31)
#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode)
#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info)
#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer)
#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer)
#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding)
#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding)
 
#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather)
#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather)
 
#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank)
 
#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw)
 
#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res)
#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc)
#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc)
#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor)
#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder)
#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector)
#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd)
#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd)
 
#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property)
#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
 
#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb)
#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res)
#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane)
#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane)
#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2)
#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties)
#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property)
 
#endif
 
/**
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x99.
* Generic IOCTLS restart at 0xA0.
*
* \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
* drmCommandReadWrite().
*/
#define DRM_COMMAND_BASE 0x40
#define DRM_COMMAND_END 0xA0
 
/**
* Header for events written back to userspace on the drm fd. The
* type defines the type of event, the length specifies the total
* length of the event (including the header), and user_data is
* typically a 64 bit value passed with the ioctl that triggered the
* event. A read on the drm fd will always only return complete
* events, that is, if for example the read buffer is 100 bytes, and
* there are two 64 byte events pending, only one will be returned.
*
* Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
* up are chipset specific.
*/
struct drm_event {
__u32 type;
__u32 length;
};
 
#define DRM_EVENT_VBLANK 0x01
#define DRM_EVENT_FLIP_COMPLETE 0x02
 
struct drm_event_vblank {
struct drm_event base;
__u64 user_data;
__u32 tv_sec;
__u32 tv_usec;
__u32 sequence;
__u32 reserved;
};
 
#define DRM_CAP_DUMB_BUFFER 0x1
#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3
#define DRM_CAP_DUMB_PREFER_SHADOW 0x4
#define DRM_CAP_PRIME 0x5
 
#define DRM_PRIME_CAP_IMPORT 0x1
#define DRM_PRIME_CAP_EXPORT 0x2
 
/* typedef area */
typedef struct drm_clip_rect drm_clip_rect_t;
typedef struct drm_drawable_info drm_drawable_info_t;
typedef struct drm_tex_region drm_tex_region_t;
typedef struct drm_hw_lock drm_hw_lock_t;
typedef struct drm_version drm_version_t;
typedef struct drm_unique drm_unique_t;
typedef struct drm_list drm_list_t;
typedef struct drm_block drm_block_t;
typedef struct drm_control drm_control_t;
typedef enum drm_map_type drm_map_type_t;
typedef enum drm_map_flags drm_map_flags_t;
typedef struct drm_ctx_priv_map drm_ctx_priv_map_t;
typedef struct drm_map drm_map_t;
typedef struct drm_client drm_client_t;
typedef enum drm_stat_type drm_stat_type_t;
typedef struct drm_stats drm_stats_t;
typedef enum drm_lock_flags drm_lock_flags_t;
typedef struct drm_lock drm_lock_t;
typedef enum drm_dma_flags drm_dma_flags_t;
typedef struct drm_buf_desc drm_buf_desc_t;
typedef struct drm_buf_info drm_buf_info_t;
typedef struct drm_buf_free drm_buf_free_t;
typedef struct drm_buf_pub drm_buf_pub_t;
typedef struct drm_buf_map drm_buf_map_t;
typedef struct drm_dma drm_dma_t;
typedef union drm_wait_vblank drm_wait_vblank_t;
typedef struct drm_agp_mode drm_agp_mode_t;
typedef enum drm_ctx_flags drm_ctx_flags_t;
typedef struct drm_ctx drm_ctx_t;
typedef struct drm_ctx_res drm_ctx_res_t;
typedef struct drm_draw drm_draw_t;
typedef struct drm_update_draw drm_update_draw_t;
typedef struct drm_auth drm_auth_t;
typedef struct drm_irq_busid drm_irq_busid_t;
typedef enum drm_vblank_seq_type drm_vblank_seq_type_t;
 
typedef struct drm_agp_buffer drm_agp_buffer_t;
typedef struct drm_agp_binding drm_agp_binding_t;
typedef struct drm_agp_info drm_agp_info_t;
typedef struct drm_scatter_gather drm_scatter_gather_t;
typedef struct drm_set_version drm_set_version_t;
 
#endif
/drivers/video/Intel-2D/gen4_vertex.h
0,0 → 1,16
#ifndef GEN4_VERTEX_H
#define GEN4_VERTEX_H
 
#include "compiler.h"
 
#include "sna.h"
#include "sna_render.h"
 
void gen4_vertex_flush(struct sna *sna);
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
 
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
 
#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/gen6_render.h
0,0 → 1,1563
#ifndef GEN6_RENDER_H
#define GEN6_RENDER_H
 
#define GEN6_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
 
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16
# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8
# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 20)
#define VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
#define GEN6_VF_CTL 0x7500
 
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
#define GEN6_VS_CTL 0x7600
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN6_3DSTATE_PIPELINED_POINTERS */
#define GEN6_GS_DISABLE 0
#define GEN6_GS_ENABLE 1
#define GEN6_CLIP_DISABLE 0
#define GEN6_CLIP_ENABLE 1
 
/* for GEN6_PIPE_CONTROL */
#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
#define GEN6_VF_CTL 0x7500
 
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
#define GEN6_VS_CTL 0x7600
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
 
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN6_ANISORATIO_2 0
#define GEN6_ANISORATIO_4 1
#define GEN6_ANISORATIO_6 2
#define GEN6_ANISORATIO_8 3
#define GEN6_ANISORATIO_10 4
#define GEN6_ANISORATIO_12 5
#define GEN6_ANISORATIO_14 6
#define GEN6_ANISORATIO_16 7
 
#define GEN6_BLENDFACTOR_ONE 0x1
#define GEN6_BLENDFACTOR_SRC_COLOR 0x2
#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN6_BLENDFACTOR_DST_ALPHA 0x4
#define GEN6_BLENDFACTOR_DST_COLOR 0x5
#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN6_BLENDFACTOR_CONST_COLOR 0x7
#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN6_BLENDFACTOR_ZERO 0x11
#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN6_BLENDFUNCTION_ADD 0
#define GEN6_BLENDFUNCTION_SUBTRACT 1
#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN6_BLENDFUNCTION_MIN 3
#define GEN6_BLENDFUNCTION_MAX 4
 
#define GEN6_ALPHATEST_FORMAT_UNORM8 0
#define GEN6_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN6_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN6_CLIP_API_OGL 0
#define GEN6_CLIP_API_DX 1
 
#define GEN6_CLIPMODE_NORMAL 0
#define GEN6_CLIPMODE_CLIP_ALL 1
#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN6_CLIPMODE_REJECT_ALL 3
#define GEN6_CLIPMODE_ACCEPT_ALL 4
 
#define GEN6_CLIP_NDCSPACE 0
#define GEN6_CLIP_SCREENSPACE 1
 
#define GEN6_COMPAREFUNCTION_ALWAYS 0
#define GEN6_COMPAREFUNCTION_NEVER 1
#define GEN6_COMPAREFUNCTION_LESS 2
#define GEN6_COMPAREFUNCTION_EQUAL 3
#define GEN6_COMPAREFUNCTION_LEQUAL 4
#define GEN6_COMPAREFUNCTION_GREATER 5
#define GEN6_COMPAREFUNCTION_NOTEQUAL 6
#define GEN6_COMPAREFUNCTION_GEQUAL 7
 
#define GEN6_COVERAGE_PIXELS_HALF 0
#define GEN6_COVERAGE_PIXELS_1 1
#define GEN6_COVERAGE_PIXELS_2 2
#define GEN6_COVERAGE_PIXELS_4 3
 
#define GEN6_CULLMODE_BOTH 0
#define GEN6_CULLMODE_NONE 1
#define GEN6_CULLMODE_FRONT 2
#define GEN6_CULLMODE_BACK 3
 
#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN6_DEPTHFORMAT_D32_FLOAT 1
#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN6_DEPTHFORMAT_D16_UNORM 5
 
#define GEN6_FLOATING_POINT_IEEE_754 0
#define GEN6_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN6_FRONTWINDING_CW 0
#define GEN6_FRONTWINDING_CCW 1
 
#define GEN6_INDEX_BYTE 0
#define GEN6_INDEX_WORD 1
#define GEN6_INDEX_DWORD 2
 
#define GEN6_LOGICOPFUNCTION_CLEAR 0
#define GEN6_LOGICOPFUNCTION_NOR 1
#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN6_LOGICOPFUNCTION_INVERT 5
#define GEN6_LOGICOPFUNCTION_XOR 6
#define GEN6_LOGICOPFUNCTION_NAND 7
#define GEN6_LOGICOPFUNCTION_AND 8
#define GEN6_LOGICOPFUNCTION_EQUIV 9
#define GEN6_LOGICOPFUNCTION_NOOP 10
#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN6_LOGICOPFUNCTION_COPY 12
#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN6_LOGICOPFUNCTION_OR 14
#define GEN6_LOGICOPFUNCTION_SET 15
 
#define GEN6_MAPFILTER_NEAREST 0x0
#define GEN6_MAPFILTER_LINEAR 0x1
#define GEN6_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN6_MIPFILTER_NONE 0
#define GEN6_MIPFILTER_NEAREST 1
#define GEN6_MIPFILTER_LINEAR 3
 
#define GEN6_POLYGON_FRONT_FACING 0
#define GEN6_POLYGON_BACK_FACING 1
 
#define GEN6_PREFILTER_ALWAYS 0x0
#define GEN6_PREFILTER_NEVER 0x1
#define GEN6_PREFILTER_LESS 0x2
#define GEN6_PREFILTER_EQUAL 0x3
#define GEN6_PREFILTER_LEQUAL 0x4
#define GEN6_PREFILTER_GREATER 0x5
#define GEN6_PREFILTER_NOTEQUAL 0x6
#define GEN6_PREFILTER_GEQUAL 0x7
 
#define GEN6_PROVOKING_VERTEX_0 0
#define GEN6_PROVOKING_VERTEX_1 1
#define GEN6_PROVOKING_VERTEX_2 2
 
#define GEN6_RASTRULE_UPPER_LEFT 0
#define GEN6_RASTRULE_UPPER_RIGHT 1
 
#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN6_STENCILOP_KEEP 0
#define GEN6_STENCILOP_ZERO 1
#define GEN6_STENCILOP_REPLACE 2
#define GEN6_STENCILOP_INCRSAT 3
#define GEN6_STENCILOP_DECRSAT 4
#define GEN6_STENCILOP_INCR 5
#define GEN6_STENCILOP_DECR 6
#define GEN6_STENCILOP_INVERT 7
 
#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN6_SURFACEFORMAT_R16_SINT 0x10C
#define GEN6_SURFACEFORMAT_R16_UINT 0x10D
#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN6_SURFACEFORMAT_I16_UNORM 0x111
#define GEN6_SURFACEFORMAT_L16_UNORM 0x112
#define GEN6_SURFACEFORMAT_A16_UNORM 0x113
#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN6_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN6_SURFACEFORMAT_R8_UNORM 0x140
#define GEN6_SURFACEFORMAT_R8_SNORM 0x141
#define GEN6_SURFACEFORMAT_R8_SINT 0x142
#define GEN6_SURFACEFORMAT_R8_UINT 0x143
#define GEN6_SURFACEFORMAT_A8_UNORM 0x144
#define GEN6_SURFACEFORMAT_I8_UNORM 0x145
#define GEN6_SURFACEFORMAT_L8_UNORM 0x146
#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN6_SURFACEFORMAT_R1_UINT 0x181
#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN6_SURFACEFORMAT_MONO8 0x18E
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN6_SURFACEFORMAT_FXT1 0x192
#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN6_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN6_SURFACERETURNFORMAT_FLOAT32 0
#define GEN6_SURFACERETURNFORMAT_S1 1
 
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7
 
#define GEN6_BORDER_COLOR_MODE_DEFAULT 0
#define GEN6_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN6_TEXCOORDMODE_WRAP 0
#define GEN6_TEXCOORDMODE_MIRROR 1
#define GEN6_TEXCOORDMODE_CLAMP 2
#define GEN6_TEXCOORDMODE_CUBE 3
#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN6_THREAD_PRIORITY_NORMAL 0
#define GEN6_THREAD_PRIORITY_HIGH 1
 
#define GEN6_TILEWALK_XMAJOR 0
#define GEN6_TILEWALK_YMAJOR 1
 
#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define GEN6_VFCOMPONENT_NOSTORE 0
#define GEN6_VFCOMPONENT_STORE_SRC 1
#define GEN6_VFCOMPONENT_STORE_0 2
#define GEN6_VFCOMPONENT_STORE_1_FLT 3
#define GEN6_VFCOMPONENT_STORE_1_INT 4
#define GEN6_VFCOMPONENT_STORE_VID 5
#define GEN6_VFCOMPONENT_STORE_IID 6
#define GEN6_VFCOMPONENT_STORE_PID 7
 
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN6_ALIGN_1 0
#define GEN6_ALIGN_16 1
 
#define GEN6_ADDRESS_DIRECT 0
#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN6_CHANNEL_X 0
#define GEN6_CHANNEL_Y 1
#define GEN6_CHANNEL_Z 2
#define GEN6_CHANNEL_W 3
 
#define GEN6_COMPRESSION_NONE 0
#define GEN6_COMPRESSION_2NDHALF 1
#define GEN6_COMPRESSION_COMPRESSED 2
 
#define GEN6_CONDITIONAL_NONE 0
#define GEN6_CONDITIONAL_Z 1
#define GEN6_CONDITIONAL_NZ 2
#define GEN6_CONDITIONAL_EQ 1 /* Z */
#define GEN6_CONDITIONAL_NEQ 2 /* NZ */
#define GEN6_CONDITIONAL_G 3
#define GEN6_CONDITIONAL_GE 4
#define GEN6_CONDITIONAL_L 5
#define GEN6_CONDITIONAL_LE 6
#define GEN6_CONDITIONAL_C 7
#define GEN6_CONDITIONAL_O 8
 
#define GEN6_DEBUG_NONE 0
#define GEN6_DEBUG_BREAKPOINT 1
 
#define GEN6_DEPENDENCY_NORMAL 0
#define GEN6_DEPENDENCY_NOTCLEARED 1
#define GEN6_DEPENDENCY_NOTCHECKED 2
#define GEN6_DEPENDENCY_DISABLE 3
 
#define GEN6_EXECUTE_1 0
#define GEN6_EXECUTE_2 1
#define GEN6_EXECUTE_4 2
#define GEN6_EXECUTE_8 3
#define GEN6_EXECUTE_16 4
#define GEN6_EXECUTE_32 5
 
#define GEN6_HORIZONTAL_STRIDE_0 0
#define GEN6_HORIZONTAL_STRIDE_1 1
#define GEN6_HORIZONTAL_STRIDE_2 2
#define GEN6_HORIZONTAL_STRIDE_4 3
 
#define GEN6_INSTRUCTION_NORMAL 0
#define GEN6_INSTRUCTION_SATURATE 1
 
#define GEN6_MASK_ENABLE 0
#define GEN6_MASK_DISABLE 1
 
#define GEN6_OPCODE_MOV 1
#define GEN6_OPCODE_SEL 2
#define GEN6_OPCODE_NOT 4
#define GEN6_OPCODE_AND 5
#define GEN6_OPCODE_OR 6
#define GEN6_OPCODE_XOR 7
#define GEN6_OPCODE_SHR 8
#define GEN6_OPCODE_SHL 9
#define GEN6_OPCODE_RSR 10
#define GEN6_OPCODE_RSL 11
#define GEN6_OPCODE_ASR 12
#define GEN6_OPCODE_CMP 16
#define GEN6_OPCODE_JMPI 32
#define GEN6_OPCODE_IF 34
#define GEN6_OPCODE_IFF 35
#define GEN6_OPCODE_ELSE 36
#define GEN6_OPCODE_ENDIF 37
#define GEN6_OPCODE_DO 38
#define GEN6_OPCODE_WHILE 39
#define GEN6_OPCODE_BREAK 40
#define GEN6_OPCODE_CONTINUE 41
#define GEN6_OPCODE_HALT 42
#define GEN6_OPCODE_MSAVE 44
#define GEN6_OPCODE_MRESTORE 45
#define GEN6_OPCODE_PUSH 46
#define GEN6_OPCODE_POP 47
#define GEN6_OPCODE_WAIT 48
#define GEN6_OPCODE_SEND 49
#define GEN6_OPCODE_ADD 64
#define GEN6_OPCODE_MUL 65
#define GEN6_OPCODE_AVG 66
#define GEN6_OPCODE_FRC 67
#define GEN6_OPCODE_RNDU 68
#define GEN6_OPCODE_RNDD 69
#define GEN6_OPCODE_RNDE 70
#define GEN6_OPCODE_RNDZ 71
#define GEN6_OPCODE_MAC 72
#define GEN6_OPCODE_MACH 73
#define GEN6_OPCODE_LZD 74
#define GEN6_OPCODE_SAD2 80
#define GEN6_OPCODE_SADA2 81
#define GEN6_OPCODE_DP4 84
#define GEN6_OPCODE_DPH 85
#define GEN6_OPCODE_DP3 86
#define GEN6_OPCODE_DP2 87
#define GEN6_OPCODE_DPA2 88
#define GEN6_OPCODE_LINE 89
#define GEN6_OPCODE_NOP 126
 
#define GEN6_PREDICATE_NONE 0
#define GEN6_PREDICATE_NORMAL 1
#define GEN6_PREDICATE_ALIGN1_ANYV 2
#define GEN6_PREDICATE_ALIGN1_ALLV 3
#define GEN6_PREDICATE_ALIGN1_ANY2H 4
#define GEN6_PREDICATE_ALIGN1_ALL2H 5
#define GEN6_PREDICATE_ALIGN1_ANY4H 6
#define GEN6_PREDICATE_ALIGN1_ALL4H 7
#define GEN6_PREDICATE_ALIGN1_ANY8H 8
#define GEN6_PREDICATE_ALIGN1_ALL8H 9
#define GEN6_PREDICATE_ALIGN1_ANY16H 10
#define GEN6_PREDICATE_ALIGN1_ALL16H 11
#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN6_PREDICATE_ALIGN16_ANY4H 6
#define GEN6_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN6_ARCHITECTURE_REGISTER_FILE 0
#define GEN6_GENERAL_REGISTER_FILE 1
#define GEN6_MESSAGE_REGISTER_FILE 2
#define GEN6_IMMEDIATE_VALUE 3
 
#define GEN6_REGISTER_TYPE_UD 0
#define GEN6_REGISTER_TYPE_D 1
#define GEN6_REGISTER_TYPE_UW 2
#define GEN6_REGISTER_TYPE_W 3
#define GEN6_REGISTER_TYPE_UB 4
#define GEN6_REGISTER_TYPE_B 5
#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN6_REGISTER_TYPE_HF 6
#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN6_REGISTER_TYPE_F 7
 
#define GEN6_ARF_NULL 0x00
#define GEN6_ARF_ADDRESS 0x10
#define GEN6_ARF_ACCUMULATOR 0x20
#define GEN6_ARF_FLAG 0x30
#define GEN6_ARF_MASK 0x40
#define GEN6_ARF_MASK_STACK 0x50
#define GEN6_ARF_MASK_STACK_DEPTH 0x60
#define GEN6_ARF_STATE 0x70
#define GEN6_ARF_CONTROL 0x80
#define GEN6_ARF_NOTIFICATION_COUNT 0x90
#define GEN6_ARF_IP 0xA0
 
#define GEN6_AMASK 0
#define GEN6_IMASK 1
#define GEN6_LMASK 2
#define GEN6_CMASK 3
 
 
 
#define GEN6_THREAD_NORMAL 0
#define GEN6_THREAD_ATOMIC 1
#define GEN6_THREAD_SWITCH 2
 
#define GEN6_VERTICAL_STRIDE_0 0
#define GEN6_VERTICAL_STRIDE_1 1
#define GEN6_VERTICAL_STRIDE_2 2
#define GEN6_VERTICAL_STRIDE_4 3
#define GEN6_VERTICAL_STRIDE_8 4
#define GEN6_VERTICAL_STRIDE_16 5
#define GEN6_VERTICAL_STRIDE_32 6
#define GEN6_VERTICAL_STRIDE_64 7
#define GEN6_VERTICAL_STRIDE_128 8
#define GEN6_VERTICAL_STRIDE_256 9
#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN6_WIDTH_1 0
#define GEN6_WIDTH_2 1
#define GEN6_WIDTH_4 2
#define GEN6_WIDTH_8 3
#define GEN6_WIDTH_16 4
 
#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN6_POLYGON_FACING_FRONT 0
#define GEN6_POLYGON_FACING_BACK 1
 
#define GEN6_MESSAGE_TARGET_NULL 0
#define GEN6_MESSAGE_TARGET_MATH 1
#define GEN6_MESSAGE_TARGET_SAMPLER 2
#define GEN6_MESSAGE_TARGET_GATEWAY 3
#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN6_MESSAGE_TARGET_URB 6
#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN6_MATH_INTEGER_UNSIGNED 0
#define GEN6_MATH_INTEGER_SIGNED 1
 
#define GEN6_MATH_PRECISION_FULL 0
#define GEN6_MATH_PRECISION_PARTIAL 1
 
#define GEN6_MATH_SATURATE_NONE 0
#define GEN6_MATH_SATURATE_SATURATE 1
 
#define GEN6_MATH_DATA_VECTOR 0
#define GEN6_MATH_DATA_SCALAR 1
 
#define GEN6_URB_OPCODE_WRITE 0
 
#define GEN6_URB_SWIZZLE_NONE 0
#define GEN6_URB_SWIZZLE_INTERLEAVE 1
#define GEN6_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN6_SCRATCH_SPACE_SIZE_1K 0
#define GEN6_SCRATCH_SPACE_SIZE_2K 1
#define GEN6_SCRATCH_SPACE_SIZE_4K 2
#define GEN6_SCRATCH_SPACE_SIZE_8K 3
#define GEN6_SCRATCH_SPACE_SIZE_16K 4
#define GEN6_SCRATCH_SPACE_SIZE_32K 5
#define GEN6_SCRATCH_SPACE_SIZE_64K 6
#define GEN6_SCRATCH_SPACE_SIZE_128K 7
#define GEN6_SCRATCH_SPACE_SIZE_256K 8
#define GEN6_SCRATCH_SPACE_SIZE_512K 9
#define GEN6_SCRATCH_SPACE_SIZE_1M 10
#define GEN6_SCRATCH_SPACE_SIZE_2M 11
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
struct gen6_sampler_default_border_color {
float color[4];
};
 
struct gen6_sampler_legacy_border_color {
uint8_t color[4];
};
 
struct gen6_sampler_state {
struct {
uint32_t shadow_function:3;
uint32_t lod_bias:11;
uint32_t min_filter:3;
uint32_t mag_filter:3;
uint32_t mip_filter:2;
uint32_t base_level:5;
uint32_t pad:1;
uint32_t lod_preclamp:1;
uint32_t border_color_mode:1;
uint32_t pad0:1;
uint32_t disable:1;
} ss0;
 
struct {
uint32_t r_wrap_mode:3;
uint32_t t_wrap_mode:3;
uint32_t s_wrap_mode:3;
uint32_t pad:3;
uint32_t max_lod:10;
uint32_t min_lod:10;
} ss1;
 
struct {
uint32_t border_color;
} ss2;
 
struct {
uint32_t non_normalized_coord:1;
uint32_t pad:12;
uint32_t address_round:6;
uint32_t max_aniso:3;
uint32_t chroma_key_mode:1;
uint32_t chroma_key_index:2;
uint32_t chroma_key_enable:1;
uint32_t monochrome_filter_width:3;
uint32_t monochrome_filter_height:3;
} ss3;
};
 
struct gen6_blend_state {
struct {
uint32_t dest_blend_factor:5;
uint32_t source_blend_factor:5;
uint32_t pad3:1;
uint32_t blend_func:3;
uint32_t pad2:1;
uint32_t ia_dest_blend_factor:5;
uint32_t ia_source_blend_factor:5;
uint32_t pad1:1;
uint32_t ia_blend_func:3;
uint32_t pad0:1;
uint32_t ia_blend_enable:1;
uint32_t blend_enable:1;
} blend0;
 
struct {
uint32_t post_blend_clamp_enable:1;
uint32_t pre_blend_clamp_enable:1;
uint32_t clamp_range:2;
uint32_t pad0:4;
uint32_t x_dither_offset:2;
uint32_t y_dither_offset:2;
uint32_t dither_enable:1;
uint32_t alpha_test_func:3;
uint32_t alpha_test_enable:1;
uint32_t pad1:1;
uint32_t logic_op_func:4;
uint32_t logic_op_enable:1;
uint32_t pad2:1;
uint32_t write_disable_b:1;
uint32_t write_disable_g:1;
uint32_t write_disable_r:1;
uint32_t write_disable_a:1;
uint32_t pad3:1;
uint32_t alpha_to_coverage_dither:1;
uint32_t alpha_to_one:1;
uint32_t alpha_to_coverage:1;
} blend1;
};
 
struct gen6_color_calc_state {
struct {
uint32_t alpha_test_format:1;
uint32_t pad0:14;
uint32_t round_disable:1;
uint32_t bf_stencil_ref:8;
uint32_t stencil_ref:8;
} cc0;
 
union {
float alpha_ref_f;
struct {
uint32_t ui:8;
uint32_t pad0:24;
} alpha_ref_fi;
} cc1;
 
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
struct gen6_depth_stencil_state {
struct {
uint32_t pad0:3;
uint32_t bf_stencil_pass_depth_pass_op:3;
uint32_t bf_stencil_pass_depth_fail_op:3;
uint32_t bf_stencil_fail_op:3;
uint32_t bf_stencil_func:3;
uint32_t bf_stencil_enable:1;
uint32_t pad1:2;
uint32_t stencil_write_enable:1;
uint32_t stencil_pass_depth_pass_op:3;
uint32_t stencil_pass_depth_fail_op:3;
uint32_t stencil_fail_op:3;
uint32_t stencil_func:3;
uint32_t stencil_enable:1;
} ds0;
 
struct {
uint32_t bf_stencil_write_mask:8;
uint32_t bf_stencil_test_mask:8;
uint32_t stencil_write_mask:8;
uint32_t stencil_test_mask:8;
} ds1;
 
struct {
uint32_t pad0:26;
uint32_t depth_write_enable:1;
uint32_t depth_test_func:3;
uint32_t pad1:1;
uint32_t depth_test_enable:1;
} ds2;
};
 
struct gen6_surface_state {
struct {
uint32_t cube_pos_z:1;
uint32_t cube_neg_z:1;
uint32_t cube_pos_y:1;
uint32_t cube_neg_y:1;
uint32_t cube_pos_x:1;
uint32_t cube_neg_x:1;
uint32_t pad:3;
uint32_t render_cache_read_mode:1;
uint32_t mipmap_layout_mode:1;
uint32_t vert_line_stride_ofs:1;
uint32_t vert_line_stride:1;
uint32_t color_blend:1;
uint32_t writedisable_blue:1;
uint32_t writedisable_green:1;
uint32_t writedisable_red:1;
uint32_t writedisable_alpha:1;
uint32_t surface_format:9;
uint32_t data_return_format:1;
uint32_t pad0:1;
uint32_t surface_type:3;
} ss0;
 
struct {
uint32_t base_addr;
} ss1;
 
struct {
uint32_t render_target_rotation:2;
uint32_t mip_count:4;
uint32_t width:13;
uint32_t height:13;
} ss2;
 
struct {
uint32_t tile_walk:1;
uint32_t tiled_surface:1;
uint32_t pad:1;
uint32_t pitch:18;
uint32_t depth:11;
} ss3;
 
struct {
uint32_t pad:19;
uint32_t min_array_elt:9;
uint32_t min_lod:4;
} ss4;
 
struct {
uint32_t pad:20;
uint32_t y_offset:4;
uint32_t pad2:1;
uint32_t x_offset:7;
} ss5;
};
 
/* Surface state DW0 */
#define GEN6_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN6_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN6_SURFACE_FORMAT_SHIFT 18
#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define GEN6_SURFACE_TYPE_SHIFT 29
#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29)
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN6_SURFACE_HEIGHT_SHIFT 19
#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19)
#define GEN6_SURFACE_WIDTH_SHIFT 6
#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6)
#define GEN6_SURFACE_LOD_SHIFT 2
#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2)
 
/* Surface state DW3 */
#define GEN6_SURFACE_DEPTH_SHIFT 21
#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21)
#define GEN6_SURFACE_PITCH_SHIFT 3
#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3)
#define GEN6_SURFACE_TILED (1 << 1)
#define GEN6_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN6_SURFACE_MIN_LOD_SHIFT 28
#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28)
 
/* Surface state DW5 */
#define GEN6_SURFACE_X_OFFSET_SHIFT 25
#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25)
#define GEN6_SURFACE_Y_OFFSET_SHIFT 20
#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20)
 
struct gen6_cc_viewport {
float min_depth;
float max_depth;
};
 
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
#endif
/drivers/video/Intel-2D/intel.h
0,0 → 1,152
 
 
/** enumeration of 3d consumers so some can maintain invariant state. */
enum last_3d {
LAST_3D_OTHER,
LAST_3D_VIDEO,
LAST_3D_RENDER,
LAST_3D_ROTATION
};
 
 
 
typedef struct intel_screen_private {
int cpp;
 
#define RENDER_BATCH I915_EXEC_RENDER
#define BLT_BATCH I915_EXEC_BLT
 
unsigned int current_batch;
 
dri_bufmgr *bufmgr;
 
uint32_t batch_ptr[4096];
/** Byte offset in batch_ptr for the next dword to be emitted. */
unsigned int batch_used;
/** Position in batch_ptr at the start of the current BEGIN_BATCH */
unsigned int batch_emit_start;
/** Number of bytes to be emitted in the current BEGIN_BATCH. */
uint32_t batch_emitting;
dri_bo *batch_bo, *last_batch_bo[2];
/** Whether we're in a section of code that can't tolerate flushing */
Bool in_batch_atomic;
/** Ending batch_used that was verified by intel_start_batch_atomic() */
int batch_atomic_limit;
struct list batch_pixmaps;
drm_intel_bo *wa_scratch_bo;
 
unsigned int tiling;
 
#define INTEL_TILING_FB 0x1
#define INTEL_TILING_2D 0x2
#define INTEL_TILING_3D 0x4
#define INTEL_TILING_ALL (~0)
 
Bool has_relaxed_fencing;
 
int Chipset;
 
unsigned int BR[20];
 
void (*vertex_flush) (struct intel_screen_private *intel);
void (*batch_flush) (struct intel_screen_private *intel);
void (*batch_commit_notify) (struct intel_screen_private *intel);
 
Bool need_sync;
 
int accel_pixmap_offset_alignment;
int accel_max_x;
int accel_max_y;
int max_bo_size;
int max_gtt_map_size;
int max_tiling_size;
 
struct {
drm_intel_bo *gen4_vs_bo;
drm_intel_bo *gen4_sf_bo;
drm_intel_bo *gen4_wm_packed_bo;
drm_intel_bo *gen4_wm_planar_bo;
drm_intel_bo *gen4_cc_bo;
drm_intel_bo *gen4_cc_vp_bo;
drm_intel_bo *gen4_sampler_bo;
drm_intel_bo *gen4_sip_kernel_bo;
drm_intel_bo *wm_prog_packed_bo;
drm_intel_bo *wm_prog_planar_bo;
drm_intel_bo *gen6_blend_bo;
drm_intel_bo *gen6_depth_stencil_bo;
} video;
 
/* Render accel state */
float scale_units[2][2];
/** Transform pointers for src/mask, or NULL if identity */
PictTransform *transform[2];
 
PixmapPtr render_source, render_mask, render_dest;
PicturePtr render_source_picture, render_mask_picture, render_dest_picture;
Bool needs_3d_invariant;
Bool needs_render_state_emit;
Bool needs_render_vertex_emit;
 
/* i830 render accel state */
uint32_t render_dest_format;
uint32_t cblend, ablend, s8_blendctl;
 
/* i915 render accel state */
PixmapPtr texture[2];
uint32_t mapstate[6];
uint32_t samplerstate[6];
 
struct {
int op;
uint32_t dst_format;
} i915_render_state;
 
struct {
int num_sf_outputs;
int drawrect;
uint32_t blend;
dri_bo *samplers;
dri_bo *kernel;
} gen6_render_state;
 
uint32_t prim_offset;
void (*prim_emit)(struct intel_screen_private *intel,
int srcX, int srcY,
int maskX, int maskY,
int dstX, int dstY,
int w, int h);
int floats_per_vertex;
int last_floats_per_vertex;
uint16_t vertex_offset;
uint16_t vertex_count;
uint16_t vertex_index;
uint16_t vertex_used;
uint32_t vertex_id;
float vertex_ptr[4*1024];
dri_bo *vertex_bo;
 
uint8_t surface_data[16*1024];
uint16_t surface_used;
uint16_t surface_table;
uint32_t surface_reloc;
dri_bo *surface_bo;
 
/* 965 render acceleration state */
struct gen4_render_state *gen4_render_state;
 
Bool use_pageflipping;
Bool use_triple_buffer;
Bool force_fallback;
Bool has_kernel_flush;
Bool needs_flush;
 
enum last_3d last_3d;
 
/**
* User option to print acceleration fallback info to the server log.
*/
Bool fallback_debug;
unsigned debug_flush;
Bool has_prime_vmap_flush;
} intel_screen_private;
 
/drivers/video/Intel-2D/intel_driver.h
0,0 → 1,288
#ifndef INTEL_DRIVER_H
#define INTEL_DRIVER_H
 
#define INTEL_VERSION 4000
#define INTEL_NAME "intel"
#define INTEL_DRIVER_NAME "intel"
 
#define INTEL_VERSION_MAJOR PACKAGE_VERSION_MAJOR
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL
 
#ifndef PCI_CHIP_I810
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I810_BRIDGE 0x7120
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
#define PCI_CHIP_I810_E_BRIDGE 0x7124
#define PCI_CHIP_I815_BRIDGE 0x1130
#endif
 
#ifndef PCI_CHIP_I830_M
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_I830_M_BRIDGE 0x3575
#endif
 
#ifndef PCI_CHIP_845_G
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_845_G_BRIDGE 0x2560
#endif
 
#ifndef PCI_CHIP_I854
#define PCI_CHIP_I854 0x358E
#define PCI_CHIP_I854_BRIDGE 0x358C
#endif
 
#ifndef PCI_CHIP_I855_GM
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I855_GM_BRIDGE 0x3580
#endif
 
#ifndef PCI_CHIP_I865_G
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I865_G_BRIDGE 0x2570
#endif
 
#ifndef PCI_CHIP_I915_G
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_I915_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I915_GM
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I915_GM_BRIDGE 0x2590
#endif
 
#ifndef PCI_CHIP_E7221_G
#define PCI_CHIP_E7221_G 0x258A
/* Same as I915_G_BRIDGE */
#define PCI_CHIP_E7221_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I945_G
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_G_BRIDGE 0x2770
#endif
 
#ifndef PCI_CHIP_I945_GM
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
#endif
 
#ifndef PCI_CHIP_I945_GME
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC
#endif
 
#ifndef PCI_CHIP_PINEVIEW_M
#define PCI_CHIP_PINEVIEW_M 0xA011
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010
#define PCI_CHIP_PINEVIEW_G 0xA001
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000
#endif
 
#ifndef PCI_CHIP_G35_G
#define PCI_CHIP_G35_G 0x2982
#define PCI_CHIP_G35_G_BRIDGE 0x2980
#endif
 
#ifndef PCI_CHIP_I965_Q
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_Q_BRIDGE 0x2990
#endif
 
#ifndef PCI_CHIP_I965_G
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_G_BRIDGE 0x29A0
#endif
 
#ifndef PCI_CHIP_I946_GZ
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970
#endif
 
#ifndef PCI_CHIP_I965_GM
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00
#endif
 
#ifndef PCI_CHIP_I965_GME
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10
#endif
 
#ifndef PCI_CHIP_G33_G
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_G33_G_BRIDGE 0x29C0
#endif
 
#ifndef PCI_CHIP_Q35_G
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0
#endif
 
#ifndef PCI_CHIP_Q33_G
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0
#endif
 
#ifndef PCI_CHIP_GM45_GM
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_GM45_BRIDGE 0x2A40
#endif
 
#ifndef PCI_CHIP_G45_E_G
#define PCI_CHIP_G45_E_G 0x2E02
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00
#endif
 
#ifndef PCI_CHIP_G45_G
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G45_G_BRIDGE 0x2E20
#endif
 
#ifndef PCI_CHIP_Q45_G
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10
#endif
 
#ifndef PCI_CHIP_G41_G
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_G41_G_BRIDGE 0x2E30
#endif
 
#ifndef PCI_CHIP_B43_G
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G_BRIDGE 0x2E40
#endif
 
#ifndef PCI_CHIP_B43_G1
#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90
#endif
 
#ifndef PCI_CHIP_IRONLAKE_D_G
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040
#endif
 
#ifndef PCI_CHIP_IRONLAKE_M_G
#define PCI_CHIP_IRONLAKE_M_G 0x0046
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044
#endif
 
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
 
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156
#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
#define PCI_CHIP_IVYBRIDGE_D_GT1 0x0152
#define PCI_CHIP_IVYBRIDGE_D_GT2 0x0162
#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a
#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
 
#define PCI_CHIP_HASWELL_D_GT1 0x0402
#define PCI_CHIP_HASWELL_D_GT2 0x0412
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
 
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
#define PCI_CHIP_VALLEYVIEW_1 0x0f31
#define PCI_CHIP_VALLEYVIEW_2 0x0f32
#define PCI_CHIP_VALLEYVIEW_3 0x0f33
 
#endif
 
#define I85X_CAPID 0x44
#define I85X_VARIANT_MASK 0x7
#define I85X_VARIANT_SHIFT 5
#define I855_GME 0x0
#define I855_GM 0x4
#define I852_GME 0x2
#define I852_GM 0x5
 
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr
#define VENDOR_ID(p) (p)->vendor_id
#define DEVICE_ID(p) (p)->device_id
#define SUBVENDOR_ID(p) (p)->subvendor_id
#define SUBSYS_ID(p) (p)->subdevice_id
#define CHIP_REVISION(p) (p)->revision
 
#define INTEL_INFO(intel) ((intel)->info)
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
#define IS_GEN4(intel) IS_GENx(intel, 4)
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
/* Some chips have specific errata (or limits) that we need to workaround. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
 
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
 
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
 
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
 
struct intel_device_info {
int gen;
};
 
//void intel_detect_chipset(ScrnInfoPtr scrn,
// EntityInfoPtr ent,
// struct pci_device *pci);
 
 
#endif /* INTEL_DRIVER_H */
/drivers/video/Intel-2D/intel_list.h
0,0 → 1,353
/*
* Copyright © 2010-2012 Intel Corporation
* Copyright © 2010 Francisco Jerez <currojerez@riseup.net>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
 
#ifndef _INTEL_LIST_H_
#define _INTEL_LIST_H_
 
#include <stdbool.h>
 
/**
* @file Classic doubly-link circular list implementation.
* For real usage examples of the linked list, see the file test/list.c
*
* Example:
* We need to keep a list of struct foo in the parent struct bar, i.e. what
* we want is something like this.
*
* struct bar {
* ...
* struct foo *list_of_foos; -----> struct foo {}, struct foo {}, struct foo{}
* ...
* }
*
* We need one list head in bar and a list element in all list_of_foos (both are of
* data type 'struct list').
*
* struct bar {
* ...
* struct list list_of_foos;
* ...
* }
*
* struct foo {
* ...
* struct list entry;
* ...
* }
*
* Now we initialize the list head:
*
* struct bar bar;
* ...
* list_init(&bar.list_of_foos);
*
* Then we create the first element and add it to this list:
*
* struct foo *foo = malloc(...);
* ....
* list_add(&foo->entry, &bar.list_of_foos);
*
* Repeat the above for each element you want to add to the list. Deleting
* works with the element itself.
* list_del(&foo->entry);
* free(foo);
*
* Note: calling list_del(&bar.list_of_foos) will set bar.list_of_foos to an empty
* list again.
*
* Looping through the list requires a 'struct foo' as iterator and the
* name of the field the subnodes use.
*
* struct foo *iterator;
* list_for_each_entry(iterator, &bar.list_of_foos, entry) {
* if (iterator->something == ...)
* ...
* }
*
* Note: You must not call list_del() on the iterator if you continue the
* loop. You need to run the safe for-each loop instead:
*
* struct foo *iterator, *next;
* list_for_each_entry_safe(iterator, next, &bar.list_of_foos, entry) {
* if (...)
* list_del(&iterator->entry);
* }
*
*/
 
/**
* The linkage struct for list nodes. This struct must be part of your
* to-be-linked struct. struct list is required for both the head of the
* list and for each list node.
*
* Position and name of the struct list field is irrelevant.
* There are no requirements that elements of a list are of the same type.
* There are no requirements for a list head, any struct list can be a list
* head.
*/
struct list {
struct list *next, *prev;
};
 
/**
* Initialize the list as an empty list.
*
* Example:
* list_init(&bar->list_of_foos);
*
* @param The list to initialized.
*/
static void
list_init(struct list *list)
{
list->next = list->prev = list;
}
 
static inline void
__list_add(struct list *entry,
struct list *prev,
struct list *next)
{
next->prev = entry;
entry->next = next;
entry->prev = prev;
prev->next = entry;
}
 
/**
* Insert a new element after the given list head. The new element does not
* need to be initialised as empty list.
* The list changes from:
* head → some element → ...
* to
* head → new element → older element → ...
*
* Example:
* struct foo *newfoo = malloc(...);
* list_add(&newfoo->entry, &bar->list_of_foos);
*
* @param entry The new element to prepend to the list.
* @param head The existing list.
*/
static inline void
list_add(struct list *entry, struct list *head)
{
__list_add(entry, head, head->next);
}
 
static inline void
list_add_tail(struct list *entry, struct list *head)
{
__list_add(entry, head->prev, head);
}
 
static inline void list_replace(struct list *old,
struct list *new)
{
new->next = old->next;
new->next->prev = new;
new->prev = old->prev;
new->prev->next = new;
}
 
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
 
#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)
 
/**
* Append a new element to the end of the list given with this list head.
*
* The list changes from:
* head → some element → ... → lastelement
* to
* head → some element → ... → lastelement → new element
*
* Example:
* struct foo *newfoo = malloc(...);
* list_append(&newfoo->entry, &bar->list_of_foos);
*
* @param entry The new element to prepend to the list.
* @param head The existing list.
*/
static inline void
list_append(struct list *entry, struct list *head)
{
__list_add(entry, head->prev, head);
}
 
 
static inline void
__list_del(struct list *prev, struct list *next)
{
assert(next->prev == prev->next);
next->prev = prev;
prev->next = next;
}
 
static inline void
_list_del(struct list *entry)
{
assert(entry->prev->next == entry);
assert(entry->next->prev == entry);
__list_del(entry->prev, entry->next);
}
 
/**
* Remove the element from the list it is in. Using this function will reset
* the pointers to/from this element so it is removed from the list. It does
* NOT free the element itself or manipulate it otherwise.
*
* Using list_del on a pure list head (like in the example at the top of
* this file) will NOT remove the first element from
* the list but rather reset the list as empty list.
*
* Example:
* list_del(&foo->entry);
*
* @param entry The element to remove.
*/
static inline void
list_del(struct list *entry)
{
_list_del(entry);
list_init(entry);
}
 
static inline void list_move(struct list *list, struct list *head)
{
if (list->prev != head) {
_list_del(list);
list_add(list, head);
}
}
 
static inline void list_move_tail(struct list *list, struct list *head)
{
_list_del(list);
list_add_tail(list, head);
}
 
/**
* Check if the list is empty.
*
* Example:
* list_is_empty(&bar->list_of_foos);
*
* @return True if the list contains one or more elements or False otherwise.
*/
static inline bool
list_is_empty(struct list *head)
{
return head->next == head;
}
 
/**
* Alias of container_of
*/
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
 
/**
* Retrieve the first list entry for the given list pointer.
*
* Example:
* struct foo *first;
* first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
*
* @param ptr The list head
* @param type Data type of the list element to retrieve
* @param member Member name of the struct list field in the list element.
* @return A pointer to the first list element.
*/
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
 
/**
* Retrieve the last list entry for the given listpointer.
*
* Example:
* struct foo *first;
* first = list_last_entry(&bar->list_of_foos, struct foo, list_of_foos);
*
* @param ptr The list head
* @param type Data type of the list element to retrieve
* @param member Member name of the struct list field in the list element.
* @return A pointer to the last list element.
*/
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
 
#define __container_of(ptr, sample, member) \
(void *)((char *)(ptr) \
- ((char *)&(sample)->member - (char *)(sample)))
/**
* Loop through the list given by head and set pos to struct in the list.
*
* Example:
* struct foo *iterator;
* list_for_each_entry(iterator, &bar->list_of_foos, entry) {
* [modify iterator]
* }
*
* This macro is not safe for node deletion. Use list_for_each_entry_safe
* instead.
*
* @param pos Iterator variable of the type of the list elements.
* @param head List head
* @param member Member name of the struct list in the list elements.
*
*/
#define list_for_each_entry(pos, head, member) \
for (pos = __container_of((head)->next, pos, member); \
&pos->member != (head); \
pos = __container_of(pos->member.next, pos, member))
 
#define list_for_each_entry_reverse(pos, head, member) \
for (pos = __container_of((head)->prev, pos, member); \
&pos->member != (head); \
pos = __container_of(pos->member.prev, pos, member))
 
/**
* Loop through the list, keeping a backup pointer to the element. This
* macro allows for the deletion of a list element while looping through the
* list.
*
* See list_for_each_entry for more details.
*/
#define list_for_each_entry_safe(pos, tmp, head, member) \
for (pos = __container_of((head)->next, pos, member), \
tmp = __container_of(pos->member.next, pos, member); \
&pos->member != (head); \
pos = tmp, tmp = __container_of(pos->member.next, tmp, member))
 
 
#undef container_of
#define container_of(ptr, type, member) \
((type *)((char *)(ptr) - (char *) &((type *)0)->member))
 
#endif /* _INTEL_LIST_H_ */
 
/drivers/video/Intel-2D/pciaccess.h
0,0 → 1,537
/*
* (C) Copyright IBM Corporation 2006
* Copyright 2009 Red Hat, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* Copyright (c) 2007 Paulo R. Zanoni, Tiago Vignatti
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file pciaccess.h
*
* \author Ian Romanick <idr@us.ibm.com>
*/
 
#ifndef PCIACCESS_H
#define PCIACCESS_H
 
#include <inttypes.h>
 
#if __GNUC__ >= 3
#define __deprecated __attribute__((deprecated))
#else
#define __deprecated
#endif
 
typedef uint64_t pciaddr_t;
 
struct pci_device;
struct pci_device_iterator;
struct pci_id_match;
struct pci_slot_match;
 
#ifdef __cplusplus
extern "C" {
#endif
 
int pci_device_has_kernel_driver(struct pci_device *dev);
 
int pci_device_is_boot_vga(struct pci_device *dev);
 
int pci_device_read_rom(struct pci_device *dev, void *buffer);
 
int __deprecated pci_device_map_region(struct pci_device *dev,
unsigned region, int write_enable);
 
int __deprecated pci_device_unmap_region(struct pci_device *dev,
unsigned region);
 
int pci_device_map_range(struct pci_device *dev, pciaddr_t base,
pciaddr_t size, unsigned map_flags, void **addr);
 
int pci_device_unmap_range(struct pci_device *dev, void *memory,
pciaddr_t size);
 
int __deprecated pci_device_map_memory_range(struct pci_device *dev,
pciaddr_t base, pciaddr_t size, int write_enable, void **addr);
 
int __deprecated pci_device_unmap_memory_range(struct pci_device *dev,
void *memory, pciaddr_t size);
 
int pci_device_probe(struct pci_device *dev);
 
const struct pci_agp_info *pci_device_get_agp_info(struct pci_device *dev);
 
const struct pci_bridge_info *pci_device_get_bridge_info(
struct pci_device *dev);
 
const struct pci_pcmcia_bridge_info *pci_device_get_pcmcia_bridge_info(
struct pci_device *dev);
 
int pci_device_get_bridge_buses(struct pci_device *dev, int *primary_bus,
int *secondary_bus, int *subordinate_bus);
 
int pci_system_init(void);
 
void pci_system_init_dev_mem(int fd);
 
void pci_system_cleanup(void);
 
struct pci_device_iterator *pci_slot_match_iterator_create(
const struct pci_slot_match *match);
 
struct pci_device_iterator *pci_id_match_iterator_create(
const struct pci_id_match *match);
 
void pci_iterator_destroy(struct pci_device_iterator *iter);
 
struct pci_device *pci_device_next(struct pci_device_iterator *iter);
 
struct pci_device *pci_device_find_by_slot(uint32_t domain, uint32_t bus,
uint32_t dev, uint32_t func);
 
struct pci_device *pci_device_get_parent_bridge(struct pci_device *dev);
 
void pci_get_strings(const struct pci_id_match *m,
const char **device_name, const char **vendor_name,
const char **subdevice_name, const char **subvendor_name);
const char *pci_device_get_device_name(const struct pci_device *dev);
const char *pci_device_get_subdevice_name(const struct pci_device *dev);
const char *pci_device_get_vendor_name(const struct pci_device *dev);
const char *pci_device_get_subvendor_name(const struct pci_device *dev);
 
void pci_device_enable(struct pci_device *dev);
 
int pci_device_cfg_read (struct pci_device *dev, void *data,
pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_read);
int pci_device_cfg_read_u8 (struct pci_device *dev, uint8_t *data,
pciaddr_t offset);
int pci_device_cfg_read_u16(struct pci_device *dev, uint16_t *data,
pciaddr_t offset);
int pci_device_cfg_read_u32(struct pci_device *dev, uint32_t *data,
pciaddr_t offset);
 
int pci_device_cfg_write (struct pci_device *dev, const void *data,
pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_written);
int pci_device_cfg_write_u8 (struct pci_device *dev, uint8_t data,
pciaddr_t offset);
int pci_device_cfg_write_u16(struct pci_device *dev, uint16_t data,
pciaddr_t offset);
int pci_device_cfg_write_u32(struct pci_device *dev, uint32_t data,
pciaddr_t offset);
int pci_device_cfg_write_bits(struct pci_device *dev, uint32_t mask,
uint32_t data, pciaddr_t offset);
 
#ifdef __cplusplus
}
#endif
 
/**
* \name Mapping flags passed to \c pci_device_map_range
*/
/*@{*/
#define PCI_DEV_MAP_FLAG_WRITABLE (1U<<0)
#define PCI_DEV_MAP_FLAG_WRITE_COMBINE (1U<<1)
#define PCI_DEV_MAP_FLAG_CACHABLE (1U<<2)
/*@}*/
 
 
#define PCI_MATCH_ANY (~0)
 
/**
* Compare two PCI ID values (either vendor or device). This is used
* internally to compare the fields of \c pci_id_match to the fields of
* \c pci_device.
*/
#define PCI_ID_COMPARE(a, b) \
(((a) == PCI_MATCH_ANY) || ((a) == (b)))
 
/**
*/
struct pci_id_match {
/**
* \name Device / vendor matching controls
*
* Control the search based on the device, vendor, subdevice, or subvendor
* IDs. Setting any of these fields to \c PCI_MATCH_ANY will cause the
* field to not be used in the comparison.
*/
/*@{*/
uint32_t vendor_id;
uint32_t device_id;
uint32_t subvendor_id;
uint32_t subdevice_id;
/*@}*/
 
 
/**
* \name Device class matching controls
*
*/
/*@{*/
uint32_t device_class;
uint32_t device_class_mask;
/*@}*/
 
intptr_t match_data;
};
 
 
/**
*/
struct pci_slot_match {
/**
* \name Device slot matching controls
*
* Control the search based on the domain, bus, slot, and function of
* the device. Setting any of these fields to \c PCI_MATCH_ANY will cause
* the field to not be used in the comparison.
*/
/*@{*/
uint32_t domain;
uint32_t bus;
uint32_t dev;
uint32_t func;
/*@}*/
 
intptr_t match_data;
};
 
/**
* BAR descriptor for a PCI device.
*/
struct pci_mem_region {
/**
* When the region is mapped, this is the pointer to the memory.
*
* This field is \b only set when the deprecated \c pci_device_map_region
* interface is used. Use \c pci_device_map_range instead.
*
* \deprecated
*/
void *memory;
 
 
/**
* Base physical address of the region within its bus / domain.
*
* \warning
* This address is really only useful to other devices in the same
* domain. It's probably \b not the address applications will ever
* use.
*
* \warning
* Most (all?) platform back-ends leave this field unset.
*/
pciaddr_t bus_addr;
 
 
/**
* Base physical address of the region from the CPU's point of view.
*
* This address is typically passed to \c pci_device_map_range to create
* a mapping of the region to the CPU's virtual address space.
*/
pciaddr_t base_addr;
 
 
/**
* Size, in bytes, of the region.
*/
pciaddr_t size;
 
 
/**
* Is the region I/O ports or memory?
*/
unsigned is_IO:1;
 
/**
* Is the memory region prefetchable?
*
* \note
* This can only be set if \c is_IO is not set.
*/
unsigned is_prefetchable:1;
 
 
/**
* Is the memory at a 64-bit address?
*
* \note
* This can only be set if \c is_IO is not set.
*/
unsigned is_64:1;
};
 
 
/**
* PCI device.
*
* Contains all of the information about a particular PCI device.
*/
struct pci_device {
/**
* \name Device bus identification.
*
* Complete bus identification, including domain, of the device. On
* platforms that do not support PCI domains (e.g., 32-bit x86 hardware),
* the domain will always be zero.
*/
/*@{*/
uint16_t domain;
uint8_t bus;
uint8_t dev;
uint8_t func;
/*@}*/
 
 
/**
* \name Vendor / device ID
*
* The vendor ID, device ID, and sub-IDs for the device.
*/
/*@{*/
uint16_t vendor_id;
uint16_t device_id;
uint16_t subvendor_id;
uint16_t subdevice_id;
/*@}*/
 
/**
* Device's class, subclass, and programming interface packed into a
* single 32-bit value. The class is at bits [23:16], subclass is at
* bits [15:8], and programming interface is at [7:0].
*/
uint32_t device_class;
 
 
/**
* Device revision number, as read from the configuration header.
*/
uint8_t revision;
 
 
/**
* BAR descriptors for the device.
*/
struct pci_mem_region regions[6];
 
 
/**
* Size, in bytes, of the device's expansion ROM.
*/
pciaddr_t rom_size;
 
 
/**
* IRQ associated with the device. If there is no IRQ, this value will
* be -1.
*/
int irq;
 
 
/**
* Storage for user data. Users of the library can store arbitrary
* data in this pointer. The library will not use it for any purpose.
* It is the user's responsability to free this memory before destroying
* the \c pci_device structure.
*/
intptr_t user_data;
 
/**
* Used by the VGA arbiter. Type of resource decoded by the device and
* the file descriptor (/dev/vga_arbiter). */
int vgaarb_rsrc;
};
 
 
/**
* Description of the AGP capability of the device.
*
* \sa pci_device_get_agp_info
*/
struct pci_agp_info {
/**
* Offset of the AGP registers in the devices configuration register
* space. This is generally used so that the offset of the AGP command
* register can be determined.
*/
unsigned config_offset;
 
 
/**
* \name AGP major / minor version.
*/
/*@{*/
uint8_t major_version;
uint8_t minor_version;
/*@}*/
 
/**
* Logical OR of the supported AGP rates. For example, a value of 0x07
* means that the device can support 1x, 2x, and 4x. A value of 0x0c
* means that the device can support 8x and 4x.
*/
uint8_t rates;
 
unsigned int fast_writes:1; /**< Are fast-writes supported? */
unsigned int addr64:1;
unsigned int htrans:1;
unsigned int gart64:1;
unsigned int coherent:1;
unsigned int sideband:1; /**< Is side-band addressing supported? */
unsigned int isochronus:1;
 
uint8_t async_req_size;
uint8_t calibration_cycle_timing;
uint8_t max_requests;
};
 
/**
* Description of a PCI-to-PCI bridge device.
*
* \sa pci_device_get_bridge_info
*/
struct pci_bridge_info {
uint8_t primary_bus;
uint8_t secondary_bus;
uint8_t subordinate_bus;
uint8_t secondary_latency_timer;
 
uint8_t io_type;
uint8_t mem_type;
uint8_t prefetch_mem_type;
 
uint16_t secondary_status;
uint16_t bridge_control;
 
uint32_t io_base;
uint32_t io_limit;
 
uint32_t mem_base;
uint32_t mem_limit;
 
uint64_t prefetch_mem_base;
uint64_t prefetch_mem_limit;
};
 
/**
* Description of a PCI-to-PCMCIA bridge device.
*
* \sa pci_device_get_pcmcia_bridge_info
*/
struct pci_pcmcia_bridge_info {
uint8_t primary_bus;
uint8_t card_bus;
uint8_t subordinate_bus;
uint8_t cardbus_latency_timer;
 
uint16_t secondary_status;
uint16_t bridge_control;
 
struct {
uint32_t base;
uint32_t limit;
} io[2];
 
struct {
uint32_t base;
uint32_t limit;
} mem[2];
 
};
 
 
/**
* VGA Arbiter definitions, functions and related.
*/
 
/* Legacy VGA regions */
#define VGA_ARB_RSRC_NONE 0x00
#define VGA_ARB_RSRC_LEGACY_IO 0x01
#define VGA_ARB_RSRC_LEGACY_MEM 0x02
/* Non-legacy access */
#define VGA_ARB_RSRC_NORMAL_IO 0x04
#define VGA_ARB_RSRC_NORMAL_MEM 0x08
 
int pci_device_vgaarb_init (void);
void pci_device_vgaarb_fini (void);
int pci_device_vgaarb_set_target (struct pci_device *dev);
/* use the targetted device */
int pci_device_vgaarb_decodes (int new_vga_rsrc);
int pci_device_vgaarb_lock (void);
int pci_device_vgaarb_trylock (void);
int pci_device_vgaarb_unlock (void);
/* return the current device count + resource decodes for the device */
int pci_device_vgaarb_get_info (struct pci_device *dev, int *vga_count, int *rsrc_decodes);
 
/*
* I/O space access.
*/
 
struct pci_io_handle;
 
struct pci_io_handle *pci_device_open_io(struct pci_device *dev, pciaddr_t base,
pciaddr_t size);
struct pci_io_handle *pci_legacy_open_io(struct pci_device *dev, pciaddr_t base,
pciaddr_t size);
void pci_device_close_io(struct pci_device *dev, struct pci_io_handle *handle);
uint32_t pci_io_read32(struct pci_io_handle *handle, uint32_t reg);
uint16_t pci_io_read16(struct pci_io_handle *handle, uint32_t reg);
uint8_t pci_io_read8(struct pci_io_handle *handle, uint32_t reg);
void pci_io_write32(struct pci_io_handle *handle, uint32_t reg, uint32_t data);
void pci_io_write16(struct pci_io_handle *handle, uint32_t reg, uint16_t data);
void pci_io_write8(struct pci_io_handle *handle, uint32_t reg, uint8_t data);
 
/*
* Legacy memory access
*/
 
int pci_device_map_legacy(struct pci_device *dev, pciaddr_t base,
pciaddr_t size, unsigned map_flags, void **addr);
int pci_device_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size);
 
#endif /* PCIACCESS_H */