Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 4293 → Rev 4304

/drivers/video/Intel-2D/gen4_source.h
File deleted
/drivers/video/Intel-2D/intel.h
File deleted
/drivers/video/Intel-2D/sna_stream.c
File deleted
/drivers/video/Intel-2D/sna_render.h
File deleted
/drivers/video/Intel-2D/compiler.h
File deleted
/drivers/video/Intel-2D/brw/brw_eu_emit.c
File deleted
/drivers/video/Intel-2D/brw/brw_sf.c
File deleted
/drivers/video/Intel-2D/brw/brw_eu.c
File deleted
/drivers/video/Intel-2D/brw/brw_eu.h
File deleted
/drivers/video/Intel-2D/brw/brw_wm.c
File deleted
/drivers/video/Intel-2D/brw/brw.h
File deleted
/drivers/video/Intel-2D/utils.c
File deleted
/drivers/video/Intel-2D/sna_transform.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen5.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen4.c
File deleted
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g6b
File deleted
/drivers/video/Intel-2D/render/exa_sf_mask.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_projective.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_noca.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_projective.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_write.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_write.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_affine.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_xy.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_affine.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g5b
File deleted
/drivers/video/Intel-2D/render/exa_sf.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_noca.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g7b
File deleted
/drivers/video/Intel-2D/render/exa_sf_mask.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_noca.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_projective.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_projective.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_argb.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_projective.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_planar.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_argb.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_sample_a.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_write.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_write.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_sample_a.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_affine.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_xy.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_ca_srcalpha.g6b
File deleted
/drivers/video/Intel-2D/render/exa_wm_src_affine.g7b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g4b
File deleted
/drivers/video/Intel-2D/render/exa_wm_mask_affine.g6b
File deleted
/drivers/video/Intel-2D/render/exa_sf.g5b
File deleted
/drivers/video/Intel-2D/render/exa_wm_yuv_rgb.g4b
File deleted
/drivers/video/Intel-2D/pciaccess.h
File deleted
/drivers/video/Intel-2D/gen4_vertex.c
File deleted
/drivers/video/Intel-2D/sna_cpu.c
File deleted
/drivers/video/Intel-2D/sna.c
File deleted
/drivers/video/Intel-2D/gen4_vertex.h
File deleted
/drivers/video/Intel-2D/sna.h
File deleted
/drivers/video/Intel-2D/kgem.c
File deleted
/drivers/video/Intel-2D/gen3_render.c
File deleted
/drivers/video/Intel-2D/gen4_render.c
File deleted
/drivers/video/Intel-2D/gen5_render.c
File deleted
/drivers/video/Intel-2D/gen6_render.c
File deleted
/drivers/video/Intel-2D/gen7_render.c
File deleted
/drivers/video/Intel-2D/sna_reg.h
File deleted
/drivers/video/Intel-2D/kgem_debug_gen3.c
File deleted
/drivers/video/Intel-2D/kgem.h
File deleted
/drivers/video/Intel-2D/gen3_render.h
File deleted
/drivers/video/Intel-2D/gen4_render.h
File deleted
/drivers/video/Intel-2D/gen5_render.h
File deleted
/drivers/video/Intel-2D/gen6_render.h
File deleted
/drivers/video/Intel-2D/gen7_render.h
File deleted
/drivers/video/Intel-2D/kgem_debug_gen7.c
File deleted
/drivers/video/Intel-2D/kgem_debug_gen6.c
File deleted
/drivers/video/Intel-2D/kgem_debug.h
File deleted
/drivers/video/Intel-2D/kgem_debug.c
File deleted
/drivers/video/Intel-2D/sna_cpuid.h
File deleted
/drivers/video/Intel-2D/sna_render_inline.h
File deleted
/drivers/video/Intel-2D/sna/brw/brw.h
0,0 → 1,17
#include "brw_eu.h"
 
/*
 * Entry points for the SF and WM kernels.  Only the prototypes are visible
 * in this header: every function takes the brw_compile to operate on and
 * returns a bool.
 * NOTE(review): presumably "true" means the kernel was emitted
 * successfully -- confirm against the definitions.
 */

/* SF kernels (names suggest one variant without and one with a mask). */
bool brw_sf_kernel__nomask(struct brw_compile *p);
bool brw_sf_kernel__mask(struct brw_compile *p);

/*
 * WM kernels, "affine" family.  dispatch_width is passed through to the
 * generator; NOTE(review): presumably the SIMD width (8 or 16) -- confirm.
 */
bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);

/* WM kernels, "projective" family; same variants as the affine family. */
bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);

/* WM kernels taking an opacity input (going by the names). */
bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width);
/drivers/video/Intel-2D/sna/brw/brw_eu.c
0,0 → 1,150
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/**
 * Returns the conditional modifier to use after exchanging src0 and src1
 * of a comparison (e.g. CMP), so that "a <cmod> b" and
 * "b <swapped cmod> a" test the same relation.
 *
 * Equality tests (Z/NZ) are symmetric and are returned unchanged.
 * Ordered tests are mirrored: G <-> L and GE <-> LE.  Exchanging the
 * operands must not negate the condition (a > b is b < a, not b <= a).
 *
 * \param cmod  one of the BRW_CONDITIONAL_* values
 * \return the mirrored modifier, or ~0 if \p cmod has no swapped form
 */
uint32_t
brw_swap_cmod(uint32_t cmod)
{
	switch (cmod) {
	case BRW_CONDITIONAL_Z:
	case BRW_CONDITIONAL_NZ:
		return cmod;
	case BRW_CONDITIONAL_G:
		return BRW_CONDITIONAL_L;
	case BRW_CONDITIONAL_GE:
		return BRW_CONDITIONAL_LE;
	case BRW_CONDITIONAL_L:
		return BRW_CONDITIONAL_G;
	case BRW_CONDITIONAL_LE:
		return BRW_CONDITIONAL_GE;
	default:
		return ~0;
	}
}
 
/*
 * Select the default predication for subsequently emitted instructions
 * from a flag value.
 *
 * A value of 0xff leaves the default instruction unpredicated.  Any other
 * value switches it to BRW_PREDICATE_NORMAL; the flag register is loaded
 * with that value first, unless p->flag_value records that it already
 * holds it.
 *
 * Open question carried over from the original author: how does
 * predicate control work when execution_size != 8?  Should 0xffff be
 * tested/set when execution_size is 16?
 */
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
{
	/* Reset first: this store precedes the brw_MOV below, as it always has. */
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	if (value == 0xff)
		return;

	/* Reload the flag register only when the cached value is stale. */
	if (p->flag_value != value) {
		brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
		p->flag_value = value;
	}

	p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
 
/*
 * Set the compression control of the default instruction state and
 * record in p->compressed whether full compression was requested.
 *
 * Before gen 060 the enum value is stored in the header unchanged.  From
 * gen 060 on it is translated to the GEN6_COMPRESSION_* encoding, since
 * the 32-wide support of gen6 is not used here.
 */
void brw_set_compression_control(struct brw_compile *p,
				 enum brw_compression compression_control)
{
	unsigned gen6_ctrl;

	p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);

	if (p->gen < 060) {
		/* Pre-gen6: the enum value goes into the header as-is. */
		p->current->header.compression_control = compression_control;
		return;
	}

	switch (compression_control) {
	case BRW_COMPRESSION_NONE:
		/* "Use the first set of bits of dmask/vmask/arf according
		 * to execsize."
		 */
		gen6_ctrl = GEN6_COMPRESSION_1Q;
		break;
	case BRW_COMPRESSION_2NDHALF:
		/* For 8-wide: "use the second set of 8 bits". */
		gen6_ctrl = GEN6_COMPRESSION_2Q;
		break;
	case BRW_COMPRESSION_COMPRESSED:
		/* 16-wide compression: first set of 16 bits, because
		 * 32-wide dispatch is never used.
		 */
		gen6_ctrl = GEN6_COMPRESSION_1H;
		break;
	default:
		assert(!"not reached");
		gen6_ctrl = GEN6_COMPRESSION_1H;
		break;
	}

	p->current->header.compression_control = gen6_ctrl;
}
 
/*
 * Push the default instruction state: the current stack entry is
 * duplicated into the next slot, which becomes the new current entry, and
 * p->compressed is saved alongside the entry being left.
 */
void brw_push_insn_state( struct brw_compile *p )
{
	/* The stack is fixed-size; its last slot cannot be pushed from. */
	assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);

	p->compressed_stack[p->current - p->stack] = p->compressed;
	memcpy(&p->current[1], &p->current[0], sizeof(struct brw_instruction));
	++p->current;
}
 
/*
 * Pop the default instruction state: step back to the previous stack
 * entry and restore the p->compressed value saved for it by
 * brw_push_insn_state().
 */
void brw_pop_insn_state( struct brw_compile *p )
{
	/* Nothing to pop when already at the base of the stack. */
	assert(p->current != p->stack);

	--p->current;
	p->compressed = p->compressed_stack[p->current - p->stack];
}
 
/*
 * Initialise a brw_compile for a new program.
 *
 * \param p      compile context to reset
 * \param gen    hardware generation; must be non-zero (asserted)
 * \param store  stored in p->store as given
 *
 * The instruction count is cleared, the state stack is rewound to its
 * base with a zeroed default instruction, default instruction state is
 * applied through the brw_set_*() helpers, and the IF stack is emptied.
 */
void brw_compile_init(struct brw_compile *p, int gen, void *store)
{
	assert(gen);

	p->store = store;
	p->gen = gen;

	/* Empty program; default state lives in the first stack slot. */
	p->compressed = false;
	p->nr_insn = 0;
	p->current = p->stack;
	memset(p->current, 0, sizeof(*p->current));

	/* Defaults: mask enabled, no saturation, no compression, no
	 * predication (0xff selects BRW_PREDICATE_NONE).
	 */
	brw_set_mask_control(p, BRW_MASK_ENABLE);
	brw_set_saturate(p, 0);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_predicate_control_flag_value(p, 0xff);

	/* No IF stack allocated yet. */
	p->if_stack = NULL;
	p->if_stack_array_size = 0;
	p->if_stack_depth = 0;
}
/drivers/video/Intel-2D/sna/brw/brw_eu.h
0,0 → 1,2264
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#ifndef BRW_EU_H
#define BRW_EU_H
 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
 
/*
 * A swizzle packs four 2-bit channel selectors into 8 bits: selector `a`
 * occupies bits 1:0, `b` bits 3:2, `c` bits 5:4 and `d` bits 7:6.
 * BRW_GET_SWZ extracts selector number `idx` (0..3) from a packed swizzle.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Predefined swizzles; NOOP and XYZW are the same identity selection. */
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
 
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8
 
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
 
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
 
/** Number of message register file registers */
#define BRW_MAX_MRF 16
 
 
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
 
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
 
/*
 * Compression modes accepted by brw_set_compression_control().
 *
 * The numeric values matter: on pre-gen6 parts that function stores the
 * enumerator directly in header.compression_control, so they are spelled
 * out explicitly.
 */
enum brw_compression {
	BRW_COMPRESSION_NONE       = 0,
	BRW_COMPRESSION_2NDHALF    = 1,
	BRW_COMPRESSION_COMPRESSED = 2,
};
 
/*
 * Values for header.compression_control on gen6 and later, where
 * brw_set_compression_control() translates enum brw_compression into
 * these.  Note the shared encodings: 1H has the same value as 1Q (0) and
 * 2H the same value as 3Q (2).
 * NOTE(review): nQ / nH presumably select a quarter / half of the channel
 * mask bits -- confirm against the hardware documentation.
 */
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
 
/*
 * Conditional modifier values, as accepted by brw_swap_cmod().
 * EQ and NEQ are aliases with the same values as Z and NZ.
 * NOTE(review): R, O and U are not handled by brw_swap_cmod(), which
 * returns ~0 for them; their exact meaning is not visible here.
 */
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
 
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
 
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
 
/*
 * Execution size encodings: BRW_EXECUTE_n has the value log2(n).
 * NOTE(review): presumably written to header.execution_size -- confirm
 * at the use sites.
 */
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5

/*
 * Horizontal stride encodings: a stride of 0 is encoded as 0, and a
 * stride of 1, 2 or 4 as log2(stride) + 1.
 */
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
 
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
 
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
 
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
* are two contributors ANDed together to whether channels are
* executed: The predication on the instruction, and the channel write
* enable.
*/
/**
* This is the default value. It means that a channel's write enable is set
* if the per-channel IP is pointing at this instruction.
*/
#define BRW_WE_NORMAL 0
/**
* This is used like BRW_MASK_DISABLE, and causes all channels to have
* their write enable set. Note that predication still contributes to
* whether the channel actually gets written.
*/
#define BRW_WE_ALL 1
/** @} */
 
/*
 * Instruction opcodes.
 *
 * Values up to BRW_OPCODE_NOP (126) carry explicit numbers because they
 * are the hardware encodings; the numbering has gaps.  The opcode field
 * of struct brw_instruction is 7 bits wide, so every hardware opcode fits
 * in it while the backend opcodes (128 and up) do not -- those have to be
 * translated into hardware instructions before emission.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV = 1,
	BRW_OPCODE_SEL = 2,
	BRW_OPCODE_NOT = 4,
	BRW_OPCODE_AND = 5,
	BRW_OPCODE_OR = 6,
	BRW_OPCODE_XOR = 7,
	BRW_OPCODE_SHR = 8,
	BRW_OPCODE_SHL = 9,
	BRW_OPCODE_RSR = 10,
	BRW_OPCODE_RSL = 11,
	BRW_OPCODE_ASR = 12,
	BRW_OPCODE_CMP = 16,
	BRW_OPCODE_CMPN = 17,
	BRW_OPCODE_JMPI = 32,
	BRW_OPCODE_IF = 34,
	BRW_OPCODE_IFF = 35,
	BRW_OPCODE_ELSE = 36,
	BRW_OPCODE_ENDIF = 37,
	BRW_OPCODE_DO = 38,
	BRW_OPCODE_WHILE = 39,
	BRW_OPCODE_BREAK = 40,
	BRW_OPCODE_CONTINUE = 41,
	BRW_OPCODE_HALT = 42,
	BRW_OPCODE_MSAVE = 44,
	BRW_OPCODE_MRESTORE = 45,
	BRW_OPCODE_PUSH = 46,
	BRW_OPCODE_POP = 47,
	BRW_OPCODE_WAIT = 48,
	BRW_OPCODE_SEND = 49,
	BRW_OPCODE_SENDC = 50,
	BRW_OPCODE_MATH = 56,
	BRW_OPCODE_ADD = 64,
	BRW_OPCODE_MUL = 65,
	BRW_OPCODE_AVG = 66,
	BRW_OPCODE_FRC = 67,
	BRW_OPCODE_RNDU = 68,
	BRW_OPCODE_RNDD = 69,
	BRW_OPCODE_RNDE = 70,
	BRW_OPCODE_RNDZ = 71,
	BRW_OPCODE_MAC = 72,
	BRW_OPCODE_MACH = 73,
	BRW_OPCODE_LZD = 74,
	BRW_OPCODE_SAD2 = 80,
	BRW_OPCODE_SADA2 = 81,
	BRW_OPCODE_DP4 = 84,
	BRW_OPCODE_DPH = 85,
	BRW_OPCODE_DP3 = 86,
	BRW_OPCODE_DP2 = 87,
	BRW_OPCODE_DPA2 = 88,
	BRW_OPCODE_LINE = 89,
	BRW_OPCODE_PLN = 90,
	BRW_OPCODE_NOP = 126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.
	 *
	 * Only the first one is numbered; the rest follow consecutively, so
	 * inserting or reordering entries renumbers everything after them.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};
 
/*
 * Predicate control values.  NONE and NORMAL are the two values written
 * to header.predicate_control by brw_set_predicate_control_flag_value().
 *
 * The ALIGN1_* and ALIGN16_* names overlap numerically (values 2..7 are
 * used by both sets), so a raw value is ambiguous without knowing which
 * set applies.
 * NOTE(review): presumably the set is chosen by the instruction's access
 * mode (BRW_ALIGN_1 vs BRW_ALIGN_16) -- confirm.
 */
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
 
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
 
/*
 * Register data type encodings; all values fit in 3 bits, matching the
 * width of the *_reg_type fields in struct brw_instruction.
 *
 * Two values are shared by two names each: 5 is both B and VF, and 6 is
 * both HF and V.  Per the inline notes, VF and V are meant for
 * immediates.
 */
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
 
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
 
#define BRW_MRF_COMPR4 (1 << 7)
 
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
 
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
 
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
 
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
 
#define BRW_MESSAGE_TARGET_NULL 0
#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9
 
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define BRW_SAMPLER_MESSAGE_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
 
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
 
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
 
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
/**
* Message target: Shared Function ID for where to SEND a message.
*
* These are enumerated in the ISA reference under "send - Send Message".
* In particular, see the following tables:
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
* - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
* Overview / GPE Function IDs
*/
enum brw_message_target {
	/* Base set of shared function IDs (values 0..7). */
	BRW_SFID_NULL = 0,
	BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
	BRW_SFID_SAMPLER = 2,
	BRW_SFID_MESSAGE_GATEWAY = 3,
	BRW_SFID_DATAPORT_READ = 4,
	BRW_SFID_DATAPORT_WRITE = 5,
	BRW_SFID_URB = 6,
	BRW_SFID_THREAD_SPAWNER = 7,

	/* Gen6 names.  The first two reuse values 4 and 5, i.e. they are
	 * numerically equal to BRW_SFID_DATAPORT_READ and
	 * BRW_SFID_DATAPORT_WRITE; only the constant cache gets a new value.
	 */
	GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
	GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
	GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

	/* Gen7 addition. */
	GEN7_SFID_DATAPORT_DATA_CACHE = 10,
};
 
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
 
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
 
/*
 * Math function selector values.
 *
 * The "was N" notes on SIN, COS and SINCOS record values these names
 * previously had.  Value 9 is shared: per the inline notes it means TAN
 * on gen4 and FDIV on gen6+, so the meaning depends on the generation.
 */
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
 
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
 
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
 
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
 
#define BRW_URB_OPCODE_WRITE 0
 
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
 
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
 
#define REG_SIZE (8*4)
 
struct brw_instruction {
struct {
unsigned opcode:7;
unsigned pad:1;
unsigned access_mode:1;
unsigned mask_control:1;
unsigned dependency_control:2;
unsigned compression_control:2; /* gen6: quater control */
unsigned thread_control:2;
unsigned predicate_control:4;
unsigned predicate_inverse:1;
unsigned execution_size:3;
/**
* Conditional Modifier for most instructions. On Gen6+, this is also
* used for the SEND instruction's Message Target/SFID.
*/
unsigned destreg__conditionalmod:4;
unsigned acc_wr_control:1;
unsigned cmpt_control:1;
unsigned debug_control:1;
unsigned saturate:1;
} header;
 
union {
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_subreg_nr:5;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2; /* 0x00000c00 */
unsigned src1_reg_type:3; /* 0x00007000 */
unsigned pad:1;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:1;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned pad0:6;
unsigned dest_writemask:4;
int dest_indirect_offset:6;
unsigned dest_subreg_nr:3;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
 
int jump_count:16;
} branch_gen6;
 
struct {
unsigned dest_reg_file:1;
unsigned flag_subreg_num:1;
unsigned pad0:2;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src2_abs:1;
unsigned src2_negate:1;
unsigned pad1:7;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:3;
unsigned dest_reg_nr:8;
} da3src;
} bits1;
 
 
union {
struct {
unsigned src0_subreg_nr:5;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} da1;
 
struct {
int src0_indirect_offset:10;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} ia1;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
unsigned src0_subreg_nr:1;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} da16;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
int src0_indirect_offset:6;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia16;
 
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
*
* Does not apply to Gen6+. The SFID/message target moved to bits
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
*/
struct {
unsigned pad:26;
unsigned end_of_thread:1;
unsigned pad1:1;
unsigned sfid:4;
} send_gen5; /* for Ironlake only */
 
struct {
unsigned src0_rep_ctrl:1;
unsigned src0_swizzle:8;
unsigned src0_subreg_nr:3;
unsigned src0_reg_nr:8;
unsigned pad0:1;
unsigned src1_rep_ctrl:1;
unsigned src1_swizzle:8;
unsigned src1_subreg_nr_low:2;
} da3src;
} bits2;
 
union {
struct {
unsigned src1_subreg_nr:5;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned pad0:7;
} da1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
unsigned src1_subreg_nr:1;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned pad2:7;
} da16;
 
struct {
int src1_indirect_offset:10;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
int src1_indirect_offset:6;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned pad0:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad2:5;
} ia16;
 
struct {
int jump_count:16; /* note: signed */
unsigned pop_count:4;
unsigned pad0:12;
} if_else;
 
/* This is also used for gen7 IF/ELSE instructions */
struct {
/* Signed jump distance to the ip to jump to if all channels
* are disabled after the break or continue. It should point
* to the end of the innermost control flow block, as that's
* where some channel could get re-enabled.
*/
int jip:16;
 
/* Signed jump distance to the location to resume execution
* of this channel if it's enabled for the break or continue.
*/
int uip:16;
} break_cont;
 
/**
* \defgroup SEND instructions / Message Descriptors
*
* @{
*/
 
/**
* Generic Message Descriptor for Gen4 SEND instructions. The structs
* below expand function_control to something specific for their
* message. Due to struct packing issues, they duplicate these bits.
*
* See the G45 PRM, Volume 4, Table 14-15.
*/
struct {
unsigned function_control:16;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} generic;
 
/**
* Generic Message Descriptor for Gen5-7 SEND instructions.
*
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
* of the information on the SEND instruction is missing from the public
* Ironlake PRM.)
*
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
* According to the SEND instruction description:
* "The MSb of the message description, the EOT field, always comes from
* bit 127 of the instruction word"...which is bit 31 of this field.
*/
struct {
unsigned function_control:19;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} generic_gen5;
 
/** G45 PRM, Volume 4, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned pad0:8;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} math;
 
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned snapshot:1;
unsigned pad0:10;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} math_gen5;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned return_format:2;
unsigned msg_type:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned simd_mode:2;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen5;
 
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:5;
unsigned simd_mode:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen7;
 
struct brw_urb_immediate {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} urb;
 
struct {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen5;
 
struct {
unsigned opcode:3;
unsigned offset:11;
unsigned swizzle_control:1;
unsigned complete:1;
unsigned per_slot_offset:1;
unsigned pad0:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen7;
 
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
struct {
unsigned binding_table_index:8;
unsigned msg_control:4;
unsigned msg_type:2;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_read_gen5;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_write;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_write_gen5;
 
/**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
**/
struct {
unsigned binding_table_index:8;
unsigned msg_control:5;
unsigned msg_type:3;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp_sampler_const_cache;
 
/**
* Message for the Sandybridge Render Cache Data Port.
*
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
* Section 3.9.2.1.1: Message Descriptor.
*
* "Slot Group Select" and "Last Render Target" are part of the
* 5-bit message control for Render Target Write messages. See
* Section 3.9.9.2.1 of the same volume.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_type:4;
unsigned send_commit_msg:1;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp;
 
/**
* Message for any of the Gen7 Data Port caches.
*
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
* Data Port Messages / Message Descriptor. Once again, "Slot Group
* Select" and "Last Render Target" are part of the 6-bit message
* control for Render Target Writes.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_control_pad:1;
unsigned msg_type:4;
unsigned pad1:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad2:2;
unsigned end_of_thread:1;
} gen7_dp;
/** @} */
 
struct {
unsigned src1_subreg_nr_high:1;
unsigned src1_reg_nr:8;
unsigned pad0:1;
unsigned src2_rep_ctrl:1;
unsigned src2_swizzle:8;
unsigned src2_subreg_nr:3;
unsigned src2_reg_nr:8;
unsigned pad1:2;
} da3src;
 
int d;
unsigned ud;
float f;
} bits3;
};
 
 
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
* WM programs to implement shaders decomposed into "channel serial"
* or "structure of array" form:
*
* Software-side description of one register operand. The bitfields of the
* first dword add up to 32 bits; the dw1 union supplies the second dword,
* holding either the align16/indirect details (bits) or an immediate
* value (f, d, ud).
*/
struct brw_reg {
unsigned type:4;
unsigned file:2;
unsigned nr:8;
unsigned subnr:5; /* :1 in align16 */
unsigned negate:1; /* source only */
unsigned abs:1; /* source only */
unsigned vstride:4; /* source only */
unsigned width:3; /* src only, align1 only */
unsigned hstride:2; /* align1 only */
unsigned address_mode:1; /* relative addressing, hopefully! */
unsigned pad0:1;

union {
struct {
unsigned swizzle:8; /* src only, align16 only */
unsigned writemask:4; /* dest only, align16 only */
int indirect_offset:10; /* relative addressing offset */
unsigned pad1:10; /* two dwords total */
} bits;

/* Immediate payload; written by the brw_imm_*() constructors. */
float f;
int d;
unsigned ud;
} dw1;
};
 
/* Address-register based pointer: an address subregister number plus a
* signed offset. Consumed by the deref_*() helpers and get_addr_reg().
* The three bitfields add up to 32 bits.
*/
struct brw_indirect {
unsigned addr_subnr:4;
int addr_offset:10;
unsigned pad:18;
};
 
/* Depth of the saved instruction-state stack in struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5
/* Upper bound on emitted instructions, asserted by brw_next_insn(). */
#define BRW_EU_MAX_INSN 10000

/* State of one program being assembled. */
struct brw_compile {
/* Output instruction array; nr_insn is the index of the next free slot. */
struct brw_instruction *store;
unsigned nr_insn;

/* Hardware generation; compared against octal constants such as 060
* elsewhere in this file.
*/
int gen;

/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
bool compressed_stack[BRW_EU_MAX_INSN_STACK];
/* Template state copied into every instruction by brw_next_insn(). */
struct brw_instruction *current;

unsigned flag_value;
bool single_program_flow;
bool compressed;

/* Control flow stacks:
* - if_stack contains IF and ELSE instructions which must be patched
* (and popped) once the matching ENDIF instruction is encountered.
*/
struct brw_instruction **if_stack;
int if_stack_depth;
int if_stack_array_size;
};
 
/**
 * Size in bytes of a single element of the given register type.
 *
 * \param type one of BRW_REGISTER_TYPE_x
 * \return 4 for UD/D/F, 2 for HF/UW/W, 1 for UB/B, and 0 for anything else
 */
static inline int type_sz(unsigned type)
{
	if (type == BRW_REGISTER_TYPE_UD ||
	    type == BRW_REGISTER_TYPE_D ||
	    type == BRW_REGISTER_TYPE_F)
		return 4;

	if (type == BRW_REGISTER_TYPE_HF ||
	    type == BRW_REGISTER_TYPE_UW ||
	    type == BRW_REGISTER_TYPE_W)
		return 2;

	if (type == BRW_REGISTER_TYPE_UB ||
	    type == BRW_REGISTER_TYPE_B)
		return 1;

	/* unknown type: no size */
	return 0;
}
 
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static inline struct brw_reg brw_reg(unsigned file,
unsigned nr,
unsigned subnr,
unsigned type,
unsigned vstride,
unsigned width,
unsigned hstride,
unsigned swizzle,
unsigned writemask)
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
else if (file == BRW_MESSAGE_REGISTER_FILE)
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
 
reg.type = type;
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
reg.negate = 0;
reg.abs = 0;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
 
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
* be lost when converted to align16. This is probably too much to
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
reg.dw1.bits.swizzle = swizzle;
reg.dw1.bits.writemask = writemask;
reg.dw1.bits.indirect_offset = 0;
reg.dw1.bits.pad1 = 0;
return reg;
}
 
/** Construct float[16] register */
static inline struct brw_reg brw_vec16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[8] register */
static inline struct brw_reg brw_vec8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[4] register */
static inline struct brw_reg brw_vec4_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[2] register */
static inline struct brw_reg brw_vec2_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYXY,
WRITEMASK_XY);
}
 
/** Construct float[1] register */
static inline struct brw_reg brw_vec1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
 
static inline struct brw_reg __retype(struct brw_reg reg,
unsigned type)
{
reg.type = type;
return reg;
}
 
/** Return a copy of \p reg reinterpreted as BRW_REGISTER_TYPE_D. */
static inline struct brw_reg __retype_d(struct brw_reg reg)
{
	reg.type = BRW_REGISTER_TYPE_D;
	return reg;
}
 
/** Return a copy of \p reg reinterpreted as BRW_REGISTER_TYPE_UD. */
static inline struct brw_reg __retype_ud(struct brw_reg reg)
{
	reg.type = BRW_REGISTER_TYPE_UD;
	return reg;
}
 
/** Return a copy of \p reg reinterpreted as BRW_REGISTER_TYPE_UW. */
static inline struct brw_reg __retype_uw(struct brw_reg reg)
{
	reg.type = BRW_REGISTER_TYPE_UW;
	return reg;
}
 
/**
 * Return the register that follows \p reg (nr + 1) when its vertical
 * stride is non-zero; a register with a zero vertical stride is returned
 * unchanged.
 */
static inline struct brw_reg __sechalf(struct brw_reg reg)
{
	if (reg.vstride == 0)
		return reg;

	reg.nr++;
	return reg;
}
 
/**
 * Advance the sub-register offset of \p reg by \p delta elements of the
 * register's current type (delta * type_sz(reg.type) is added to subnr).
 */
static inline struct brw_reg __suboffset(struct brw_reg reg,
					 unsigned delta)
{
	const unsigned step = delta * type_sz(reg.type);

	reg.subnr += step;
	return reg;
}
 
static inline struct brw_reg __offset(struct brw_reg reg,
unsigned delta)
{
reg.nr += delta;
return reg;
}
 
/**
 * Move \p reg forward by \p bytes. The current position is flattened to
 * nr * REG_SIZE + subnr, advanced, then split back into nr and subnr.
 */
static inline struct brw_reg byte_offset(struct brw_reg reg,
					 unsigned bytes)
{
	unsigned position = reg.nr * REG_SIZE + reg.subnr + bytes;

	reg.subnr = position % REG_SIZE;
	reg.nr = position / REG_SIZE;
	return reg;
}
 
 
/** Construct unsigned word[16] register */
static inline struct brw_reg brw_uw16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[8] register */
static inline struct brw_reg brw_uw8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[1] register */
static inline struct brw_reg brw_uw1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
static inline struct brw_reg brw_imm_reg(unsigned type)
{
return brw_reg( BRW_IMMEDIATE_VALUE,
0,
0,
type,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
0,
0);
}
 
/** Construct float immediate register */
static inline struct brw_reg brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
return imm;
}
 
/** Construct integer immediate register */
static inline struct brw_reg brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
return imm;
}
 
/** Construct uint immediate register */
static inline struct brw_reg brw_imm_ud(unsigned ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
return imm;
}
 
/**
 * Construct an unsigned word (UW) immediate.
 *
 * The 16-bit value is replicated into both halves of the 32-bit immediate
 * dword.
 *
 * \param uw value to encode
 */
static inline struct brw_reg brw_imm_uw(uint16_t uw)
{
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
	/* Widen before shifting: a bare `uw << 16` is evaluated in signed
	 * int after integer promotion and overflows (undefined behaviour)
	 * whenever bit 15 of uw is set.
	 */
	const uint32_t word = uw;

	imm.dw1.ud = word | (word << 16);
	return imm;
}
 
/**
 * Construct a signed word (W) immediate.
 *
 * The 16-bit two's-complement pattern of \p w is replicated into both
 * halves of the 32-bit immediate dword, matching brw_imm_uw().
 *
 * \param w value to encode
 */
static inline struct brw_reg brw_imm_w(int16_t w)
{
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
	/* Work on the unsigned 16-bit pattern. Shifting the promoted
	 * (sign-extended) int left is undefined for negative values, and
	 * OR-ing it in would fill the upper half with sign bits rather
	 * than with a copy of the word.
	 */
	const uint32_t word = (uint16_t)w;

	imm.dw1.ud = word | (word << 16);
	return imm;
}
 
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
* numbers alias with _V and _VF below:
*/
 
/** Construct vector of eight signed half-byte values */
static inline struct brw_reg brw_imm_v(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
/** Construct vector of four 8-bit float values */
static inline struct brw_reg brw_imm_vf(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
/* Byte-sized constants for the VF immediate constructors.
* NOTE(review): presumably the packed 8-bit float encodings of 0.0 and 1.0
* plus the sign bit, intended as arguments to brw_imm_vf4() - confirm
* against the hardware documentation.
*/
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
 
static inline struct brw_reg brw_imm_vf4(unsigned v0,
unsigned v1,
unsigned v2,
unsigned v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) |
(v1 << 8) |
(v2 << 16) |
(v3 << 24));
return imm;
}
 
static inline struct brw_reg brw_address(struct brw_reg reg)
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
 
/** Construct float[1] general-purpose register */
static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[2] general-purpose register */
static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[4] general-purpose register */
static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[8] general-purpose register */
static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct null register (usually used for setting condition codes) */
static inline struct brw_reg brw_null_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL,
0);
}
 
static inline struct brw_reg brw_address_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ADDRESS,
subnr);
}
 
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
static inline struct brw_reg brw_ip_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, /* NOTE! */
WRITEMASK_XYZW); /* NOTE! */
}
 
static inline struct brw_reg brw_acc_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ACCUMULATOR,
0);
}
 
static inline struct brw_reg brw_notification_1_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NOTIFICATION_COUNT,
1,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
static inline struct brw_reg brw_flag_reg(void)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_FLAG,
0);
}
 
static inline struct brw_reg brw_mask_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_MASK,
subnr);
}
 
static inline struct brw_reg brw_message_reg(unsigned nr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
 
static inline struct brw_reg brw_message4_reg(unsigned nr,
int subnr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
}
 
/**
 * Map an element count to its encoded form: 0 -> 0 and the powers of two
 * 1, 2, 4, 8, 16, 32 -> 1..6. Any other value maps to 0.
 *
 * This is almost always called with a numeric constant argument, so it is
 * written as one constant-foldable expression.
 */
static inline unsigned cvt(unsigned val)
{
	return val == 1  ? 1 :
	       val == 2  ? 2 :
	       val == 4  ? 3 :
	       val == 8  ? 4 :
	       val == 16 ? 5 :
	       val == 32 ? 6 :
			   0;
}
 
/**
 * Return a copy of \p reg with a new region description.
 *
 * The arguments are plain element counts; they are run through cvt()
 * before being stored, and the width is stored as cvt(width) - 1.
 */
static inline struct brw_reg __stride(struct brw_reg reg,
				      unsigned vstride,
				      unsigned width,
				      unsigned hstride)
{
	const unsigned enc_vstride = cvt(vstride);
	const unsigned enc_width = cvt(width) - 1;
	const unsigned enc_hstride = cvt(hstride);

	reg.vstride = enc_vstride;
	reg.width = enc_width;
	reg.hstride = enc_hstride;
	return reg;
}
 
static inline struct brw_reg vec16(struct brw_reg reg)
{
return __stride(reg, 16,16,1);
}
 
static inline struct brw_reg vec8(struct brw_reg reg)
{
return __stride(reg, 8,8,1);
}
 
static inline struct brw_reg vec4(struct brw_reg reg)
{
return __stride(reg, 4,4,1);
}
 
static inline struct brw_reg vec2(struct brw_reg reg)
{
return __stride(reg, 2,2,1);
}
 
static inline struct brw_reg vec1(struct brw_reg reg)
{
return __stride(reg, 0,1,0);
}
 
static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(reg, elt));
}
 
static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
 
/**
 * Compose a new swizzle on top of the one \p reg already carries.
 *
 * Each of \p x, \p y, \p z, \p w selects a channel of the register's
 * existing swizzle; the four selected entries become the new swizzle.
 * Must not be applied to an immediate.
 */
static inline struct brw_reg brw_swizzle(struct brw_reg reg,
					 unsigned x,
					 unsigned y,
					 unsigned z,
					 unsigned w)
{
	unsigned chan_x, chan_y, chan_z, chan_w;

	assert(reg.file != BRW_IMMEDIATE_VALUE);

	/* Look up every component in the old swizzle before overwriting it. */
	chan_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, x);
	chan_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, y);
	chan_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, z);
	chan_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, w);

	reg.dw1.bits.swizzle = BRW_SWIZZLE4(chan_x, chan_y, chan_z, chan_w);
	return reg;
}
 
static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
unsigned x)
{
return brw_swizzle(reg, x, x, x, x);
}
 
/**
 * Narrow the writemask of \p reg: the result keeps only the channels
 * enabled in both the existing writemask and \p mask. Must not be
 * applied to an immediate.
 */
static inline struct brw_reg brw_writemask(struct brw_reg reg,
					   unsigned mask)
{
	assert(reg.file != BRW_IMMEDIATE_VALUE);

	reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
	return reg;
}
 
static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
 
/** Toggle the negate source modifier of \p reg (negating twice cancels). */
static inline struct brw_reg brw_negate(struct brw_reg reg)
{
	reg.negate = !reg.negate;
	return reg;
}
 
static inline struct brw_reg brw_abs(struct brw_reg reg)
{
reg.abs = 1;
return reg;
}
 
/***********************************************************************
*/
static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
/**
 * Dereference \p ptr as a float[4] indirect operand located \p offset
 * beyond the pointer's own offset.
 */
static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
	const int total = ptr.addr_offset + offset;

	return brw_vec4_indirect(ptr.addr_subnr, total);
}
 
/**
 * Dereference \p ptr as a float[1] indirect operand located \p offset
 * beyond the pointer's own offset.
 */
static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
	const int total = ptr.addr_offset + offset;

	return brw_vec1_indirect(ptr.addr_subnr, total);
}
 
static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
{
return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
 
static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
 
/** Return a copy of \p ptr with \p offset added to its address offset. */
static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
	ptr.addr_offset = ptr.addr_offset + offset;
	return ptr;
}
 
static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
ptr.addr_offset = offset;
ptr.pad = 0;
return ptr;
}
 
/**
 * Do two brw_regs refer to the same register?
 *
 * Only the register file and register number are compared; sub-register,
 * type and region are ignored.
 */
static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
{
	if (r1.file != r2.file)
		return false;

	return r1.nr == r2.nr;
}
 
/**
 * Return the address of the next unused slot in the instruction store,
 * i.e. where the next emitted instruction will be written.
 */
static inline struct brw_instruction *current_insn(struct brw_compile *p)
{
	return p->store + p->nr_insn;
}
 
static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
{
p->current->header.predicate_control = pc;
}
 
static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
{
p->current->header.predicate_inverse = predicate_inverse;
}
 
static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
{
p->current->header.destreg__conditionalmod = conditional;
}
 
static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
{
p->current->header.access_mode = access_mode;
}
 
static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
{
p->current->header.mask_control = value;
}
 
static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
{
p->current->header.saturate = value;
}
 
/**
 * Set the accumulator write control bit in the current instruction state.
 * Silently does nothing when p->gen is below 060.
 */
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
{
	if (p->gen < 060)
		return;

	p->current->header.acc_wr_control = value;
}
 
/* Instruction-state management. These are only declared here; their
* definitions are not visible in this header.
*/
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );

/* NOTE(review): presumably `store` is the caller-provided buffer that
* becomes p->store, and `gen` becomes p->gen - confirm in the definition.
*/
void brw_compile_init(struct brw_compile *p, int gen, void *store);

/* Operand encoders, used by brw_alu1()/brw_alu2() below to fill in the
* destination and source fields of an already-allocated instruction.
*/
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg);

void gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr);
 
/**
 * Allocate the next instruction in the store and seed it from the current
 * default state.
 *
 * The new instruction starts out as a copy of *p->current with its opcode
 * replaced by \p opcode. If the default state carried a conditional
 * modifier, that modifier is consumed: after the copy it is cleared in the
 * default state and the default predicate control is set to
 * BRW_PREDICATE_NORMAL.
 *
 * \return pointer to the newly allocated instruction inside p->store
 */
static inline struct brw_instruction *
brw_next_insn(struct brw_compile *p, unsigned opcode)
{
	struct brw_instruction *state = p->current;
	struct brw_instruction *slot;

	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

	slot = p->store + p->nr_insn;
	p->nr_insn++;

	/* Copy first, so the new instruction still sees the modifier. */
	*slot = *state;

	if (state->header.destreg__conditionalmod != 0) {
		state->header.destreg__conditionalmod = 0;
		state->header.predicate_control = BRW_PREDICATE_NORMAL;
	}

	slot->header.opcode = opcode;
	return slot;
}
 
/* Helpers for regular instructions.
 *
 * ALU1(OP) defines brw_OP(p, dest, src0), a one-source emitter that
 * forwards to brw_alu1() with BRW_OPCODE_OP.
 */
#define ALU1(OP)							\
static inline struct brw_instruction *brw_##OP(struct brw_compile *p,	\
					       struct brw_reg dest,	\
					       struct brw_reg src0)	\
{									\
	struct brw_instruction *insn;					\
									\
	insn = brw_alu1(p, BRW_OPCODE_##OP, dest, src0);		\
	return insn;							\
}
 
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), a two-source emitter that
 * forwards to brw_alu2() with BRW_OPCODE_OP.
 */
#define ALU2(OP)							\
static inline struct brw_instruction *brw_##OP(struct brw_compile *p,	\
					       struct brw_reg dest,	\
					       struct brw_reg src0,	\
					       struct brw_reg src1)	\
{									\
	struct brw_instruction *insn;					\
									\
	insn = brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);		\
	return insn;							\
}
 
/* Rounding operations (other than RNDD) require two instructions - the first
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
*
* Sandybridge and later appear to round correctly without an ADD.
*
* ROUND(OP) defines brw_OP(p, dest, src). When p->gen is below 060 the
* rounding instruction gets the BRW_CONDITIONAL_R modifier and is followed
* by an ADD of 1.0f to dest with BRW_PREDICATE_NORMAL; otherwise only the
* rounding instruction is emitted.
*/
#define ROUND(OP) \
static inline void brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src) \
{ \
struct brw_instruction *rnd, *add; \
rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
if (p->gen < 060) { \
/* turn on round-increments */ \
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
add->header.predicate_control = BRW_PREDICATE_NORMAL; \
} \
}
 
/**
 * Emit a one-source instruction.
 *
 * Allocates the next instruction with \p opcode, then encodes \p dest and
 * \p src into it.
 *
 * \return the emitted instruction
 */
static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
					       unsigned opcode,
					       struct brw_reg dest,
					       struct brw_reg src)
{
	struct brw_instruction *inst;

	inst = brw_next_insn(p, opcode);
	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src);

	return inst;
}
 
/**
 * Emit a two-source instruction.
 *
 * Allocates the next instruction with \p opcode, then encodes \p dest,
 * \p src0 and \p src1 into it, in that order.
 *
 * \return the emitted instruction
 */
static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
					       unsigned opcode,
					       struct brw_reg dest,
					       struct brw_reg src0,
					       struct brw_reg src1)
{
	struct brw_instruction *inst;

	inst = brw_next_insn(p, opcode);
	brw_set_dest(p, inst, dest);
	brw_set_src0(p, inst, src0);
	brw_set_src1(p, inst, src1);

	return inst;
}
 
/**
 * Emit an ADD instruction.
 *
 * Asserts the operand-type rule referenced as "6.2.2: add": when one
 * source is float (type F, or a VF immediate) the other source must not
 * be D or UD.
 *
 * \return the emitted instruction
 */
static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
					      struct brw_reg dest,
					      struct brw_reg src0,
					      struct brw_reg src1)
{
	const bool src0_is_float =
		src0.type == BRW_REGISTER_TYPE_F ||
		(src0.file == BRW_IMMEDIATE_VALUE &&
		 src0.type == BRW_REGISTER_TYPE_VF);
	const bool src1_is_float =
		src1.type == BRW_REGISTER_TYPE_F ||
		(src1.file == BRW_IMMEDIATE_VALUE &&
		 src1.type == BRW_REGISTER_TYPE_VF);

	/* 6.2.2: add */
	if (src0_is_float) {
		assert(src1.type != BRW_REGISTER_TYPE_UD);
		assert(src1.type != BRW_REGISTER_TYPE_D);
	}

	if (src1_is_float) {
		assert(src0.type != BRW_REGISTER_TYPE_UD);
		assert(src0.type != BRW_REGISTER_TYPE_D);
	}

	return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
 
/**
 * Emit a MUL instruction.
 *
 * Asserts the operand rules referenced as "6.32.38: mul":
 *  - if either source is D or UD, the destination must not be F;
 *  - when one source is float (type F, or a VF immediate) the other
 *    source must not be D or UD;
 *  - neither source may be the accumulator.
 *
 * \return the emitted instruction
 */
static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
					      struct brw_reg dest,
					      struct brw_reg src0,
					      struct brw_reg src1)
{
	const bool any_src_is_dword =
		src0.type == BRW_REGISTER_TYPE_D ||
		src0.type == BRW_REGISTER_TYPE_UD ||
		src1.type == BRW_REGISTER_TYPE_D ||
		src1.type == BRW_REGISTER_TYPE_UD;
	const bool src0_is_float =
		src0.type == BRW_REGISTER_TYPE_F ||
		(src0.file == BRW_IMMEDIATE_VALUE &&
		 src0.type == BRW_REGISTER_TYPE_VF);
	const bool src1_is_float =
		src1.type == BRW_REGISTER_TYPE_F ||
		(src1.file == BRW_IMMEDIATE_VALUE &&
		 src1.type == BRW_REGISTER_TYPE_VF);

	/* 6.32.38: mul */
	if (any_src_is_dword) {
		assert(dest.type != BRW_REGISTER_TYPE_F);
	}

	if (src0_is_float) {
		assert(src1.type != BRW_REGISTER_TYPE_UD);
		assert(src1.type != BRW_REGISTER_TYPE_D);
	}

	if (src1_is_float) {
		assert(src0.type != BRW_REGISTER_TYPE_UD);
		assert(src0.type != BRW_REGISTER_TYPE_D);
	}

	/* The accumulator is not accepted as an explicit source. */
	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	       src0.nr != BRW_ARF_ACCUMULATOR);
	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
	       src1.nr != BRW_ARF_ACCUMULATOR);

	return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
 
/**
 * Emit a JMPI instruction.
 *
 * The emitted instruction is forced to execution size 1, no compression
 * and mask control disabled. As a side effect the default predicate
 * control in p->current is reset to BRW_PREDICATE_NONE.
 *
 * \return the emitted instruction
 */
static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
					       struct brw_reg dest,
					       struct brw_reg src0,
					       struct brw_reg src1)
{
	struct brw_instruction *jmp;

	jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);

	jmp->header.mask_control = BRW_MASK_DISABLE;
	jmp->header.compression_control = BRW_COMPRESSION_NONE;
	jmp->header.execution_size = 1;

	/* Later instructions start out unpredicated again. */
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	return jmp;
}
 
 
/* Instantiate the simple emitters: each ALU1(OP)/ALU2(OP) line defines a
* brw_OP() inline wrapper around brw_alu1()/brw_alu2().
*/
ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(RSR);
ALU2(RSL);
ALU2(ASR);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);

/* Rounding emitters defined via ROUND(); RNDD above is a plain ALU1. */
ROUND(RNDZ);
ROUND(RNDE);

/* The generator macros are private to this header. */
#undef ALU1
#undef ALU2
#undef ROUND
 
/* Helpers for SEND instruction.
*
* These are only declared here; their definitions are not visible in this
* header, so the notes below are limited to what the signatures show.
*/

/* Takes an existing instruction (insn) plus the data-port read message
* fields.
*/
void brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned target_cache,
unsigned msg_length,
unsigned response_length);

/* Takes an existing instruction (insn) plus the data-port write message
* fields.
*/
void brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned msg_length,
bool header_present,
bool last_render_target,
unsigned response_length,
bool end_of_thread,
bool send_commit_msg);

void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool eot,
bool writes_complete,
unsigned offset,
unsigned swizzle);

void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot);

void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
bool header_present);

void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode);

/* Math helpers: one-source (brw_math, brw_math_16) and two-source
* (brw_math2) forms.
*/
void brw_math_16(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned precision);

void brw_math(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned data_type,
unsigned precision);

void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1);

/* OWord block read/write helpers. */
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index);

void brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset);

void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset);
 
void brw_dword_scattered_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t bind_table_index);
 
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
unsigned location,
unsigned bind_table_index);
 
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addrReg,
unsigned offset,
unsigned bind_table_index);
 
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/

/* Emit a predicated IF with the given execution size and push it on the
 * compile's if-stack; returns the emitted instruction.
 */
struct brw_instruction *brw_IF(struct brw_compile *p,
unsigned execute_size);
/* Gen6-style IF with an embedded comparison of src0 and src1 using the
 * `conditional` modifier; also pushed on the if-stack.
 */
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1);

/* Emit an ELSE and push it on the if-stack. */
void brw_ELSE(struct brw_compile *p);
/* Pop the matching IF (and optional ELSE) from the if-stack and patch
 * their jump targets; in single-program-flow mode no ENDIF is emitted and
 * the IF/ELSE are rewritten as ADDs instead.
 */
void brw_ENDIF(struct brw_compile *p);

/* DO/WHILE loops:
*/
struct brw_instruction *brw_DO(struct brw_compile *p,
unsigned execute_size);

struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);

struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn);

/* Emit a NOP instruction. */
void brw_NOP(struct brw_compile *p);

void brw_WAIT(struct brw_compile *p);

/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
/* NOTE(review): the prototype nevertheless takes a `dest` operand --
 * confirm how the definition uses it.
 */
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1);
 
/* Emit a math instruction computing the INV function of `src` into `dst`.
 *
 * The request uses no saturation, message register 0, vector data and
 * full precision.  brw_math() takes data_type before precision, so the
 * two trailing constants are passed in that order.
 */
static inline void brw_math_invert(struct brw_compile *p,
				   struct brw_reg dst,
				   struct brw_reg src)
{
	brw_math(p,
		 dst,
		 BRW_MATH_FUNCTION_INV,
		 BRW_MATH_SATURATE_NONE,
		 0,				/* msg_reg_nr */
		 src,
		 BRW_MATH_DATA_VECTOR,		/* data_type */
		 BRW_MATH_PRECISION_FULL);	/* precision */
}
 
/* NOTE(review): presumably a post-pass that fills in the UIP/JIP jump
 * fields of already emitted flow-control instructions -- confirm against
 * the definition.
 */
void brw_set_uip_jip(struct brw_compile *p);

/* NOTE(review): presumably returns the conditional modifier to use when
 * the two compared operands are exchanged -- confirm against the
 * definition.
 */
uint32_t brw_swap_cmod(uint32_t cmod);

/* Write a textual disassembly of `inst` to `file`.  NOTE(review): `gen`
 * presumably uses the same generation numbering as brw_compile::gen --
 * confirm against the disassembler.
 */
void brw_disasm(FILE *file,
const struct brw_instruction *inst,
int gen);
 
#endif
/drivers/video/Intel-2D/sna/brw/brw_eu_emit.c
0,0 → 1,2002
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/***********************************************************************
* Internal helper for constructing instructions
*/
 
/* Derive the execution size of `insn` from the width of `reg`.
 *
 * The register width is copied straight into the execution-size field,
 * except that a width-8 register is widened to 16 channels while the
 * compile is in compressed mode.
 */
static void guess_execution_size(struct brw_compile *p,
				 struct brw_instruction *insn,
				 struct brw_reg reg)
{
	const bool widen = reg.width == BRW_WIDTH_8 && p->compressed;

	insn->header.execution_size = widen ? BRW_EXECUTE_16 : reg.width;
}
 
 
/**
 * Make the implied operand move of SEND explicit on Sandybridge and later.
 *
 * Before Sandybridge, SEND accepted a non-MRF source and moved it to a
 * message register implicitly.  From Sandybridge on that no longer
 * happens, so this must be called before emitting a SEND.
 *
 * Nothing is done for p->gen below 060 or when \p src already lives in the
 * message register file.  Otherwise \p src is rewritten to message
 * register \p msg_reg_nr; unless the original source was the null
 * architecture register, an unmasked, uncompressed MOV (both sides retyped
 * to UD) is emitted first to copy the value there.
 */
void
gen6_resolve_implied_move(struct brw_compile *p,
			  struct brw_reg *src,
			  unsigned msg_reg_nr)
{
	bool src_is_null;

	if (p->gen < 060 || src->file == BRW_MESSAGE_REGISTER_FILE)
		return;

	src_is_null = src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
		      src->nr == BRW_ARF_NULL;

	if (!src_is_null) {
		/* Copy under a temporary instruction state so the caller's
		 * defaults are left as they were.
		 */
		brw_push_insn_state(p);
		brw_set_mask_control(p, BRW_MASK_DISABLE);
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
		brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
		brw_pop_insn_state(p);
	}

	*src = brw_message_reg(msg_reg_nr);
}
 
/* Rewrite a message-register operand as a general register on gen7+.
 *
 * From the BSpec / ISA Reference / send - [DevIVB+]:
 * "The send with EOT should use register space R112-R127 for <src>. This is
 * to enable loading of a new thread into the same slot while the message
 * with EOT for current thread is pending dispatch."
 *
 * Since we're pretending to have 16 MRFs anyway, we may as well use the
 * registers required for messages with EOT.
 *
 * Operands in any other file, and any operand when p->gen is below 070,
 * are left unchanged.
 */
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
	if (p->gen < 070)
		return;
	if (reg->file != BRW_MESSAGE_REGISTER_FILE)
		return;

	/* m1 lands on r112; note that m0 would land on r111, just below the
	 * range quoted above.
	 */
	reg->nr += 111;
	reg->file = BRW_GENERAL_REGISTER_FILE;
}
 
/* Encode `dest` as the destination operand of `insn`.
 *
 * Message registers are first remapped by gen7_convert_mrf_to_grf().  The
 * register file, type and addressing mode are always written; the rest of
 * the encoding depends on the addressing mode (direct or indirect) and on
 * the instruction's access mode (align1 or align16).  In align1 a
 * horizontal stride of 0 is replaced by a stride of 1.  Finally the
 * instruction's execution size is guessed from the destination width.
 */
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
	     struct brw_reg dest)
{
	const bool align1 = insn->header.access_mode == BRW_ALIGN_1;
	const bool direct = dest.address_mode == BRW_ADDRESS_DIRECT;

	if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
	    dest.file != BRW_MESSAGE_REGISTER_FILE)
		assert(dest.nr < 128);

	gen7_convert_mrf_to_grf(p, &dest);

	insn->bits1.da1.dest_reg_file = dest.file;
	insn->bits1.da1.dest_reg_type = dest.type;
	insn->bits1.da1.dest_address_mode = dest.address_mode;

	if (direct) {
		insn->bits1.da1.dest_reg_nr = dest.nr;

		if (align1) {
			insn->bits1.da1.dest_subreg_nr = dest.subnr;
			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
			insn->bits1.da1.dest_horiz_stride = dest.hstride;
		} else {
			insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
			insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
			/* even ignored in da16, still need to set as '01' */
			insn->bits1.da16.dest_horiz_stride = 1;
		}
	} else {
		insn->bits1.ia1.dest_subreg_nr = dest.subnr;

		/* The indirect offset field has a different size in align1
		 * and align16.
		 */
		if (align1) {
			insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
			insn->bits1.ia1.dest_horiz_stride = dest.hstride;
		} else {
			insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
			/* even ignored in da16, still need to set as '01' */
			insn->bits1.ia16.dest_horiz_stride = 1;
		}
	}

	guess_execution_size(p, insn, dest);
}
 
/* Size in bytes of one element, indexed by the 3-bit register type code
 * as stored in the instruction's dest_reg_type field.  Code 6 has no
 * entry and therefore reads as 0.
 */
static const int reg_type_size[8] = {
	4,	/* type code 0 */
	4,	/* type code 1 */
	2,	/* type code 2 */
	2,	/* type code 3 */
	1,	/* type code 4 */
	1,	/* type code 5 */
	0,	/* type code 6: no size assigned */
	4,	/* type code 7 */
};
 
/* Assert that the region description of source operand `reg` is legal for
 * `insn`.
 *
 * The checks are pure assertions; nothing is written.  They rely on the
 * destination and the execution size of `insn` having been encoded
 * already, since both are read back from the instruction.
 */
static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
	/* Decode tables: encoded field value -> element count. */
	static const int hstride_for_reg[] = { 0, 1, 2, 4 };
	static const int vstride_for_reg[] = { 0, 1, 2, 4, 8, 16, 32, 64, 128, 256 };
	static const int width_for_reg[] = { 1, 2, 4, 8, 16 };
	static const int execsize_for_reg[] = { 1, 2, 4, 8, 16 };
	int width, hstride, vstride, execsize;

	if (reg.file == BRW_IMMEDIATE_VALUE) {
		/* 3.3.6: Region Parameters. Restriction: Immediate vectors
		 * mean the destination has to be 128-bit aligned and the
		 * destination horiz stride has to be a word.
		 */
		if (reg.type == BRW_REGISTER_TYPE_V) {
			assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
			       reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
		}

		return;
	}

	/* NOTE(review): the second comparison tests reg.file again, this
	 * time against a register *number* constant; reg.nr was probably
	 * intended.  As written the early return is taken for every
	 * architecture register if the two constants share a value, and
	 * never otherwise.  Do not simply "correct" it: validating
	 * architecture registers may trip the assertions below for operands
	 * emitted elsewhere in this file (e.g. the IP register) -- verify
	 * first.
	 */
	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
	    reg.file == BRW_ARF_NULL)
		return;

	execsize = execsize_for_reg[insn->header.execution_size];
	width = width_for_reg[reg.width];
	hstride = hstride_for_reg[reg.hstride];
	/* A vertical stride field of 0xf has no table entry and is
	 * represented as -1.
	 */
	vstride = reg.vstride == 0xf ? -1 : vstride_for_reg[reg.vstride];

	/* Restrictions from 3.3.10: Register Region Restrictions. */
	/* 3. */
	assert(execsize >= width);

	/* 4. */
	if (execsize == width && hstride != 0) {
		assert(vstride == -1 || vstride == width * hstride);
	}

	/* 5. execsize == width with hstride == 0: no restriction on
	 * vstride, so there is nothing to check.
	 */

	/* 6. */
	if (width == 1) {
		assert(hstride == 0);
	}

	/* 7. */
	if (execsize == 1 && width == 1) {
		assert(hstride == 0);
		assert(vstride == 0);
	}

	/* 8. */
	if (vstride == 0 && hstride == 0) {
		assert(width == 1);
	}

	/* 10. Check destination issues. */
}
 
void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
insn->bits2.da1.src0_negate = reg.negate;
insn->bits2.da1.src0_address_mode = reg.address_mode;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
 
/* Required to set some fields in src1 as well:
*/
insn->bits1.da1.src1_reg_file = 0; /* arf */
insn->bits1.da1.src1_reg_type = reg.type;
} else {
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.da1.src0_subreg_nr = reg.subnr;
insn->bits2.da1.src0_reg_nr = reg.nr;
} else {
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
insn->bits2.da16.src0_reg_nr = reg.nr;
}
} else {
insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
} else {
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
}
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits2.da1.src0_width = BRW_WIDTH_1;
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
} else {
insn->bits2.da1.src0_horiz_stride = reg.hstride;
insn->bits2.da1.src0_width = reg.width;
insn->bits2.da1.src0_vert_stride = reg.vstride;
}
} else {
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits2.da16.src0_vert_stride = reg.vstride;
}
}
}
 
/* Encode `reg` as the second source operand of `insn`.
 *
 * src1 must not be a message register and must use a register number
 * below 128.  It may be an immediate, in which case the value is stored in
 * bits3; src0 must then not be an immediate itself.  A register operand
 * has to use direct addressing and is encoded according to the
 * instruction's access mode: register number plus region in align1,
 * register number plus swizzle in align16.
 */
void brw_set_src1(struct brw_compile *p,
		  struct brw_instruction *insn,
		  struct brw_reg reg)
{
	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
	assert(reg.nr < 128);

	gen7_convert_mrf_to_grf(p, &reg);

	validate_reg(insn, reg);

	insn->bits1.da1.src1_reg_file = reg.file;
	insn->bits1.da1.src1_reg_type = reg.type;
	insn->bits3.da1.src1_abs = reg.abs;
	insn->bits3.da1.src1_negate = reg.negate;

	/* Only src1 can be immediate in two-argument instructions. */
	assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

	if (reg.file == BRW_IMMEDIATE_VALUE) {
		insn->bits3.ud = reg.dw1.ud;
		return;
	}

	/* This is a hardware restriction, which may or may not be lifted
	 * in the future:
	 */
	assert (reg.address_mode == BRW_ADDRESS_DIRECT);

	if (insn->header.access_mode == BRW_ALIGN_1) {
		insn->bits3.da1.src1_subreg_nr = reg.subnr;
		insn->bits3.da1.src1_reg_nr = reg.nr;

		if (reg.width == BRW_WIDTH_1 &&
		    insn->header.execution_size == BRW_EXECUTE_1) {
			/* Scalar read: force a scalar region. */
			insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
			insn->bits3.da1.src1_width = BRW_WIDTH_1;
			insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
		} else {
			insn->bits3.da1.src1_horiz_stride = reg.hstride;
			insn->bits3.da1.src1_width = reg.width;
			insn->bits3.da1.src1_vert_stride = reg.vstride;
		}
	} else {
		insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
		insn->bits3.da16.src1_reg_nr = reg.nr;

		insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
		insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
		insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
		insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

		/* The register descriptions are shared between align_1 and
		 * align_16, so a vertical stride of 8 has to be narrowed to
		 * 4 here.
		 */
		if (reg.vstride == BRW_VERTICAL_STRIDE_8)
			insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
		else
			insn->bits3.da16.src1_vert_stride = reg.vstride;
	}
}
 
/**
 * Fill in the Message Descriptor and Extended Message Descriptor fields
 * that are common to every SEND message.
 *
 * \note src1 is first set to an immediate zero, which clears the Function
 *       Control bits.  This function must therefore be called \b before
 *       any message-specific data is filled in; whatever the caller does
 *       not fill in afterwards stays zero.
 *
 * \p header_present is only encoded for p->gen 050 and later.
 */
static void
brw_set_message_descriptor(struct brw_compile *p,
			   struct brw_instruction *inst,
			   enum brw_message_target sfid,
			   unsigned msg_length,
			   unsigned response_length,
			   bool header_present,
			   bool end_of_thread)
{
	brw_set_src1(p, inst, brw_imm_d(0));

	if (p->gen < 050) {
		/* Original layout: the target lives in the descriptor. */
		inst->bits3.generic.response_length = response_length;
		inst->bits3.generic.msg_length = msg_length;
		inst->bits3.generic.msg_target = sfid;
		inst->bits3.generic.end_of_thread = end_of_thread;
		return;
	}

	inst->bits3.generic_gen5.header_present = header_present;
	inst->bits3.generic_gen5.response_length = response_length;
	inst->bits3.generic_gen5.msg_length = msg_length;
	inst->bits3.generic_gen5.end_of_thread = end_of_thread;

	if (p->gen < 060) {
		/* Set Extended Message Descriptor (ex_desc) */
		inst->bits2.send_gen5.sfid = sfid;
		inst->bits2.send_gen5.end_of_thread = end_of_thread;
	} else {
		/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
		inst->header.destreg__conditionalmod = sfid;
	}
}
 
 
/* Fill in the SEND descriptor of `insn` for a math message.
 *
 * Message and response lengths are inferred from `function`: POW and the
 * three integer-division functions send two registers, SINCOS and the
 * combined quotient-and-remainder division return two registers; all
 * other combinations use one.  The message carries no header and never
 * ends the thread.  The function-specific fields use the gen5 layout when
 * p->gen is exactly 050 and the original layout otherwise.
 */
static void brw_set_math_message(struct brw_compile *p,
				 struct brw_instruction *insn,
				 unsigned function,
				 unsigned integer_type,
				 bool low_precision,
				 bool saturate,
				 unsigned dataType)
{
	const bool two_operands =
		function == BRW_MATH_FUNCTION_POW ||
		function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
		function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
		function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
	const bool two_results =
		function == BRW_MATH_FUNCTION_SINCOS ||
		function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER;
	unsigned msg_length = two_operands ? 2 : 1;
	unsigned response_length = two_results ? 2 : 1;

	brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
				   msg_length, response_length,
				   false, false);

	if (p->gen != 050) {
		insn->bits3.math.function = function;
		insn->bits3.math.int_type = integer_type;
		insn->bits3.math.precision = low_precision;
		insn->bits3.math.saturate = saturate;
		insn->bits3.math.data_type = dataType;
		return;
	}

	insn->bits3.math_gen5.function = function;
	insn->bits3.math_gen5.int_type = integer_type;
	insn->bits3.math_gen5.precision = low_precision;
	insn->bits3.math_gen5.saturate = saturate;
	insn->bits3.math_gen5.data_type = dataType;
	insn->bits3.math_gen5.snapshot = 0;
}
 
/* Fill in the SEND descriptor of `insn` for an FF_SYNC message to the URB
 * unit.
 *
 * The message is one register long and always carries a header.  Only
 * `allocate` is taken from the caller; every other URB field is unused by
 * FF_SYNC and written as zero.  The gen5 URB descriptor layout is used
 * unconditionally.
 */
static void brw_set_ff_sync_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    bool allocate,
				    unsigned response_length,
				    bool end_of_thread)
{
	const unsigned msg_length = 1;
	const bool header_present = true;

	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
				   msg_length, response_length,
				   header_present, end_of_thread);

	insn->bits3.urb_gen5.opcode = 1;	/* FF_SYNC */
	insn->bits3.urb_gen5.allocate = allocate;

	/* None of the remaining fields is used by FF_SYNC. */
	insn->bits3.urb_gen5.offset = 0;
	insn->bits3.urb_gen5.swizzle_control = 0;
	insn->bits3.urb_gen5.used = 0;
	insn->bits3.urb_gen5.complete = 0;
}
 
/* Fill in the SEND descriptor of `insn` for a URB write.
 *
 * The message always carries a header.  Three descriptor layouts exist,
 * selected by p->gen: the original one (below 050), the gen5 one (050 up
 * to but excluding 070) and the gen7 one (070 and later).  The gen7
 * layout does not encode `allocate` or `used` and asserts that the
 * swizzle is not BRW_URB_SWIZZLE_TRANSPOSE.
 */
static void brw_set_urb_message(struct brw_compile *p,
				struct brw_instruction *insn,
				bool allocate,
				bool used,
				unsigned msg_length,
				unsigned response_length,
				bool end_of_thread,
				bool complete,
				unsigned offset,
				unsigned swizzle_control)
{
	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
				   msg_length, response_length, true, end_of_thread);

	if (p->gen < 050) {
		insn->bits3.urb.opcode = 0; /* ? */
		insn->bits3.urb.offset = offset;
		insn->bits3.urb.swizzle_control = swizzle_control;
		insn->bits3.urb.allocate = allocate;
		insn->bits3.urb.used = used; /* ? */
		insn->bits3.urb.complete = complete;
	} else if (p->gen < 070) {
		insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
		insn->bits3.urb_gen5.offset = offset;
		insn->bits3.urb_gen5.swizzle_control = swizzle_control;
		insn->bits3.urb_gen5.allocate = allocate;
		insn->bits3.urb_gen5.used = used; /* ? */
		insn->bits3.urb_gen5.complete = complete;
	} else {
		insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
		insn->bits3.urb_gen7.offset = offset;
		assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
		insn->bits3.urb_gen7.swizzle_control = swizzle_control;
		/* per_slot_offset = 0 makes it ignore offsets in message header */
		insn->bits3.urb_gen7.per_slot_offset = 0;
		insn->bits3.urb_gen7.complete = complete;
	}
}
 
/* Fill in the SEND descriptor of `insn` for a data-port write.
 *
 * Shared function selection:
 *   - p->gen below 060: the data-port write unit;
 *   - gen6 (060 up to 070): the render cache for every write;
 *   - gen7 and later: the render cache for render-target writes, the data
 *     cache for everything else.
 *
 * The message-specific fields are then written in the layout matching
 * p->gen.  The gen7 layout does not encode `send_commit_msg`.
 */
void
brw_set_dp_write_message(struct brw_compile *p,
			 struct brw_instruction *insn,
			 unsigned binding_table_index,
			 unsigned msg_control,
			 unsigned msg_type,
			 unsigned msg_length,
			 bool header_present,
			 bool last_render_target,
			 unsigned response_length,
			 bool end_of_thread,
			 bool send_commit_msg)
{
	unsigned sfid;

	if (p->gen < 060)
		sfid = BRW_SFID_DATAPORT_WRITE;
	else if (p->gen < 070 ||
		 msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
	else
		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;

	brw_set_message_descriptor(p, insn, sfid,
				   msg_length, response_length,
				   header_present, end_of_thread);

	if (p->gen < 050) {
		insn->bits3.dp_write.binding_table_index = binding_table_index;
		insn->bits3.dp_write.msg_control = msg_control;
		insn->bits3.dp_write.last_render_target = last_render_target;
		insn->bits3.dp_write.msg_type = msg_type;
		insn->bits3.dp_write.send_commit_msg = send_commit_msg;
	} else if (p->gen < 060) {
		insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
		insn->bits3.dp_write_gen5.msg_control = msg_control;
		insn->bits3.dp_write_gen5.last_render_target = last_render_target;
		insn->bits3.dp_write_gen5.msg_type = msg_type;
		insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
	} else if (p->gen < 070) {
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
		insn->bits3.gen6_dp.msg_control = msg_control;
		insn->bits3.gen6_dp.last_render_target = last_render_target;
		insn->bits3.gen6_dp.msg_type = msg_type;
		insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
	} else {
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
		insn->bits3.gen7_dp.msg_control = msg_control;
		insn->bits3.gen7_dp.last_render_target = last_render_target;
		insn->bits3.gen7_dp.msg_type = msg_type;
	}
}
 
/* Fill in the SEND descriptor of `insn` for a data-port read.
 *
 * Shared function selection:
 *   - p->gen below 060: the data-port read unit;
 *   - gen6 (060 up to 070): the render cache when `target_cache` asks for
 *     it, the sampler cache otherwise;
 *   - gen7 and later: always the data cache.
 *
 * The message always carries a header and never ends the thread.
 * `target_cache` is only encoded in the descriptor itself for p->gen
 * below 060; the gen6 and gen7 layouts write last_render_target (and, on
 * gen6, send_commit_msg) as zero.
 */
void
brw_set_dp_read_message(struct brw_compile *p,
			struct brw_instruction *insn,
			unsigned binding_table_index,
			unsigned msg_control,
			unsigned msg_type,
			unsigned target_cache,
			unsigned msg_length,
			unsigned response_length)
{
	unsigned sfid;

	if (p->gen < 060)
		sfid = BRW_SFID_DATAPORT_READ;
	else if (p->gen >= 070)
		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
	else if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
	else
		sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;

	brw_set_message_descriptor(p, insn, sfid,
				   msg_length, response_length,
				   true, false);

	if (p->gen < 045) {
		insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
		insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
		insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
		insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
	} else if (p->gen < 050) {
		insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
		insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
		insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
		insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
	} else if (p->gen < 060) {
		insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
		insn->bits3.dp_read_gen5.msg_control = msg_control;
		insn->bits3.dp_read_gen5.msg_type = msg_type;
		insn->bits3.dp_read_gen5.target_cache = target_cache;
	} else if (p->gen < 070) {
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
		insn->bits3.gen6_dp.msg_control = msg_control;
		insn->bits3.gen6_dp.last_render_target = 0;
		insn->bits3.gen6_dp.msg_type = msg_type;
		insn->bits3.gen6_dp.send_commit_msg = 0;
	} else {
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
		insn->bits3.gen7_dp.msg_control = msg_control;
		insn->bits3.gen7_dp.last_render_target = 0;
		insn->bits3.gen7_dp.msg_type = msg_type;
	}
}
 
/* Fill in the SEND descriptor of `insn` for a sampler message.
 *
 * The message never ends the thread.  The layout of the sampler-specific
 * fields depends on p->gen: original (below 045), g4x (045 up to 050),
 * gen5 (050 up to 070) and gen7 (070 and later).  `simd_mode` is only
 * encoded by the gen5 and gen7 layouts; the original layout instead
 * requests a float32 return format.
 */
static void brw_set_sampler_message(struct brw_compile *p,
				    struct brw_instruction *insn,
				    unsigned binding_table_index,
				    unsigned sampler,
				    unsigned msg_type,
				    unsigned response_length,
				    unsigned msg_length,
				    bool header_present,
				    unsigned simd_mode)
{
	brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
				   msg_length, response_length,
				   header_present, false);

	if (p->gen < 045) {
		insn->bits3.sampler.binding_table_index = binding_table_index;
		insn->bits3.sampler.sampler = sampler;
		insn->bits3.sampler.msg_type = msg_type;
		insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
	} else if (p->gen < 050) {
		insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
		insn->bits3.sampler_g4x.sampler = sampler;
		insn->bits3.sampler_g4x.msg_type = msg_type;
	} else if (p->gen < 070) {
		insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
		insn->bits3.sampler_gen5.sampler = sampler;
		insn->bits3.sampler_gen5.msg_type = msg_type;
		insn->bits3.sampler_gen5.simd_mode = simd_mode;
	} else {
		insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
		insn->bits3.sampler_gen7.sampler = sampler;
		insn->bits3.sampler_gen7.msg_type = msg_type;
		insn->bits3.sampler_gen7.simd_mode = simd_mode;
	}
}
 
 
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_ud(0x0));
}
 
/***********************************************************************
* Comparisons, if/else/endif
*/
 
/* Push `inst` (an IF or ELSE being emitted) on the compile's if-stack.
 *
 * The instruction is stored in the next free slot.  If the stack is full
 * afterwards, its capacity is doubled so that the following push always
 * has a slot available.
 *
 * Running out of memory cannot be reported to the caller (the emitters
 * return void), so it is treated as fatal: the assertion makes the failure
 * visible at the allocation instead of at a later NULL dereference in the
 * next push or in brw_ENDIF().
 */
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
	p->if_stack[p->if_stack_depth] = inst;
	p->if_stack_depth++;

	if (p->if_stack_array_size <= p->if_stack_depth) {
		p->if_stack_array_size *= 2;
		p->if_stack = realloc(p->if_stack,
				      sizeof(struct brw_instruction *) *
				      p->if_stack_array_size);
		assert(p->if_stack != NULL);
	}
}
 
/* EU takes the value from the flag register and pushes it onto some
 * sort of a stack (presumably merging with any flag value already on
 * the stack). Within an if block, the flags at the top of the stack
 * control execution on each channel of the unit, eg. on each of the
 * 16 pixel values in our wm programs.
 *
 * When the matching 'else' instruction is reached (presumably by
 * countdown of the instruction count patched in by our ELSE/ENDIF
 * functions), the relevant flags are inverted.
 *
 * When the matching 'endif' instruction is reached, the flags are
 * popped off. If the stack is now empty, normal execution resumes.
 *
 * brw_IF() emits the IF with zeroed jump fields, which brw_ENDIF() patches
 * later, pushes it on the if-stack and returns it.  The predicate control
 * of the current instruction state is reset to BRW_PREDICATE_NONE as a
 * side effect.
 */
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
	struct brw_instruction *if_insn = brw_next_insn(p, BRW_OPCODE_IF);

	/* Operand layout of the branch differs per generation. */
	if (p->gen >= 070) {
		brw_set_dest(p, if_insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, if_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, if_insn, brw_imm_ud(0));
		if_insn->bits3.break_cont.jip = 0;
		if_insn->bits3.break_cont.uip = 0;
	} else if (p->gen >= 060) {
		brw_set_dest(p, if_insn, brw_imm_w(0));
		if_insn->bits1.branch_gen6.jump_count = 0;
		brw_set_src0(p, if_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, if_insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, if_insn, brw_ip_reg());
		brw_set_src0(p, if_insn, brw_ip_reg());
		brw_set_src1(p, if_insn, brw_imm_d(0x0));
	}

	/* Override whatever the operand setters and the current defaults
	 * put in the header.
	 */
	if_insn->header.execution_size = execute_size;
	if_insn->header.compression_control = BRW_COMPRESSION_NONE;
	if_insn->header.predicate_control = BRW_PREDICATE_NORMAL;
	if_insn->header.mask_control = BRW_MASK_ENABLE;
	if (!p->single_program_flow)
		if_insn->header.thread_control = BRW_THREAD_SWITCH;

	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	push_if_stack(p, if_insn);
	return if_insn;
}
 
/* Emit a gen6-style IF with an embedded comparison (conditional modifier)
 * of src0 against src1.  It is not used on gen7.
 *
 * The execution size is 16 channels in compressed mode and 8 otherwise.
 * The jump count is left at zero for brw_ENDIF() to patch.  The current
 * instruction state must have compression and predication turned off.
 * The instruction is pushed on the if-stack and returned.
 */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
	struct brw_reg src0, struct brw_reg src1)
{
	struct brw_instruction *if_insn = brw_next_insn(p, BRW_OPCODE_IF);

	brw_set_dest(p, if_insn, brw_imm_w(0));
	/* Replace the size guessed from the destination. */
	if_insn->header.execution_size =
		p->compressed ? BRW_EXECUTE_16 : BRW_EXECUTE_8;
	if_insn->bits1.branch_gen6.jump_count = 0;
	brw_set_src0(p, if_insn, src0);
	brw_set_src1(p, if_insn, src1);

	assert(if_insn->header.compression_control == BRW_COMPRESSION_NONE);
	assert(if_insn->header.predicate_control == BRW_PREDICATE_NONE);
	if_insn->header.destreg__conditionalmod = conditional;

	if (!p->single_program_flow)
		if_insn->header.thread_control = BRW_THREAD_SWITCH;

	push_if_stack(p, if_insn);
	return if_insn;
}
 
/**
 * Single-program-flow (SPF) mode: turn IF and ELSE into ADD instructions.
 *
 * The IF becomes an ADD with inverted predicate whose immediate is the
 * distance to the first instruction of the ELSE block or, without an
 * ELSE, to where the ENDIF would have been.  The ELSE, if present,
 * becomes an ADD whose immediate is the distance to where the ENDIF would
 * have been.  Distances are instruction counts multiplied by 16.
 *
 * No ENDIF is needed: there are no stack operations to undo, and a
 * channel that is executing simply carries on.
 */
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
		       struct brw_instruction *if_inst,
		       struct brw_instruction *else_inst)
{
	/* First slot after the block, i.e. where the ENDIF would go. */
	struct brw_instruction *after_block = &p->store[p->nr_insn];

	assert(p->single_program_flow);
	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
	assert(if_inst->header.execution_size == BRW_EXECUTE_1);

	if_inst->header.opcode = BRW_OPCODE_ADD;
	if_inst->header.predicate_inverse = 1;

	if (else_inst == NULL) {
		if_inst->bits3.ud = (after_block - if_inst) * 16;
		return;
	}

	else_inst->header.opcode = BRW_OPCODE_ADD;

	/* +1: land on the instruction following the former ELSE. */
	if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
	else_inst->bits3.ud = (after_block - else_inst) * 16;
}
 
/**
* Patch IF and ELSE instructions with appropriate jump targets.
*/
static void
patch_IF_ELSE(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst,
struct brw_instruction *endif_inst)
{
unsigned br = 1;
 
assert(!p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(endif_inst != NULL);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
/* Jump count is for 64bit data chunk each, so one 128bit instruction
* requires 2 chunks.
*/
if (p->gen >= 050)
br = 2;
 
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
endif_inst->header.execution_size = if_inst->header.execution_size;
 
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
if (p->gen < 060) {
/* Turn it into an IFF, which means no mask stack operations for
* all-false and jumping past the ENDIF.
*/
if_inst->header.opcode = BRW_OPCODE_IFF;
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
} else {
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
}
} else {
else_inst->header.execution_size = if_inst->header.execution_size;
 
/* Patch IF -> ELSE */
if (p->gen < 060) {
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen <= 070) {
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
}
 
/* Patch ELSE -> ENDIF */
if (p->gen < 060) {
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
* matching ENDIF.
*/
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
} else {
/* The IF instruction's JIP should point just past the ELSE */
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
}
}
}
 
/* Emit an ELSE instruction and push it on the if-stack.
 *
 * The operands follow the same per-generation layout as the IF emitted by
 * brw_IF(); every jump field is left at zero and patched by brw_ENDIF().
 */
void
brw_ELSE(struct brw_compile *p)
{
	struct brw_instruction *else_insn = brw_next_insn(p, BRW_OPCODE_ELSE);

	if (p->gen >= 070) {
		brw_set_dest(p, else_insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, else_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, else_insn, brw_imm_ud(0));
		else_insn->bits3.break_cont.jip = 0;
		else_insn->bits3.break_cont.uip = 0;
	} else if (p->gen >= 060) {
		brw_set_dest(p, else_insn, brw_imm_w(0));
		else_insn->bits1.branch_gen6.jump_count = 0;
		brw_set_src0(p, else_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, else_insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, else_insn, brw_ip_reg());
		brw_set_src0(p, else_insn, brw_ip_reg());
		brw_set_src1(p, else_insn, brw_imm_d(0x0));
	}

	else_insn->header.compression_control = BRW_COMPRESSION_NONE;
	else_insn->header.mask_control = BRW_MASK_ENABLE;
	if (!p->single_program_flow)
		else_insn->header.thread_control = BRW_THREAD_SWITCH;

	push_if_stack(p, else_insn);
}
 
/* Close the innermost open IF block.
 *
 * The IF and, if the top of the if-stack is an ELSE, that ELSE are popped
 * from the stack.  In single-program-flow mode no ENDIF is emitted; the
 * IF/ELSE are rewritten as ADDs instead.  Otherwise an ENDIF is emitted
 * with generation-specific operands and pop/jump fields, and the IF/ELSE
 * are patched to jump to it.
 *
 * The stack is not checked for underflow; callers must pair every
 * brw_ENDIF() with an earlier IF.
 */
void
brw_ENDIF(struct brw_compile *p)
{
	struct brw_instruction *endif_insn;
	struct brw_instruction *top;
	struct brw_instruction *else_inst = NULL;
	struct brw_instruction *if_inst;

	/* Pop the (optional) ELSE, then the IF. */
	top = p->if_stack[--p->if_stack_depth];
	if (top->header.opcode == BRW_OPCODE_ELSE) {
		else_inst = top;
		top = p->if_stack[--p->if_stack_depth];
	}
	if_inst = top;

	if (p->single_program_flow) {
		/* ENDIF is useless; don't bother emitting it. */
		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
		return;
	}

	endif_insn = brw_next_insn(p, BRW_OPCODE_ENDIF);

	if (p->gen >= 070) {
		brw_set_dest(p, endif_insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, endif_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, endif_insn, brw_imm_ud(0));
	} else if (p->gen >= 060) {
		brw_set_dest(p, endif_insn, brw_imm_w(0));
		brw_set_src0(p, endif_insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, endif_insn, __retype_d(brw_null_reg()));
	} else {
		brw_set_dest(p, endif_insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src0(p, endif_insn, __retype_ud(brw_vec4_grf(0,0)));
		brw_set_src1(p, endif_insn, brw_imm_d(0x0));
	}

	endif_insn->header.compression_control = BRW_COMPRESSION_NONE;
	endif_insn->header.mask_control = BRW_MASK_ENABLE;
	endif_insn->header.thread_control = BRW_THREAD_SWITCH;

	/* Also pop item off the stack in the endif instruction: */
	if (p->gen >= 070) {
		endif_insn->bits3.break_cont.jip = 2;
	} else if (p->gen >= 060) {
		endif_insn->bits1.branch_gen6.jump_count = 2;
	} else {
		endif_insn->bits3.if_else.jump_count = 0;
		endif_insn->bits3.if_else.pop_count = 1;
		endif_insn->bits3.if_else.pad0 = 0;
	}

	patch_IF_ELSE(p, if_inst, else_inst, endif_insn);
}
 
/**
 * Emit a BREAK instruction and return it.
 *
 * pop_count is only encoded before gen6; from gen6 on no jump or pop
 * field is written by this function.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *brk = brw_next_insn(p, BRW_OPCODE_BREAK);

	if (p->gen < 060) {
		brw_set_dest(p, brk, brw_ip_reg());
		brw_set_src0(p, brk, brw_ip_reg());
		brw_set_src1(p, brk, brw_imm_d(0x0));
		brk->bits3.if_else.pad0 = 0;
		brk->bits3.if_else.pop_count = pop_count;
	} else {
		brw_set_dest(p, brk, __retype_d(brw_null_reg()));
		brw_set_src0(p, brk, __retype_d(brw_null_reg()));
		brw_set_src1(p, brk, brw_imm_d(0x0));
	}

	brk->header.compression_control = BRW_COMPRESSION_NONE;
	brk->header.execution_size = BRW_EXECUTE_8;

	return brk;
}
 
/**
 * Emit a CONTINUE instruction and return it.
 *
 * do_insn is accepted but never referenced in this function.
 *
 * NOTE(review): dest and src0 are programmed twice below -- first with
 * the null register, then with the IP register. The second pair of
 * calls presumably overrides the first; confirm against brw_set_dest()
 * and brw_set_src0() before removing either pair.
 */
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;

insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));

/* Uncompressed, 8-wide execution. */
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
return insn;
}
 
/**
 * Emit a CONTINUE instruction that operates on the IP register and
 * carries pop_count in its if_else descriptor. Returns the instruction.
 */
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
	struct brw_instruction *cont = brw_next_insn(p, BRW_OPCODE_CONTINUE);

	brw_set_dest(p, cont, brw_ip_reg());
	brw_set_src0(p, cont, brw_ip_reg());
	brw_set_src1(p, cont, brw_imm_d(0x0));

	cont->header.compression_control = BRW_COMPRESSION_NONE;
	cont->header.execution_size = BRW_EXECUTE_8;

	cont->bits3.if_else.pad0 = 0;
	cont->bits3.if_else.pop_count = pop_count;

	return cont;
}
 
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
* used for control within the loop. We have a few ways they can be
* done.
*
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
* jip and no DO instruction.
*
* For non-uniform control flow pre-gen6, there's a DO instruction to
* push the mask, and a WHILE to jump back, and BREAK to get out and
* pop the mask.
*
* For gen6, there's no more mask stack, so no need for DO. WHILE
* just points back to the first instruction of the loop.
*/
/**
 * Mark the start of a loop.
 *
 * On gen6+ or in single program flow mode nothing is emitted and the
 * address of the next instruction slot is returned. Otherwise a DO
 * instruction with the requested execution size is emitted and returned.
 */
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
	struct brw_instruction *do_insn;

	if (p->gen >= 060 || p->single_program_flow)
		return &p->store[p->nr_insn];

	do_insn = brw_next_insn(p, BRW_OPCODE_DO);

	/* Override the defaults for this instruction. */
	brw_set_dest(p, do_insn, brw_null_reg());
	brw_set_src0(p, do_insn, brw_null_reg());
	brw_set_src1(p, do_insn, brw_null_reg());

	do_insn->header.compression_control = BRW_COMPRESSION_NONE;
	do_insn->header.execution_size = execute_size;
	do_insn->header.predicate_control = BRW_PREDICATE_NONE;

	return do_insn;
}
 
/**
 * Close a loop opened with brw_DO().
 *
 * do_insn is the value returned by brw_DO(). Depending on the
 * generation and the flow mode this emits a WHILE (or, for pre-gen6
 * single program flow, an ADD on the IP register) that jumps back to
 * do_insn. The default predicate of subsequent instructions is reset to
 * BRW_PREDICATE_NONE. Returns the emitted instruction.
 */
struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *do_insn)
{
	/* Jump distances are scaled by 2 from gen5 on. */
	const unsigned br = p->gen >= 050 ? 2 : 1;
	struct brw_instruction *insn;

	if (p->gen < 060) {
		if (!p->single_program_flow) {
			insn = brw_next_insn(p, BRW_OPCODE_WHILE);

			assert(do_insn->header.opcode == BRW_OPCODE_DO);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			brw_set_src1(p, insn, brw_imm_d(0));

			insn->header.execution_size = do_insn->header.execution_size;
			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
			insn->bits3.if_else.pop_count = 0;
			insn->bits3.if_else.pad0 = 0;
		} else {
			/* Uniform flow: a plain IP-relative ADD, 16 bytes per instruction. */
			insn = brw_next_insn(p, BRW_OPCODE_ADD);

			brw_set_dest(p, insn, brw_ip_reg());
			brw_set_src0(p, insn, brw_ip_reg());
			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
			insn->header.execution_size = BRW_EXECUTE_1;
		}
	} else if (p->gen < 070) {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, brw_imm_w(0));
		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));

		insn->header.execution_size = BRW_EXECUTE_8;
	} else {
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);

		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
		brw_set_src1(p, insn, brw_imm_ud(0));
		insn->bits3.break_cont.jip = br * (do_insn - insn);

		insn->header.execution_size = BRW_EXECUTE_8;
	}

	insn->header.compression_control = BRW_COMPRESSION_NONE;
	p->current->header.predicate_control = BRW_PREDICATE_NONE;

	return insn;
}
 
/* FORWARD JUMPS:
 *
 * Resolve a previously emitted JMPI so that it lands on the next
 * instruction to be emitted. The distance is counted from the
 * instruction after the jump and is doubled from gen5 on.
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn)
{
	struct brw_instruction *target = &p->store[p->nr_insn];
	const unsigned scale = p->gen >= 050 ? 2 : 1;

	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);

	jmp_insn->bits3.ud = scale * ((target - jmp_insn) - 1);
}
 
 
 
/* Emit a CMP with the given conditional modifier.
 *
 * When the destination is architecture register 0, subsequent
 * instructions are predicated on the result: the default predicate
 * control becomes BRW_PREDICATE_NORMAL and p->flag_value is set to 0xff.
 * This stays in effect until brw_set_predicate_control_flag_value() is
 * called again.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1)
{
	struct brw_instruction *cmp = brw_next_insn(p, BRW_OPCODE_CMP);

	cmp->header.destreg__conditionalmod = conditional;
	brw_set_dest(p, cmp, dest);
	brw_set_src0(p, cmp, src0);
	brw_set_src1(p, cmp, src1);

	/* Any other destination leaves the default predicate state alone. */
	if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE || dest.nr != 0)
		return;

	p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
	p->flag_value = 0xff;
}
 
/* Issue 'wait' instruction for n1, host could program MMIO
to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
 
brw_set_dest(p, insn, src);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
}
 
/***********************************************************************
* Helpers for the various SEND message types:
*/
 
/**
 * Extended math function, float[8], single source.
 *
 * Before gen6 this is a SEND carrying a math message built by
 * brw_set_math_message(); msg_reg_nr, data_type and precision are only
 * used on that path. From gen6 on a MATH instruction is emitted with
 * the function code stored in the conditional-modifier field.
 */
void brw_math(struct brw_compile *p,
	      struct brw_reg dest,
	      unsigned function,
	      unsigned saturate,
	      unsigned msg_reg_nr,
	      struct brw_reg src,
	      unsigned data_type,
	      unsigned precision)
{
	struct brw_instruction *insn;

	if (p->gen < 060) {
		insn = brw_next_insn(p, BRW_OPCODE_SEND);

		/* Example code doesn't set predicate_control for send
		 * instructions.
		 */
		insn->header.predicate_control = 0;
		insn->header.destreg__conditionalmod = msg_reg_nr;

		brw_set_dest(p, insn, dest);
		brw_set_src0(p, insn, src);
		brw_set_math_message(p, insn, function,
				     src.type == BRW_REGISTER_TYPE_D,
				     precision,
				     saturate,
				     data_type);
		return;
	}

	insn = brw_next_insn(p, BRW_OPCODE_MATH);

	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
	assert(src.file == BRW_GENERAL_REGISTER_FILE);

	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
	assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);

	/* Source modifiers are ignored for extended math instructions. */
	assert(!src.negate);
	assert(!src.abs);

	/* Only the integer divide functions accept non-float sources. */
	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
		assert(src.type == BRW_REGISTER_TYPE_F);
	}

	/* Math is the same ISA format as other opcodes, except that
	 * CondModifier becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
	 */
	insn->header.destreg__conditionalmod = function;
	insn->header.saturate = saturate;

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src);
	brw_set_src1(p, insn, brw_null_reg());
}
 
/**
 * Extended math function with two source operands.
 *
 * Always emits a MATH opcode instruction; there is no generation check
 * and no SEND fallback in this function.
 *
 * All three registers must be in the general register file with a
 * horizontal stride of 1, and neither source may carry a negate or abs
 * modifier. Unless the function is one of the two integer-divide
 * variants, both sources must be of float type.
 */
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);

assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);

assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);

/* Only the integer divide functions accept non-float sources. */
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src0.type == BRW_REGISTER_TYPE_F);
assert(src1.type == BRW_REGISTER_TYPE_F);
}

/* Source modifiers are ignored for extended math instructions. */
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
assert(!src1.abs);

/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;

brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
 
/**
 * Extended math function, float[16].
 *
 * From gen6 on a single MATH instruction is emitted. On older hardware
 * two SEND instructions are used: the first targets dest with message
 * register msg_reg_nr, the second (marked as the second compressed
 * half) targets the following destination register with msg_reg_nr + 1.
 */
void brw_math_16(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned function,
		 unsigned saturate,
		 unsigned msg_reg_nr,
		 struct brw_reg src,
		 unsigned precision)
{
	struct brw_instruction *send;

	if (p->gen >= 060) {
		struct brw_instruction *math = brw_next_insn(p, BRW_OPCODE_MATH);

		/* Math is the same ISA format as other opcodes, except that
		 * CondModifier becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
		 */
		math->header.destreg__conditionalmod = function;
		math->header.saturate = saturate;

		/* Source modifiers are ignored for extended math instructions. */
		assert(!src.negate);
		assert(!src.abs);

		brw_set_dest(p, math, dest);
		brw_set_src0(p, math, src);
		brw_set_src1(p, math, brw_null_reg());
		return;
	}

	brw_push_insn_state(p);
	brw_set_predicate_control_flag_value(p, 0xff);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);

	/* First half. */
	send = brw_next_insn(p, BRW_OPCODE_SEND);
	send->header.destreg__conditionalmod = msg_reg_nr;

	brw_set_dest(p, send, dest);
	brw_set_src0(p, send, src);
	brw_set_math_message(p, send, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	/* Second half. */
	send = brw_next_insn(p, BRW_OPCODE_SEND);
	send->header.compression_control = BRW_COMPRESSION_2NDHALF;
	send->header.destreg__conditionalmod = msg_reg_nr+1;

	brw_set_dest(p, send, __offset(dest,1));
	brw_set_src0(p, send, src);
	brw_set_math_message(p, send, function,
			     BRW_MATH_INTEGER_UNSIGNED,
			     precision,
			     saturate,
			     BRW_MATH_DATA_VECTOR);

	brw_pop_insn_state(p);
}
 
/**
* Write a block of OWORDs (half a GRF each) from the scratch buffer,
* using a constant offset per channel.
*
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*/
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
uint32_t msg_control, msg_type;
int mlen;
 
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
mlen = 2;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
mlen = 3;
}
 
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
* it'll screw up texture samples, so set it up inside the message
* reg.
*/
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_reg dest;
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
 
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
insn->header.compression_control = BRW_COMPRESSION_NONE;
src_header = vec16(src_header);
}
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* Until gen6, writes followed by reads from the same location
* are not guaranteed to be ordered unless write_commit is set.
* If set, then a no-op write is issued to the destination
* register to set a dependency, and a read from the destination
* can be used to ensure the ordering.
*
* For gen6, only writes between different threads need ordering
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
if (p->gen >= 060) {
dest = __retype_uw(vec16(brw_null_reg()));
send_commit_msg = 0;
} else {
dest = src_header;
send_commit_msg = 1;
}
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
brw_set_dp_write_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
msg_type,
mlen,
true, /* header_present */
0, /* pixel scoreboard */
send_commit_msg, /* response_length */
0, /* eot */
send_commit_msg);
}
}
 
 
/**
 * Read a block of owords (half a GRF each) from the scratch buffer
 * using a constant index per channel.
 *
 * Offset must be aligned to oword size (16 bytes); from gen6 on it is
 * converted to oword units before being placed in the header. Used for
 * register spilling.
 */
void
brw_oword_block_read_scratch(struct brw_compile *p,
			     struct brw_reg dest,
			     struct brw_reg mrf,
			     int num_regs,
			     unsigned offset)
{
	struct brw_instruction *insn;
	uint32_t msg_control;
	int rlen;

	if (p->gen >= 060)
		offset /= 16;

	mrf = __retype_ud(mrf);
	dest = __retype_uw(dest);

	if (num_regs != 1) {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
		rlen = 2;
	} else {
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
		rlen = 1;
	}

	/* Build the header in the message register: a copy of g0 with
	 * the offset patched into element 2.
	 */
	brw_push_insn_state(p);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);

	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));

	/* set message header global offset field (reg 0, element 2) */
	brw_MOV(p,
		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
		brw_imm_ud(offset));

	brw_pop_insn_state(p);

	insn = brw_next_insn(p, BRW_OPCODE_SEND);

	assert(insn->header.predicate_control == 0);
	insn->header.compression_control = BRW_COMPRESSION_NONE;
	insn->header.destreg__conditionalmod = mrf.nr;

	brw_set_dest(p, insn, dest); /* UW? */
	/* From gen6 on the message register is named as src0. */
	brw_set_src0(p, insn, p->gen >= 060 ? mrf : brw_null_reg());

	brw_set_dp_read_message(p,
				insn,
				255, /* binding table index (255=stateless) */
				msg_control,
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
				BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
				1, /* msg_length */
				rlen);
}
 
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index)
{
	struct brw_instruction *send;

	/* On newer hardware, offset is in units of owords. */
	if (p->gen >= 060)
		offset /= 16;

	mrf = __retype_ud(mrf);

	brw_push_insn_state(p);
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);

	/* Header: copy of g0 with the offset in element 2. */
	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
	brw_MOV(p,
		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
		brw_imm_ud(offset));

	send = brw_next_insn(p, BRW_OPCODE_SEND);
	send->header.destreg__conditionalmod = mrf.nr;

	/* cast dest to a uword[8] vector */
	dest = __retype_uw(vec8(dest));

	brw_set_dest(p, send, dest);
	brw_set_src0(p, send, p->gen >= 060 ? mrf : brw_null_reg());

	brw_set_dp_read_message(p,
				send,
				bind_table_index,
				BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
				1, /* msg_length */
				1); /* response_length (1 reg, 2 owords!) */

	brw_pop_insn_state(p);
}
 
/**
 * Read a set of dwords from the data port Data Cache (const buffer).
 *
 * Location (in buffer) appears as UD offsets in the register after
 * the provided mrf header reg. Only the header copy of g0 is emitted
 * here; the offsets themselves are not written by this function.
 */
void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index)
{
	struct brw_instruction *send;

	mrf = __retype_ud(mrf);

	/* Copy g0 into the header with masking and predication off. */
	brw_push_insn_state(p);
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
	brw_pop_insn_state(p);

	send = brw_next_insn(p, BRW_OPCODE_SEND);
	send->header.destreg__conditionalmod = mrf.nr;

	/* cast dest to a uword[8] vector */
	dest = __retype_uw(vec8(dest));

	brw_set_dest(p, send, dest);
	brw_set_src0(p, send, brw_null_reg());

	brw_set_dp_read_message(p,
				send,
				bind_table_index,
				BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
				BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
				2, /* msg_length */
				1); /* response_length */
}
 
/**
 * Read float[4] constant(s) from VS constant buffer at a fixed location.
 *
 * The location is written to element 2 of message register 1 (in oword
 * units from gen6 on) and an OWORD block read with a one-register
 * response is issued into 'dest'.
 */
void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      unsigned location,
		      unsigned bind_table_index)
{
	const unsigned msg_reg_nr = 1;
	struct brw_instruction *send;

	if (p->gen >= 060)
		location /= 16;

	/* Setup MRF[1] with location/offset into const buffer */
	brw_push_insn_state(p);
	brw_set_access_mode(p, BRW_ALIGN_1);
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
	brw_set_mask_control(p, BRW_MASK_DISABLE);
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
		brw_imm_ud(location));
	brw_pop_insn_state(p);

	send = brw_next_insn(p, BRW_OPCODE_SEND);

	send->header.predicate_control = BRW_PREDICATE_NONE;
	send->header.compression_control = BRW_COMPRESSION_NONE;
	send->header.destreg__conditionalmod = msg_reg_nr;
	send->header.mask_control = BRW_MASK_DISABLE;

	brw_set_dest(p, send, dest);
	brw_set_src0(p, send,
		     p->gen >= 060 ? brw_message_reg(msg_reg_nr) : brw_null_reg());

	brw_set_dp_read_message(p,
				send,
				bind_table_index,
				0,
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
				1, /* msg_length */
				1); /* response_length (1 Oword) */
}
 
/**
* Read a float[4] constant per vertex from VS constant buffer, with
* relative addressing.
*/
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addr_reg,
unsigned offset,
unsigned bind_table_index)
{
struct brw_reg src = brw_vec8_grf(0, 0);
struct brw_instruction *insn;
int msg_type;
 
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
brw_ADD(p, __retype_d(brw_message_reg(1)),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
 
gen6_resolve_implied_move(p, &src, 0);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (p->gen >= 045)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
 
/**
 * Emit a render target write.
 *
 * From gen6 on, a write to binding table index 0 uses SENDC instead of
 * SEND, and src0 is replaced by message register msg_reg_nr. Before
 * gen6 the message register number is stored in the instruction header.
 * The destination is a null register sized to the dispatch width.
 */
void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  unsigned msg_reg_nr,
		  struct brw_reg src0,
		  unsigned msg_control,
		  unsigned binding_table_index,
		  unsigned msg_length,
		  unsigned response_length,
		  bool eot,
		  bool header_present)
{
	const bool use_sendc = p->gen >= 060 && binding_table_index == 0;
	struct brw_instruction *send;
	struct brw_reg dest;
	unsigned msg_type;

	if (dispatch_width != 16)
		dest = __retype_uw(vec8(brw_null_reg()));
	else
		dest = __retype_uw(vec16(brw_null_reg()));

	if (use_sendc)
		send = brw_next_insn(p, BRW_OPCODE_SENDC);
	else
		send = brw_next_insn(p, BRW_OPCODE_SEND);

	/* The execution mask is ignored for render target writes. */
	send->header.predicate_control = 0;
	send->header.compression_control = BRW_COMPRESSION_NONE;

	if (p->gen < 060) {
		send->header.destreg__conditionalmod = msg_reg_nr;

		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
	} else {
		/* headerless version, just submit color payload */
		src0 = brw_message_reg(msg_reg_nr);

		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
	}

	brw_set_dest(p, send, dest);
	brw_set_src0(p, send, src0);
	brw_set_dp_write_message(p,
				 send,
				 binding_table_index,
				 msg_control,
				 msg_type,
				 msg_length,
				 header_present,
				 eot,
				 response_length,
				 eot,
				 0 /* send_commit_msg */);
}
 
/**
* Texture sample instruction.
* Note: the msg_type plus msg_length values determine exactly what kind
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
assert(writemask);
 
if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 
writemask = ~writemask & WRITEMASK_XYZW;
 
brw_push_insn_state(p);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
 
brw_pop_insn_state(p);
 
src0 = __retype_uw(brw_null_reg());
}
 
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_sampler_message(p, insn,
binding_table_index,
sampler,
msg_type,
response_length,
msg_length,
header_present,
simd_mode);
}
}
 
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style. Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 *
 * Emit a URB write SEND. On gen7+ the channel-mask bits (0xff00) are
 * first OR-ed into element 5 of the message header. Before gen6 the
 * message register number is stored in the instruction header.
 */
void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   unsigned msg_length,
		   unsigned response_length,
		   bool eot,
		   bool writes_complete,
		   unsigned offset,
		   unsigned swizzle)
{
	struct brw_instruction *insn;

	gen6_resolve_implied_move(p, &src0, msg_reg_nr);

	if (p->gen >= 070) {
		/* Enable Channel Masks in the URB_WRITE_HWORD message header */
		brw_push_insn_state(p);
		brw_set_access_mode(p, BRW_ALIGN_1);
		brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
		       __retype_ud(brw_vec1_grf(0, 5)),
		       brw_imm_ud(0xff00));
		brw_pop_insn_state(p);
	}

	insn = brw_next_insn(p, BRW_OPCODE_SEND);

	assert(msg_length < BRW_MAX_MRF);

	brw_set_dest(p, insn, dest);
	brw_set_src0(p, insn, src0);
	brw_set_src1(p, insn, brw_imm_d(0));

	/* Only pre-gen6 SENDs carry the message register number in the
	 * header; this now uses the same "< 060" test as the other SEND
	 * helpers in this file (it used to be "<= 060", which also wrote
	 * the field on gen6).
	 */
	if (p->gen < 060)
		insn->header.destreg__conditionalmod = msg_reg_nr;

	brw_set_urb_message(p,
			    insn,
			    allocate,
			    used,
			    msg_length,
			    response_length,
			    eot,
			    writes_complete,
			    offset,
			    swizzle);
}
 
/* Return the index of the first ENDIF, ELSE or WHILE instruction after
 * 'start'. Asserts (and falls back to start + 1) if none is found
 * before the end of the program.
 */
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
	int ip = start + 1;

	while (ip < p->nr_insn) {
		const int opcode = p->store[ip].header.opcode;

		if (opcode == BRW_OPCODE_ENDIF ||
		    opcode == BRW_OPCODE_ELSE ||
		    opcode == BRW_OPCODE_WHILE)
			return ip;

		ip++;
	}

	assert(!"not reached");
	return start + 1;
}
 
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 *
 * Returns the index of the first WHILE after 'start' whose jump target
 * is at or before 'start'. Asserts (and falls back to start + 1) if no
 * such WHILE exists.
 */
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
	int ip;
	int br = 2;

	for (ip = start + 1; ip < p->nr_insn; ip++) {
		struct brw_instruction *insn = &p->store[ip];

		if (insn->header.opcode == BRW_OPCODE_WHILE) {
			/* Read the offset from the field brw_WHILE() wrote:
			 * bits1.branch_gen6.jump_count on gen6 only,
			 * bits3.break_cont.jip from gen7 (070) on.
			 */
			int jip = p->gen < 070 ? insn->bits1.branch_gen6.jump_count
				: insn->bits3.break_cont.jip;
			if (ip + jip / br <= start)
				return ip;
		}
	}
	assert(!"not reached");
	return start + 1;
}
 
/* After program generation, go back and update the UIP and JIP of
 * BREAK and CONT instructions to their correct locations.
 *
 * Nothing is done before gen6. JIP is the scaled distance to the next
 * ENDIF/ELSE/WHILE; UIP is the scaled distance to the end of the
 * enclosing loop.
 */
void
brw_set_uip_jip(struct brw_compile *p)
{
	int ip;
	int br = 2;

	/* Pre-gen6 hardware has no JIP/UIP fields to patch. Gen6 itself
	 * must not be skipped: brw_BREAK() leaves the fields unset there.
	 */
	if (p->gen < 060)
		return;

	for (ip = 0; ip < p->nr_insn; ip++) {
		struct brw_instruction *insn = &p->store[ip];

		switch (insn->header.opcode) {
		case BRW_OPCODE_BREAK:
			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
			/* Gen7 UIP points to WHILE; Gen6 points just after it */
			insn->bits3.break_cont.uip =
				br * (brw_find_loop_end(p, ip) - ip + (p->gen < 070 ? 1 : 0));
			break;
		case BRW_OPCODE_CONTINUE:
			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
			insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);

			assert(insn->bits3.break_cont.uip != 0);
			assert(insn->bits3.break_cont.jip != 0);
			break;
		default:
			break;
		}
	}
}
 
/**
 * Emit a SEND carrying an FF_SYNC message.
 *
 * src0 is first passed through gen6_resolve_implied_move() together
 * with msg_reg_nr. Before gen6 the message register number is stored in
 * the instruction header. allocate, response_length and eot are
 * forwarded unchanged to brw_set_ff_sync_message().
 */
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot)
{
struct brw_instruction *insn;

gen6_resolve_implied_move(p, &src0, msg_reg_nr);

insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));

/* Only pre-gen6 SENDs carry the message register number here. */
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;

brw_set_ff_sync_message(p,
insn,
allocate,
response_length,
eot);
}
/drivers/video/Intel-2D/sna/brw/brw_sf.c
0,0 → 1,54
#include "brw.h"
 
bool brw_sf_kernel__nomask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2, delta;
 
v0 = brw_vec4_grf(3, 0);
v1 = brw_vec4_grf(4, 0);
v2 = brw_vec4_grf(5, 0);
delta = brw_vec8_grf(7, 0);
 
inv = brw_vec4_grf(6, 0);
brw_math_invert(p, inv, brw_vec4_grf(1, 11));
 
brw_MOV(p, brw_message_reg(3), v0);
 
brw_ADD(p, delta, v1, brw_negate(v2));
brw_MUL(p, brw_message_reg(1), delta, brw_vec1_grf(6,0));
 
brw_ADD(p, delta, v2, brw_negate(v0));
brw_MUL(p, brw_message_reg(2), delta, brw_vec1_grf(6,2));
 
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
 
return true;
}
 
bool brw_sf_kernel__mask(struct brw_compile *p)
{
struct brw_reg inv, v0, v1, v2;
 
v0 = brw_vec8_grf(3, 0);
v1 = brw_vec8_grf(4, 0);
v2 = brw_vec8_grf(5, 0);
 
inv = brw_vec4_grf(6, 0);
brw_math_invert(p, inv, brw_vec4_grf(1, 11));
 
brw_MOV(p, brw_message_reg(3), v0);
 
brw_ADD(p, brw_vec8_grf(7, 0), v1, brw_negate(v2));
brw_MUL(p, brw_message_reg(1), brw_vec8_grf(7, 0), brw_vec1_grf(6,0));
 
brw_ADD(p, brw_vec8_grf(7, 0), v2, brw_negate(v0));
brw_MUL(p, brw_message_reg(2), brw_vec8_grf(7, 0), brw_vec1_grf(6,2));
 
brw_urb_WRITE(p, brw_null_reg(), 0, brw_vec8_grf(0 ,0),
false, true, 4, 0, true, true, 0,
BRW_URB_SWIZZLE_TRANSPOSE);
 
return true;
}
/drivers/video/Intel-2D/sna/brw/brw_wm.c
0,0 → 1,681
#include "brw.h"
 
#define X16 8
#define Y16 10
 
static void brw_wm_xy(struct brw_compile *p, int dw)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = __retype_uw(r1);
struct brw_reg x_uw, y_uw;
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
if (dw == 16) {
x_uw = brw_uw16_grf(30, 0);
y_uw = brw_uw16_grf(28, 0);
} else {
x_uw = brw_uw8_grf(30, 0);
y_uw = brw_uw8_grf(28, 0);
}
 
brw_ADD(p,
x_uw,
__stride(__suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
brw_ADD(p,
y_uw,
__stride(__suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
 
static void brw_wm_affine_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
msg += dw/8;
 
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 4),
brw_vec8_grf(2, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
}
}
 
/* Map a dispatch width to the matching sampler SIMD mode: SIMD16 for a
 * width of 16, SIMD8 for anything else.
 */
static inline unsigned simd(int dw)
{
	if (dw == 16)
		return BRW_SAMPLER_SIMD_MODE_SIMD16;

	return BRW_SAMPLER_SIMD_MODE_SIMD8;
}
 
/* Describe GRF 'result' as a UW destination whose vertical stride and
 * width match the dispatch width (16, or 8 otherwise).
 */
static inline struct brw_reg sample_result(int dw, int result)
{
	if (dw == 16)
		return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
			       BRW_REGISTER_TYPE_UW,
			       BRW_VERTICAL_STRIDE_16,
			       BRW_WIDTH_16,
			       BRW_HORIZONTAL_STRIDE_1,
			       BRW_SWIZZLE_XYZW,
			       WRITEMASK_XYZW);

	return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
		       BRW_REGISTER_TYPE_UW,
		       BRW_VERTICAL_STRIDE_8,
		       BRW_WIDTH_8,
		       BRW_HORIZONTAL_STRIDE_1,
		       BRW_SWIZZLE_XYZW,
		       WRITEMASK_XYZW);
}
 
/* Sample all four components of 'channel' into GRF 'result'.
 *
 * From gen6 on the message is headerless and starts at msg + 1; before
 * that g0 is passed as src0 and a header is included in the message
 * length. Returns 'result' unchanged.
 */
static int brw_wm_sample(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	const int len = dw == 16 ? 4 : 2;
	struct brw_reg src0;
	bool header;

	if (p->gen < 060) {
		header = true;
		src0 = brw_vec8_grf(0, 0);
	} else {
		header = false;
		msg += 1;
		src0 = brw_message_reg(msg);
	}

	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
		   channel+1, channel, WRITEMASK_XYZW, 0,
		   2*len, len+header, header, simd(dw));

	return result;
}
 
/* Sample only the alpha component of 'channel' (writemask W, message
 * header always present).
 *
 * For SIMD8 the sample return is not masked, so four registers come
 * back and the returned register index is result + 3; otherwise
 * 'result' is returned unchanged.
 */
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	const bool simd8 = dw == 8;
	/* SIMD8 sample return is not masked */
	const int mlen = simd8 ? 3 : 5;
	const int rlen = simd8 ? 4 : 2;
	struct brw_reg src0;

	if (p->gen < 060)
		src0 = brw_vec8_grf(0, 0);
	else
		src0 = brw_message_reg(msg);

	brw_SAMPLE(p, sample_result(dw, result), msg, src0,
		   channel+1, channel, WRITEMASK_W, 0,
		   rlen, mlen, true, simd(dw));

	return simd8 ? result + 3 : result;
}
 
/*
 * Compute affine texture coordinates for @channel, then sample all four
 * components.  Returns the register number reported by brw_wm_sample().
 */
static int brw_wm_affine(struct brw_compile *p, int dw,
			 int channel, int msg, int result)
{
	int reg;

	brw_wm_affine_st(p, dw, channel, msg);
	reg = brw_wm_sample(p, dw, channel, msg, result);

	return reg;
}
 
/*
 * Compute affine texture coordinates for @channel, then sample only the
 * alpha component.  Returns the register number reported by
 * brw_wm_sample__alpha().
 */
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
				int channel, int msg, int result)
{
	int reg;

	brw_wm_affine_st(p, dw, channel, msg);
	reg = brw_wm_sample__alpha(p, dw, channel, msg, result);

	return reg;
}
 
/*
 * Build a UW-typed descriptor for the architecture null register, with
 * the stride/width encoding matching the dispatch width (16-wide when
 * @dw is 16, otherwise 8-wide).
 */
static inline struct brw_reg null_result(int dw)
{
	unsigned vstride, width;

	if (dw == 16) {
		vstride = BRW_VERTICAL_STRIDE_16;
		width = BRW_WIDTH_16;
	} else {
		vstride = BRW_VERTICAL_STRIDE_8;
		width = BRW_WIDTH_8;
	}

	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
		       BRW_REGISTER_TYPE_UW,
		       vstride,
		       width,
		       BRW_HORIZONTAL_STRIDE_1,
		       BRW_SWIZZLE_XYZW,
		       WRITEMASK_XYZW);
}
 
static void brw_fb_write(struct brw_compile *p, int dw)
{
struct brw_instruction *insn;
unsigned msg_control, msg_type, msg_len;
struct brw_reg src0;
bool header;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
msg_len = 8;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
msg_len = 4;
}
 
if (p->gen < 060) {
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
brw_pop_insn_state(p);
 
msg_len += 2;
}
 
/* The execution mask is ignored for render target writes. */
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (p->gen >= 060) {
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = brw_message_reg(2);
header = false;
} else {
insn->header.destreg__conditionalmod = 0;
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = __retype_uw(brw_vec8_grf(0, 0));
header = true;
}
 
brw_set_dest(p, insn, null_result(dw));
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p, insn, 0,
msg_control, msg_type, msg_len,
header, true, 0, true, false);
}
 
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
/* XXX pixel execution mask? */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MOV(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MOV(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MOV(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec8_grf(mask, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec8_grf(mask+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__opacity(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec1_grf(mask, 3));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec1_grf(mask, 3));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src + 0, 0),
brw_vec8_grf(mask + 0, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src + 1, 0),
brw_vec8_grf(mask + 1, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src + 2, 0),
brw_vec8_grf(mask + 2, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src + 3, 0),
brw_vec8_grf(mask + 3, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n + 1, 0),
brw_vec8_grf(mask + 2*n + 1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
/*
 * Assemble the kernel for an affine source with no mask: sample channel 0
 * (message base 1, result register 12) and write it out.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
	int src;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src = brw_wm_affine(p, dispatch, 0, 1, 12);
	brw_wm_write(p, dispatch, src);

	return true;
}
 
/*
 * Assemble the kernel for an affine source multiplied by the alpha of an
 * affine mask.  The source is channel 0 (message base 1, result 12); the
 * mask alpha is channel 1 (message base 6, result 20).
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
	int color, alpha;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	alpha = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);

	brw_wm_write__mask(p, dispatch, color, alpha);
	return true;
}
 
/*
 * Assemble the component-alpha kernel for an affine source and an affine
 * mask: both are sampled with all four components (source: channel 0,
 * message base 1, result 12; mask: channel 1, message base 6, result 20)
 * and multiplied channel by channel.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
	int color, factor;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	factor = brw_wm_affine(p, dispatch, 1, 6, 20);

	brw_wm_write__mask_ca(p, dispatch, color, factor);
	return true;
}
 
/*
 * Assemble the source-alpha kernel for an affine source and mask: only
 * the alpha of channel 0 is sampled (message base 1, result 12), while
 * channel 1 is sampled in full (message base 6, result 16).  The full
 * channel-1 colour is then written, scaled by the channel-0 alpha.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src_alpha = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
	mask_color = brw_wm_affine(p, dispatch, 1, 6, 16);

	/* Note the operand order: the mask colour is the multiplicand. */
	brw_wm_write__mask(p, dispatch, mask_color, src_alpha);
	return true;
}
 
/* Projective variants */
 
static void brw_wm_projective_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
/* First compute 1/z */
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv+1, 0),
brw_vec8_grf(2, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(4, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
/* First compute 1/z */
brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
 
/* Now compute the output s,t values */
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
}
}
 
/*
 * Compute projective texture coordinates for @channel, then sample all
 * four components.  Returns the register number reported by
 * brw_wm_sample().
 */
static int brw_wm_projective(struct brw_compile *p, int dw,
			     int channel, int msg, int result)
{
	int reg;

	brw_wm_projective_st(p, dw, channel, msg);
	reg = brw_wm_sample(p, dw, channel, msg, result);

	return reg;
}
 
/*
 * Compute projective texture coordinates for @channel, then sample only
 * the alpha component.  Returns the register number reported by
 * brw_wm_sample__alpha().
 */
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
				    int channel, int msg, int result)
{
	int reg;

	brw_wm_projective_st(p, dw, channel, msg);
	reg = brw_wm_sample__alpha(p, dw, channel, msg, result);

	return reg;
}
 
/*
 * Assemble the kernel for a projective source with no mask: sample
 * channel 0 (message base 1, result register 12) and write it out.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
	int src;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src = brw_wm_projective(p, dispatch, 0, 1, 12);
	brw_wm_write(p, dispatch, src);

	return true;
}
 
/*
 * Assemble the kernel for a projective source multiplied by the alpha of
 * a projective mask.  The source is channel 0 (message base 1, result
 * 12); the mask alpha is channel 1 (message base 6, result 20).
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
	int color, alpha;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	alpha = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);

	brw_wm_write__mask(p, dispatch, color, alpha);
	return true;
}
 
/*
 * Assemble the component-alpha kernel for a projective source and mask:
 * both are sampled with all four components (source: channel 0, message
 * base 1, result 12; mask: channel 1, message base 6, result 20) and
 * multiplied channel by channel.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
	int color, factor;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	factor = brw_wm_projective(p, dispatch, 1, 6, 20);

	brw_wm_write__mask_ca(p, dispatch, color, factor);
	return true;
}
 
/*
 * Assemble the source-alpha kernel for a projective source and mask: only
 * the alpha of channel 0 is sampled (message base 1, result 12), while
 * channel 1 is sampled in full (message base 6, result 16).  The full
 * channel-1 colour is then written, scaled by the channel-0 alpha.
 *
 * Before gen6 brw_wm_xy() is emitted first.  Always returns true.
 */
bool
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
	int src_alpha, mask_color;

	if (p->gen < 060)
		brw_wm_xy(p, dispatch);

	src_alpha = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
	mask_color = brw_wm_projective(p, dispatch, 1, 6, 16);

	/* Note the operand order: the mask colour is the multiplicand. */
	brw_wm_write__mask(p, dispatch, mask_color, src_alpha);
	return true;
}
 
/*
 * Assemble the kernel for an affine source scaled by a constant opacity.
 *
 * The opacity is read from register 5 before gen6 (where brw_wm_xy() is
 * also emitted first); from gen6 on it is register 8 for a dispatch width
 * of 16 and register 6 otherwise.  Always returns true.
 */
bool
brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
	int color, opacity;

	if (p->gen >= 060) {
		opacity = (dispatch == 16) ? 8 : 6;
	} else {
		brw_wm_xy(p, dispatch);
		opacity = 5;
	}

	color = brw_wm_affine(p, dispatch, 0, 1, 12);
	brw_wm_write__opacity(p, dispatch, color, opacity);

	return true;
}
 
/*
 * Assemble the kernel for a projective source scaled by a constant
 * opacity.
 *
 * The opacity is read from register 5 before gen6 (where brw_wm_xy() is
 * also emitted first); from gen6 on it is register 8 for a dispatch width
 * of 16 and register 6 otherwise.  Always returns true.
 */
bool
brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
	int color, opacity;

	if (p->gen >= 060) {
		opacity = (dispatch == 16) ? 8 : 6;
	} else {
		brw_wm_xy(p, dispatch);
		opacity = 5;
	}

	color = brw_wm_projective(p, dispatch, 0, 1, 12);
	brw_wm_write__opacity(p, dispatch, color, opacity);

	return true;
}
/drivers/video/Intel-2D/sna/compiler.h
0,0 → 1,92
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef _SNA_COMPILER_H_
#define _SNA_COMPILER_H_
 
/*
 * Compiler hint and attribute shorthands.
 *
 * With an optimising GCC (major version > 2) each name expands to the
 * matching builtin or __attribute__; in every other configuration the
 * names expand to nothing (likely/unlikely to the bare expression,
 * force_inline to plain inline) so code using them still compiles.
 *
 * Both branches must define exactly the same set of names.  The fallback
 * branch used to define the misspelt "tighly_packed", which left
 * tightly_packed undefined in unoptimised or non-GCC builds.
 */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#define likely(expr) (__builtin_expect (!!(expr), 1))
#define unlikely(expr) (__builtin_expect (!!(expr), 0))
#define noinline __attribute__((noinline))
#define force_inline inline __attribute__((always_inline))
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define tightly_packed __attribute__((__packed__))
#define flatten __attribute__((flatten))
#define page_aligned __attribute__((aligned(4096)))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline inline
#define fastcall
#define must_check
#define constant
#define pure
#define tightly_packed
#define flatten
#define page_aligned
#endif
 
/*
 * HAS_GCC(major, minor): non-zero when the compiler defines __GNUC__ and
 * reports at least version major.minor, zero otherwise.
 *
 * The __GNUC__ test is done outside the macro body: a macro that expands
 * to "defined(...)" inside #if has undefined behaviour in ISO C and
 * cannot be used in an ordinary expression at all.
 */
#if defined(__GNUC__)
#define HAS_GCC(major, minor) \
	(__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#define HAS_GCC(major, minor) 0
#endif

/*
 * Per-function target selection.  These names are only defined when the
 * compiler supports the target attribute, so users can test for them
 * with #if defined(sse2) and so on.
 */
#if HAS_GCC(4, 5)
#define sse2 __attribute__((target("sse2,fpmath=sse")))
#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
#endif

#if HAS_GCC(4, 7)
#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
#endif

/* Per-function -Ofast; expands to nothing when unsupported or not optimising. */
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast __attribute__((optimize("Ofast")))
#else
#define fast
#endif

/* Attributes for hand-written copy routines; empty when unsupported. */
#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
#define fast_memcpy __attribute__((target("inline-all-stringops")))
#else
#define fast_memcpy
#endif
 
/*
 * VG(x) keeps its argument only when HAVE_VALGRIND is defined; otherwise
 * the argument is dropped entirely, so it must not contain code with
 * required side effects.
 */
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
#define VG(x)
#endif

/*
 * Zero the object s (an lvalue, not a pointer) under HAVE_VALGRIND; a
 * no-op otherwise.  The expansion calls memset, so a declaration of
 * memset must be visible where this is used in such builds.
 */
#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))

/*
 * Compile-time check: when E is false the array size becomes -1 and the
 * expression fails to compile; when true it is a harmless void expression.
 */
#define COMPILE_TIME_ASSERT(E) ((void)sizeof(char[1 - 2*!(E)]))
 
#endif /* _SNA_COMPILER_H_ */
/drivers/video/Intel-2D/sna/gen3_render.c
0,0 → 1,2814
/*
* Copyright © 2010-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
//#include "sna_video.h"
 
#include "gen3_render.h"
 
/*
 * Build-time switches.  Their uses are outside this part of the file;
 * presumably setting a NO_* switch to 1 disables the matching render
 * path for debugging -- confirm against the users.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* Presumably selects the BLT engine for fills when non-zero -- confirm. */
#define PREFER_BLT_FILL 1
 
/*
 * Shader input types for a composite channel (stored in
 * channel->u.gen3.type).  gen3_composite_emit_shader() packs the source
 * type into the bits below bit 4 of its shader id and the mask type into
 * the bits from 8 up to bit 12, so the values must stay below 16.
 */
enum {
SHADER_NONE = 0,
SHADER_ZERO,
SHADER_BLACK,
SHADER_WHITE,
SHADER_CONSTANT,
SHADER_LINEAR,
SHADER_RADIAL,
SHADER_TEXTURE,
SHADER_OPACITY,
};
 
/* Largest width/height accepted by too_large(), and the matching pitch limit. */
#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

/*
 * Emission shorthands.  Each expands to a call that takes `sna`, so a
 * variable of that name must be in scope wherever they are used.
 */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH_F(v) batch_emit_float(sna, v)
#define OUT_VERTEX(v) vertex_emit(sna, v)
 
/*
 * Radial gradient variants (channel->u.gen3.mode), selecting which
 * formula gen3_radial_coord() emits: RADIAL_ONE for the single-square-root
 * form, RADIAL_TWO for the full quadratic solution.
 */
enum gen3_radial_mode {
RADIAL_ONE,
RADIAL_TWO
};
 
/*
 * Blend factors per composite operator, indexed by the operator number
 * (the row comments name each operator; the order presumably follows the
 * Render PictOp numbering -- confirm).
 *
 * dst_alpha / src_alpha record whether the operator's factors read the
 * destination / source alpha; src_blend and dst_blend are the BLENDFACT_*
 * values for the source and destination terms.
 */
static const struct blendinfo {
bool dst_alpha;
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen3_blend_op[] = {
/* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
/* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
/* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
/* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
/* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
/* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
/* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
/* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
/* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
};
 
/*
 * Blend-control value with colour writes enabled and the ADD function
 * using factors ONE (source) and ZERO (destination); the blend-enable
 * bit is not set.
 */
#define S6_COLOR_WRITE_ONLY \
(S6_COLOR_WRITE_ENABLE | \
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \
BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \
BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT)
 
/*
 * Texture formats known to this backend.
 *
 * fmt:         picture format of the entry
 * xfmt:        an alpha-less picture format sharing the same hardware
 *              format, or 0 when there is none (presumably handled with
 *              an alpha fixup by the lookup code -- confirm)
 * card_fmt:    MAPSURF_* | MT_* hardware encoding
 * rb_reversed: true when the entry is the red/blue-swapped twin of the
 *              hardware format it maps to
 */
static const struct formatinfo {
unsigned int fmt, xfmt;
uint32_t card_fmt;
bool rb_reversed;
} gen3_tex_formats[] = {
{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
};
 
/* X-server style spelling of pixman_fixed_to_double(). */
#define xFixedToDouble(f) pixman_fixed_to_double(f)
 
/* True when either dimension exceeds MAX_3D_SIZE. */
static inline bool too_large(int width, int height)
{
	if (width > MAX_3D_SIZE)
		return true;

	return height > MAX_3D_SIZE;
}
 
/*
 * Translate an I915_TILING_* mode into BUF_3D_* tiling bits: X tiling
 * sets the tiled-surface bit, Y tiling additionally sets the Y-walk bit,
 * and any other value yields 0.
 */
static inline uint32_t gen3_buf_tiling(uint32_t tiling)
{
	if (tiling == I915_TILING_Y)
		return BUF_3D_TILE_WALK_Y | BUF_3D_TILED_SURFACE;

	if (tiling == I915_TILING_X)
		return BUF_3D_TILED_SURFACE;

	return 0;
}
/*
 * Build the blend-control value for a composite operation.
 *
 * As compiled, all three parameters are ignored: the operator-dependent
 * logic sits inside the #if 0 block below, so the result is always
 * "blend enabled, ADD, source factor ONE, destination factor
 * INV_SRC_ALPHA".
 *
 * NOTE(review): if the disabled block is ever re-enabled, sblend/dblend
 * would presumably have to be loaded from gen3_blend_op[op] first; with
 * the hard-coded initial values most of its branches cannot trigger.
 */
static uint32_t gen3_get_blend_cntl(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t sblend;
uint32_t dblend;

sblend = BLENDFACT_ONE;
dblend = BLENDFACT_INV_SRC_ALPHA;

#if 0
if (op <= PictOpSrc) /* for clear and src disable blending */
return S6_COLOR_WRITE_ONLY;

/* If there's no dst alpha channel, adjust the blend op so that we'll
* treat it as always 1.
*/
if (gen3_blend_op[op].dst_alpha) {
if (PICT_FORMAT_A(dst_format) == 0) {
if (sblend == BLENDFACT_DST_ALPHA)
sblend = BLENDFACT_ONE;
else if (sblend == BLENDFACT_INV_DST_ALPHA)
sblend = BLENDFACT_ZERO;
}

/* gen3 engine reads 8bit color buffer into green channel
* in cases like color buffer blending etc., and also writes
* back green channel. So with dst_alpha blend we should use
* color factor. See spec on "8-bit rendering".
*/
if (dst_format == PICT_a8) {
if (sblend == BLENDFACT_DST_ALPHA)
sblend = BLENDFACT_DST_COLR;
else if (sblend == BLENDFACT_INV_DST_ALPHA)
sblend = BLENDFACT_INV_DST_COLR;
}
}

/* If the source alpha is being used, then we should only be in a case
* where the source blend factor is 0, and the source blend value is the
* mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen3_blend_op[op].src_alpha) {
if (dblend == BLENDFACT_SRC_ALPHA)
dblend = BLENDFACT_SRC_COLR;
else if (dblend == BLENDFACT_INV_SRC_ALPHA)
dblend = BLENDFACT_INV_SRC_COLR;
}
#endif

return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
}
/*
 * Report whether a destination format is treated as red/blue swapped.
 * The listed formats are not; every other format, including unknown
 * ones, is reported as reversed.
 */
static bool gen3_dst_rb_reversed(uint32_t format)
{
	bool reversed = true;

	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_r5g6b5:
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
	case PICT_a8:
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		reversed = false;
		break;
	default:
		break;
	}

	return reversed;
}
 
/* Place a bias value in the horizontal (from bit 20) or vertical (from bit 16) field. */
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
 
/*
 * Map a picture format to the colour-buffer format bits, always combined
 * with a horizontal and vertical bias of 0x8.
 *
 * Red/blue-swapped formats share the encoding of their RGB twin.
 * Unknown formats fall back to ARGB8888.
 */
static uint32_t gen3_get_dst_format(uint32_t format)
{
#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
	uint32_t value;

	switch (format) {
	case PICT_r5g6b5:
	case PICT_b5g6r5:
		value = BIAS | COLR_BUF_RGB565;
		break;
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
	case PICT_a1b5g5r5:
	case PICT_x1b5g5r5:
		value = BIAS | COLR_BUF_ARGB1555;
		break;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
	case PICT_a2b10g10r10:
	case PICT_x2b10g10r10:
		value = BIAS | COLR_BUF_ARGB2AAA;
		break;
	case PICT_a8:
		value = BIAS | COLR_BUF_8BIT;
		break;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
	case PICT_a4b4g4r4:
	case PICT_x4b4g4r4:
		value = BIAS | COLR_BUF_ARGB4444;
		break;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
	default:
		value = BIAS | COLR_BUF_ARGB8888;
		break;
	}

	return value;
#undef BIAS
}
 
 
#if 0
/*
 * Report whether the picture's repeat mode is one this backend handles.
 * A picture without repeat is always accepted; otherwise the repeat type
 * must be None, Normal, Pad or Reflect.
 */
static bool gen3_check_repeat(PicturePtr p)
{
	bool supported = true;

	if (p->repeat) {
		switch (p->repeatType) {
		case RepeatNone:
		case RepeatNormal:
		case RepeatPad:
		case RepeatReflect:
			break;
		default:
			supported = false;
			break;
		}
	}

	return supported;
}
 
/*
 * Translate a picture filter into sampler-state filter bits.  The
 * magnification and minification filters are set alike and mip filtering
 * is disabled.
 *
 * Only nearest and bilinear are expected.  Any other value trips the
 * assertion and, when assertions are compiled out, is treated as nearest.
 */
static uint32_t gen3_filter(uint32_t filter)
{
	if (filter == PictFilterBilinear)
		return (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT |
			FILTER_LINEAR << SS2_MIN_FILTER_SHIFT |
			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);

	if (filter != PictFilterNearest)
		assert(0);

	return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
		FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
		MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
}
 
/* Report whether the picture uses a supported filter (nearest or bilinear). */
static bool gen3_check_filter(PicturePtr p)
{
	if (p->filter == PictFilterNearest)
		return true;

	return p->filter == PictFilterBilinear;
}
/*
 * Emit the three vertices of one composite rectangle for an untransformed
 * gradient source.  Each vertex is a destination coordinate followed by
 * the unscaled source coordinate; the corners are emitted in the order
 * (x+w, y+h), (x, y+h), (x, y).
 */
fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
						const struct sna_composite_op *op,
						const struct sna_composite_rectangles *r)
{
	/* Origins are narrowed to 16 bits before the extents are added. */
	int16_t dst_x = r->dst.x + op->dst.x;
	int16_t dst_y = r->dst.y + op->dst.y;
	int16_t src_x = r->src.x + op->src.offset[0];
	int16_t src_y = r->src.y + op->src.offset[1];

	/* (x+w, y+h) */
	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	OUT_VERTEX(src_x + r->width);
	OUT_VERTEX(src_y + r->height);

	/* (x, y+h) */
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y + r->height);

	/* (x, y) */
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y);
}
 
/*
 * Write vertices for @nbox boxes with an untransformed gradient source.
 * Every box produces three vertices of four floats each (destination x,
 * y, then source x, y offset by op->src.offset), in the corner order
 * (x2, y2), (x1, y2), (x1, y1).
 *
 * @nbox must be at least 1: the loop body runs before the count is
 * tested.
 */
fastcall static void
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
					    const BoxRec *box, int nbox,
					    float *v)
{
	do {
		/* (x2, y2) */
		*v++ = box->x2;
		*v++ = box->y2;
		*v++ = box->x2 + op->src.offset[0];
		*v++ = box->y2 + op->src.offset[1];

		/* (x1, y2) */
		*v++ = box->x1;
		*v++ = box->y2;
		*v++ = box->x1 + op->src.offset[0];
		*v++ = box->y2 + op->src.offset[1];

		/* (x1, y1) */
		*v++ = box->x1;
		*v++ = box->y1;
		*v++ = box->x1 + op->src.offset[0];
		*v++ = box->y1 + op->src.offset[1];

		box++;
	} while (--nbox);
}
/*
 * Write vertices for @nbox boxes with a transformed gradient source.
 * Every box produces three vertices of four floats each: the destination
 * corner, followed by the source coordinate obtained by passing the
 * offset corner through _sna_get_transformed_scaled().  Corners are
 * written in the order (x2, y2), (x1, y2), (x1, y1).
 *
 * @nbox must be at least 1: the loop body runs before the count is
 * tested.
 */
fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	const PictTransform *transform = op->src.transform;

	for (;; box++, v += 12) {
		/* (x2, y2) */
		v[0] = box->x2;
		v[1] = box->y2;
		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[2], &v[3]);

		/* (x1, y2) */
		v[4] = box->x1;
		v[5] = box->y2;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[6], &v[7]);

		/* (x1, y1) */
		v[8] = box->x1;
		v[9] = box->y1;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y1 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[10], &v[11]);

		if (--nbox == 0)
			break;
	}
}
 
/*
 * Append the three vertices of one composite rectangle for an
 * untransformed textured source directly to the vertex buffer.
 *
 * Twelve floats are reserved: per vertex the destination x, y and the
 * scaled source x, y.  Vertex 0 is the (x+w, y+h) corner, vertex 1 the
 * (x, y+h) corner and vertex 2 the (x, y) corner.
 */
fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
					      const struct sna_composite_op *op,
					      const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v = sna->render.vertices + sna->render.vertex_used;

	sna->render.vertex_used += 12;

	/* destination x */
	v[4] = r->dst.x + op->dst.x;
	v[8] = v[4];
	v[0] = v[4] + w;

	/* destination y */
	v[9] = r->dst.y + op->dst.y;
	v[1] = v[9] + h;
	v[5] = v[1];

	/* source x */
	v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[10] = v[6];
	v[2] = v[6] + w * op->src.scale[0];

	/* source y */
	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[3] = v[11] + h * op->src.scale[1];
	v[7] = v[3];
}
 
/*
 * Write vertices for @nbox boxes with an untransformed textured source.
 * Every box produces three vertices of four floats each: the destination
 * corner offset by op->dst, then the source corner offset and scaled.
 * Corners are written in the order (x2, y2), (x1, y2), (x1, y1).
 *
 * @nbox must be at least 1: the loop body runs before the count is
 * tested.
 */
fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	do {
		/* destination x */
		v[0] = box->x2 + op->dst.x;
		v[4] = box->x1 + op->dst.x;
		v[8] = v[4];

		/* destination y */
		v[1] = box->y2 + op->dst.y;
		v[5] = v[1];
		v[9] = box->y1 + op->dst.y;

		/* source x */
		v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[10] = v[6];
		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		/* source y */
		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
		v[7] = v[3];

		box++;
		v += 12;
	} while (--nbox);
}
 
/*
 * Append the three vertices of one composite rectangle for an
 * untransformed textured source, ignoring op->dst and op->src.offset
 * (the rectangle's own coordinates are used as they are).
 *
 * Twelve floats are reserved: per vertex the destination x, y and the
 * scaled source x, y.  Vertex 0 is the (x+w, y+h) corner, vertex 1 the
 * (x, y+h) corner and vertex 2 the (x, y) corner.
 */
fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
							const struct sna_composite_op *op,
							const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v = sna->render.vertices + sna->render.vertex_used;

	sna->render.vertex_used += 12;

	/* destination x */
	v[4] = r->dst.x;
	v[8] = v[4];
	v[0] = v[4] + w;

	/* destination y */
	v[9] = r->dst.y;
	v[1] = v[9] + h;
	v[5] = v[1];

	/* source x */
	v[6] = r->src.x * op->src.scale[0];
	v[10] = v[6];
	v[2] = v[6] + w * op->src.scale[0];

	/* source y */
	v[11] = r->src.y * op->src.scale[1];
	v[3] = v[11] + h * op->src.scale[1];
	v[7] = v[3];
}
/*
 * Append the three vertices of one composite rectangle when only the
 * mask supplies texture coordinates.
 *
 * Twelve floats are reserved: per vertex the destination x, y and the
 * scaled mask x, y.  Vertex 0 is the (x+w, y+h) corner, vertex 1 the
 * (x, y+h) corner and vertex 2 the (x, y) corner.
 */
fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v = sna->render.vertices + sna->render.vertex_used;

	sna->render.vertex_used += 12;

	/* destination x */
	v[4] = r->dst.x + op->dst.x;
	v[8] = v[4];
	v[0] = v[4] + w;

	/* destination y */
	v[9] = r->dst.y + op->dst.y;
	v[1] = v[9] + h;
	v[5] = v[1];

	/* mask x */
	v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[10] = v[6];
	v[2] = v[6] + w * op->mask.scale[0];

	/* mask y */
	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[3] = v[11] + h * op->mask.scale[1];
	v[7] = v[3];
}
#endif
 
/*
 * Append the three vertices of one composite rectangle when both source
 * and mask are untransformed textures.
 *
 * Eighteen floats are reserved: per vertex the destination x, y, the
 * scaled source x, y and the scaled mask x, y.  Vertex 0 is the
 * (x+w, y+h) corner, vertex 1 the (x, y+h) corner and vertex 2 the
 * (x, y) corner; shared components are copied from the vertex that
 * computed them first.
 */
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
						   const struct sna_composite_op *op,
						   const struct sna_composite_rectangles *r)
{
	float dst_x = r->dst.x + op->dst.x;
	float dst_y = r->dst.y + op->dst.y;
	float src_x = r->src.x + op->src.offset[0];
	float src_y = r->src.y + op->src.offset[1];
	float msk_x = r->mask.x + op->mask.offset[0];
	float msk_y = r->mask.y + op->mask.offset[1];
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 18;

	/* vertex 0: (x+w, y+h) */
	v[0] = dst_x + w;
	v[1] = dst_y + h;
	v[2] = (src_x + w) * op->src.scale[0];
	v[3] = (src_y + h) * op->src.scale[1];
	v[4] = (msk_x + w) * op->mask.scale[0];
	v[5] = (msk_y + h) * op->mask.scale[1];

	/* vertex 1: (x, y+h); the y components repeat vertex 0 */
	v[6] = dst_x;
	v[7] = v[1];
	v[8] = src_x * op->src.scale[0];
	v[9] = v[3];
	v[10] = msk_x * op->mask.scale[0];
	v[11] = v[5];

	/* vertex 2: (x, y); the x components repeat vertex 1 */
	v[12] = v[6];
	v[13] = dst_y;
	v[14] = v[8];
	v[15] = src_y * op->src.scale[1];
	v[16] = v[10];
	v[17] = msk_y * op->mask.scale[1];
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Emit the perspective divide for a 2D coordinate: take the reciprocal of
 * in.w into @out, then multiply (in.x, in.y, 0, 1) by it, leaving the
 * result in @out.
 *
 * @sna is not referenced by name here; presumably the gen3_fs_* macros
 * expand to batch emission that needs it in scope -- confirm.
 */
static inline void
gen3_2d_perspective(struct sna *sna, int in, int out)
{
gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
gen3_fs_mul(out,
gen3_fs_operand(in, X, Y, ZERO, ONE),
gen3_fs_operand_reg(out));
}
 
/*
 * Emit the shader ops that turn a 2D coordinate into a linear-gradient
 * coordinate: @out is cleared, then out.x receives the 3-component dot
 * product of (in.x, in.y, 1) with the constant register recorded in
 * channel->u.gen3.constants.
 *
 * For a non-affine channel the input is first perspective-divided into
 * FS_U0, which then replaces @in.
 */
static inline void
gen3_linear_coord(struct sna *sna,
const struct sna_composite_channel *channel,
int in, int out)
{
int c = channel->u.gen3.constants;

if (!channel->is_affine) {
gen3_2d_perspective(sna, in, FS_U0);
in = FS_U0;
}

gen3_fs_mov(out, gen3_fs_operand_zero());
gen3_fs_dp3(out, MASK_X,
gen3_fs_operand(in, X, Y, ONE, ZERO),
gen3_fs_operand_reg(c));
}
 
/*
 * Emit the shader ops that turn a 2D coordinate into the radial-gradient
 * parameter t, using the constants starting at register
 * channel->u.gen3.constants.  channel->u.gen3.mode selects the formula;
 * the per-case comments give the maths being implemented.
 *
 * For a non-affine channel the input is first perspective-divided into
 * FS_U0, which then replaces @in.  FS_U0 (and FS_U1 for RADIAL_TWO) are
 * used as scratch registers.  Any other mode value emits nothing further.
 */
static void
gen3_radial_coord(struct sna *sna,
const struct sna_composite_channel *channel,
int in, int out)
{
int c = channel->u.gen3.constants;

if (!channel->is_affine) {
gen3_2d_perspective(sna, in, FS_U0);
in = FS_U0;
}

switch (channel->u.gen3.mode) {
case RADIAL_ONE:
/*
pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
r^2 = pdx*pdx + pdy*pdy
t = r^2/sqrt(r^2) - r1/dr;
*/
gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
gen3_fs_operand(in, X, Y, ZERO, ZERO),
gen3_fs_operand(c, Z, Z, ZERO, ZERO),
gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
gen3_fs_dp2add(FS_U0, MASK_X,
gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
gen3_fs_operand_zero());
gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
gen3_fs_mad(out, 0,
gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
break;

case RADIAL_TWO:
/*
pdx = x - c1x, pdy = y - c1y;
A = dx^2 + dy^2 - dr^2
B = -2*(pdx*dx + pdy*dy + r1*dr);
C = pdx^2 + pdy^2 - r1^2;
det = B*B - 4*A*C;
t = (-B + sqrt (det)) / (2 * A)
*/

/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
gen3_fs_add(FS_U0,
gen3_fs_operand(in, X, Y, ZERO, ZERO),
gen3_fs_operand(c, X, Y, Z, ZERO));
/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
gen3_fs_dp3(FS_U0, MASK_W,
gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
gen3_fs_operand(c+1, X, Y, Z, ZERO));
/* u1.x = pdx^2 + pdy^2 - r1^2; [C] */
gen3_fs_dp3(FS_U1, MASK_X,
gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
/* u1.x = C, u1.y = B, u1.z=-4*A; */
gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
/* u1.x = B^2 - 4*A*C */
gen3_fs_dp2add(FS_U1, MASK_X,
gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
gen3_fs_operand_zero());
/* out.x = -B + sqrt (B^2 - 4*A*C), */
gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
gen3_fs_mad(out, MASK_X,
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
/* out.x = (-B + sqrt (B^2 - 4*A*C)) / (2 * A), */
gen3_fs_mul(out,
gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
break;
}
}
 
/*
 * Emit the pixel shader program for a composite operation into the batch.
 *
 * sna:   driver state; the program is appended to sna->kgem.batch.
 * op:    composite operation describing the src, mask and dst channels.
 * blend: index into gen3_blend_op[]; only its src_alpha flag is read here.
 *
 * A key ("id") is packed from the channel types and flags. If it equals
 * state->last_shader nothing is emitted. Otherwise one batch dword is
 * reserved for the _3DSTATE_PIXEL_SHADER_PROGRAM header, the instructions
 * are emitted through the gen3_fs_* helpers, and the header is patched
 * with the final length at the "done" label.
 */
static void
gen3_composite_emit_shader(struct sna *sna,
const struct sna_composite_op *op,
uint8_t blend)
{
/* true when the destination format has no RGB bits */
bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
const struct sna_composite_channel *src, *mask;
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t shader_offset, id;
int src_reg, mask_reg;
int t, length;

src = &op->src;
mask = &op->mask;
/* A mask of type SHADER_NONE is handled as "no mask". */
if (mask->u.gen3.type == SHADER_NONE)
mask = NULL;

/* Pack every input that selects a different program into the key. */
id = (src->u.gen3.type |
src->is_affine << 4 |
src->alpha_fixup << 5 |
src->rb_reversed << 6);
if (mask) {
id |= (mask->u.gen3.type << 8 |
mask->is_affine << 12 |
gen3_blend_op[blend].src_alpha << 13 |
op->has_component_alpha << 14 |
mask->alpha_fixup << 15 |
mask->rb_reversed << 16);
}
id |= dst_is_alpha << 24;
id |= op->rb_reversed << 25;

/* Same key as the previously emitted program: nothing to do. */
if (id == state->last_shader)
return;

state->last_shader = id;

/* Reserve the header dword; it is filled in at "done" below. */
shader_offset = sna->kgem.nbatch++;
/* t counts the S/T register pairs declared for the source, so that
 * the mask declarations below use the following pair. */
t = 0;
switch (src->u.gen3.type) {
case SHADER_NONE:
case SHADER_OPACITY:
/* NOTE(review): if assert() is compiled out this falls through
 * to the break below. */
assert(0);
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
gen3_fs_dcl(FS_T8);
src_reg = FS_T8;
break;
case SHADER_TEXTURE:
case SHADER_RADIAL:
case SHADER_LINEAR:
gen3_fs_dcl(FS_S0);
gen3_fs_dcl(FS_T0);
t++;
break;
}

if (mask == NULL) {
/* Solid sources need only a single move into the output colour. */
switch (src->u.gen3.type) {
case SHADER_ZERO:
gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
goto done;
case SHADER_BLACK:
if (dst_is_alpha)
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
else
gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
goto done;
case SHADER_WHITE:
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
goto done;
}
/* Alpha forced to one and an alpha-only target: output is all ones. */
if (src->alpha_fixup && dst_is_alpha) {
gen3_fs_mov(FS_OC, gen3_fs_operand_one());
goto done;
}
/* No mask, so load directly to output color */
if (src->u.gen3.type != SHADER_CONSTANT) {
/* A temporary is needed when the sample must be swizzled
 * (alpha replicate or red/blue swap) before being written. */
if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
src_reg = FS_R0;
else
src_reg = FS_OC;
}
switch (src->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(src_reg, FS_S0, FS_R0);
break;

case SHADER_RADIAL:
gen3_radial_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(src_reg, FS_S0, FS_R0);
break;

case SHADER_TEXTURE:
if (src->is_affine)
gen3_fs_texld(src_reg, FS_S0, FS_T0);
else
gen3_fs_texldp(src_reg, FS_S0, FS_T0);
break;

case SHADER_NONE:
case SHADER_WHITE:
case SHADER_BLACK:
case SHADER_ZERO:
assert(0);
case SHADER_CONSTANT:
break;
}

/* Move the sampled value to FS_OC applying the required swizzle,
 * or just force alpha to one if it was sampled straight into FS_OC. */
if (src_reg != FS_OC) {
if (src->alpha_fixup)
gen3_fs_mov(FS_OC,
src->rb_reversed ^ op->rb_reversed ?
gen3_fs_operand(src_reg, Z, Y, X, ONE) :
gen3_fs_operand(src_reg, X, Y, Z, ONE));
else if (dst_is_alpha)
gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
else if (src->rb_reversed ^ op->rb_reversed)
gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
else
gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
} else if (src->alpha_fixup)
gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
} else {
/* With op->rb_reversed the result is staged in FS_U0 and swizzled
 * into FS_OC at mask_done. */
int out_reg = FS_OC;
if (op->rb_reversed)
out_reg = FS_U0;

/* Declare the registers the mask reads. */
switch (mask->u.gen3.type) {
case SHADER_CONSTANT:
gen3_fs_dcl(FS_T9);
mask_reg = FS_T9;
break;
case SHADER_TEXTURE:
case SHADER_LINEAR:
case SHADER_RADIAL:
gen3_fs_dcl(FS_S0 + t);
/* fall through */
case SHADER_OPACITY:
gen3_fs_dcl(FS_T0 + t);
break;
case SHADER_ZERO:
case SHADER_BLACK:
assert(0);
case SHADER_NONE:
case SHADER_WHITE:
break;
}

/* Load the source into FS_R0; t is recounted while doing so. */
t = 0;
switch (src->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(FS_R0, FS_S0, FS_R0);
src_reg = FS_R0;
t++;
break;

case SHADER_RADIAL:
gen3_radial_coord(sna, src, FS_T0, FS_R0);
gen3_fs_texld(FS_R0, FS_S0, FS_R0);
src_reg = FS_R0;
t++;
break;

case SHADER_TEXTURE:
if (src->is_affine)
gen3_fs_texld(FS_R0, FS_S0, FS_T0);
else
gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
src_reg = FS_R0;
t++;
break;

case SHADER_CONSTANT:
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
}
/* NOTE(review): src_reg is not assigned on this path for
 * ZERO/BLACK/WHITE sources; presumably alpha_fixup and rb_reversed
 * are never set for those types - confirm against the callers. */
if (src->alpha_fixup)
gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
if (src->rb_reversed)
gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));

/* Load the mask into FS_R1. An opacity mask is read directly from
 * the texcoord register and jumps straight to mask_done. */
switch (mask->u.gen3.type) {
case SHADER_LINEAR:
gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
mask_reg = FS_R1;
break;

case SHADER_RADIAL:
gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
mask_reg = FS_R1;
break;

case SHADER_TEXTURE:
if (mask->is_affine)
gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
else
gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
mask_reg = FS_R1;
break;

case SHADER_OPACITY:
switch (src->u.gen3.type) {
case SHADER_BLACK:
case SHADER_WHITE:
if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
gen3_fs_mov(out_reg,
gen3_fs_operand(FS_T0 + t, X, X, X, X));
} else {
gen3_fs_mov(out_reg,
gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
}
break;
default:
if (dst_is_alpha) {
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand(FS_T0 + t, X, X, X, X));
} else {
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, X, Y, Z, W),
gen3_fs_operand(FS_T0 + t, X, X, X, X));
}
}
goto mask_done;

case SHADER_CONSTANT:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_NONE:
break;
}
/* NOTE(review): mask_reg is not assigned for a SHADER_WHITE mask
 * but is read below; verify such a mask cannot reach this point. */
if (mask->alpha_fixup)
gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
if (mask->rb_reversed)
gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));

if (dst_is_alpha) {
/* Alpha-only target: only the alpha channels are combined. */
switch (src->u.gen3.type) {
case SHADER_BLACK:
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, W, W, W, W));
break;
default:
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand(mask_reg, W, W, W, W));
break;
}
} else {
/* If component alpha is active in the mask and the blend
 * operation uses the source alpha, then we know we don't
 * need the source value (otherwise we would have hit a
 * fallback earlier), so we provide the source alpha (src.A *
 * mask.X) as output color.
 * Conversely, if CA is set and we don't need the source alpha,
 * then we produce the source value (src.X * mask.X) and the
 * source alpha is unused. Otherwise, we provide the non-CA
 * source value (src.X * mask.A).
 */
if (op->has_component_alpha) {
switch (src->u.gen3.type) {
case SHADER_BLACK:
if (gen3_blend_op[blend].src_alpha)
gen3_fs_mov(out_reg,
gen3_fs_operand_reg(mask_reg));
else
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
break;
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand_reg(mask_reg));
break;
default:
if (gen3_blend_op[blend].src_alpha)
gen3_fs_mul(out_reg,
gen3_fs_operand(src_reg, W, W, W, W),
gen3_fs_operand_reg(mask_reg));
else
gen3_fs_mul(out_reg,
gen3_fs_operand_reg(src_reg),
gen3_fs_operand_reg(mask_reg));
break;
}
} else {
switch (src->u.gen3.type) {
case SHADER_WHITE:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, W, W, W, W));
break;
case SHADER_BLACK:
gen3_fs_mov(out_reg,
gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
break;
default:
gen3_fs_mul(out_reg,
gen3_fs_operand_reg(src_reg),
gen3_fs_operand(mask_reg, W, W, W, W));
break;
}
}
}
mask_done:
/* Final red/blue swap from the staging register into the output. */
if (op->rb_reversed)
gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
}

done:
/* Patch the reserved header dword with the program length. */
length = sna->kgem.nbatch - shader_offset;
sna->kgem.batch[shader_offset] =
_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
}
 
/*
 * Translate a buffer tiling mode into the tiling flag bits that are OR'ed
 * into the first map-state dword.
 *
 * Y tiling yields MS3_TILE_WALK | MS3_TILED_SURFACE, X tiling yields
 * MS3_TILED_SURFACE alone, and any other value yields 0.
 */
static uint32_t gen3_ms_tiling(uint32_t tiling)
{
	if (tiling == I915_TILING_Y)
		return MS3_TILE_WALK | MS3_TILED_SURFACE;

	if (tiling == I915_TILING_X)
		return MS3_TILED_SURFACE;

	return 0;
}
 
/*
 * Emit the fixed block of 3D state that does not depend on the operation
 * being rendered, then clear state->need_invariant. Within this chunk it
 * is called from gen3_get_batch() whenever need_invariant is set.
 */
static void gen3_emit_invariant(struct sna *sna)
{
/* Disable independent alpha blend */
OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);

/* Bind texture coordinate set n to itself for n = 0..7 */
OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
CSB_TCB(0, 0) |
CSB_TCB(1, 1) |
CSB_TCB(2, 2) |
CSB_TCB(3, 3) |
CSB_TCB(4, 4) |
CSB_TCB(5, 5) |
CSB_TCB(6, 6) |
CSB_TCB(7, 7));

/* Load S3..S6; the four dwords that follow are their values */
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE |
S4_VFMT_XY);
OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */

OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);

OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
OUT_BATCH(0x00000000);

OUT_BATCH(_3DSTATE_STIPPLE);
OUT_BATCH(0x00000000);

sna->render_state.gen3.need_invariant = false;
}
 
#define MAX_OBJECTS 3 /* worst case: dst + src + mask */

/*
 * Prepare the batch for emitting a render operation.
 *
 * Switches kgem to KGEM_RENDER mode for op->dst.bo, then submits the
 * current batch (and re-enters render mode) if any of these checks fail:
 * 200 dwords of batch space, MAX_OBJECTS relocations, MAX_OBJECTS exec
 * slots. Finally emits the invariant state if it is still pending.
 */
static void
gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

if (!kgem_check_batch(&sna->kgem, 200)) {
DBG(("%s: flushing batch: size %d > %d\n",
__FUNCTION__, 200,
sna->kgem.surface-sna->kgem.nbatch));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
DBG(("%s: flushing batch: reloc %d >= %d\n",
__FUNCTION__,
sna->kgem.nreloc,
(int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
DBG(("%s: flushing batch: exec %d >= %d\n",
__FUNCTION__,
sna->kgem.nexec,
(int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (sna->render_state.gen3.need_invariant)
gen3_emit_invariant(sna);
/* MAX_OBJECTS is private to this function; drop it here. */
#undef MAX_OBJECTS
}
 
/*
 * Program the colour render target.
 *
 * bo:            destination buffer object.
 * width, height: drawing rectangle extent (asserted to pass too_large()).
 * format:        destination format, translated by gen3_get_dst_format().
 *
 * The buffer info, destination format and (if its limit changed) the
 * drawing rectangle are only emitted when bo->unique_id differs from the
 * cached state->current_dst. The bo is marked dirty in every case.
 */
static void gen3_emit_target(struct sna *sna,
struct kgem_bo *bo,
int width,
int height,
int format)
{
struct gen3_render_state *state = &sna->render_state.gen3;

assert(!too_large(width, height));

/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
assert(bo->unique_id != 0);
if (bo->unique_id != state->current_dst) {
uint32_t v;

DBG(("%s: setting new target id=%d, handle=%d\n",
__FUNCTION__, bo->unique_id, bo->handle));

OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
OUT_BATCH(BUF_3D_ID_COLOR_BACK |
gen3_buf_tiling(bo->tiling) |
bo->pitch);
/* Relocation for the target address, with the render domain
 * given as both halves of the domain argument. */
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
bo,
I915_GEM_DOMAIN_RENDER << 16 |
I915_GEM_DOMAIN_RENDER,
0));

OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
OUT_BATCH(gen3_get_dst_format(format));

/* Only re-emit the drawing rectangle when its limits change. */
v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
if (v != state->last_drawrect_limit) {
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
OUT_BATCH(0); /* XXX dither origin? */
OUT_BATCH(0);
OUT_BATCH(v);
OUT_BATCH(0);
state->last_drawrect_limit = v;
}

state->current_dst = bo->unique_id;
}
assert(bo->exec);
kgem_bo_mark_dirty(bo);
}
 
/*
 * Emit all state required to render a composite operation: cache flush
 * if an input is dirty, render target, texcoord formats and blend
 * control, shader constants, texture maps, samplers and finally the
 * pixel shader.
 *
 * Several state packets are compared against the copy previously written
 * to the batch (tracked by the state->last_* offsets); when identical the
 * newly written dwords are discarded by rewinding sna->kgem.nbatch.
 */
static void gen3_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen3_render_state *state = &sna->render_state.gen3;
/* Two dwords per texture for up to two textures (src and mask). */
uint32_t map[4];
uint32_t sampler[4];
struct kgem_bo *bo[2];
unsigned int tex_count, n;
uint32_t ss2;

gen3_get_batch(sna, op);

/* If an input was written to earlier, flush before sampling from it;
 * a full MI_FLUSH is used when an input is also the destination. */
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
else
OUT_BATCH(_3DSTATE_MODES_5_CMD |
PIPELINE_FLUSH_RENDER_CACHE |
PIPELINE_FLUSH_TEXTURE_CACHE);
kgem_clear_dirty(&sna->kgem);
}

gen3_emit_target(sna,
op->dst.bo,
op->dst.width,
op->dst.height,
op->dst.format);

/* Start with every bit set; the field of each texcoord set in use is
 * cleared and replaced with its real format below. */
ss2 = ~0;
tex_count = 0;
switch (op->src.u.gen3.type) {
case SHADER_OPACITY:
case SHADER_NONE:
assert(0);
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
/* The source constant is passed as the default diffuse colour. */
if (op->src.u.gen3.mode != state->last_diffuse) {
OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
OUT_BATCH(op->src.u.gen3.mode);
state->last_diffuse = op->src.u.gen3.mode;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
case SHADER_TEXTURE:
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->src.card_format);
map[tex_count * 2 + 0] =
op->src.card_format |
gen3_ms_tiling(op->src.bo->tiling) |
(op->src.height - 1) << MS3_HEIGHT_SHIFT |
(op->src.width - 1) << MS3_WIDTH_SHIFT;
map[tex_count * 2 + 1] =
(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

sampler[tex_count * 2 + 0] = op->src.filter;
sampler[tex_count * 2 + 1] =
op->src.repeat |
tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
bo[tex_count] = op->src.bo;
tex_count++;
break;
}
switch (op->mask.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
break;
case SHADER_CONSTANT:
/* The mask constant is passed as the default specular colour. */
if (op->mask.u.gen3.mode != state->last_specular) {
OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
OUT_BATCH(op->mask.u.gen3.mode);
state->last_specular = op->mask.u.gen3.mode;
}
break;
case SHADER_LINEAR:
case SHADER_RADIAL:
case SHADER_TEXTURE:
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count,
op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
assert(op->mask.card_format);
map[tex_count * 2 + 0] =
op->mask.card_format |
gen3_ms_tiling(op->mask.bo->tiling) |
(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
(op->mask.width - 1) << MS3_WIDTH_SHIFT;
map[tex_count * 2 + 1] =
(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

sampler[tex_count * 2 + 0] = op->mask.filter;
sampler[tex_count * 2 + 1] =
op->mask.repeat |
tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
bo[tex_count] = op->mask.bo;
tex_count++;
break;
case SHADER_OPACITY:
/* Opacity adds a 1D texcoord but no map/sampler entry, so
 * tex_count is left unchanged. */
ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
break;
}

{
uint32_t blend_offset = sna->kgem.nbatch;

OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
OUT_BATCH(ss2);
OUT_BATCH(gen3_get_blend_cntl(op->op,
op->has_component_alpha,
op->dst.format));

/* If the two payload dwords match those of the previous S2/S6
 * packet, drop the packet just written by rewinding nbatch. */
if (memcmp(sna->kgem.batch + state->last_blend + 1,
sna->kgem.batch + blend_offset + 1,
2 * 4) == 0)
sna->kgem.nbatch = blend_offset;
else
state->last_blend = blend_offset;
}

if (op->u.gen3.num_constants) {
int count = op->u.gen3.num_constants;
/* Skip the upload when the previously emitted constants packet has
 * the same mask dword and the same constant values. */
if (state->last_constants) {
int last = sna->kgem.batch[state->last_constants+1];
if (last == (1 << (count >> 2)) - 1 &&
memcmp(&sna->kgem.batch[state->last_constants+2],
op->u.gen3.constants,
count * sizeof(uint32_t)) == 0)
count = 0;
}
if (count) {
state->last_constants = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
/* one mask bit per group of four dwords (count >> 2) */
OUT_BATCH((1 << (count >> 2)) - 1);

memcpy(sna->kgem.batch + sna->kgem.nbatch,
op->u.gen3.constants,
count * sizeof(uint32_t));
sna->kgem.nbatch += count;
}
}

if (tex_count != 0) {
uint32_t rewind;

/* Find the first texture whose map dwords, handle or delta differ
 * from the cached values; n == tex_count means nothing changed. */
n = 0;
if (tex_count == state->tex_count) {
for (; n < tex_count; n++) {
if (map[2*n+0] != state->tex_map[2*n+0] ||
map[2*n+1] != state->tex_map[2*n+1] ||
state->tex_handle[n] != bo[n]->handle ||
state->tex_delta[n] != bo[n]->delta)
break;
}
}
if (n < tex_count) {
/* Re-emit the whole map state and refresh the cache. */
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
OUT_BATCH((1 << tex_count) - 1);
for (n = 0; n < tex_count; n++) {
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
bo[n],
I915_GEM_DOMAIN_SAMPLER<< 16,
0));
OUT_BATCH(map[2*n + 0]);
OUT_BATCH(map[2*n + 1]);

state->tex_map[2*n+0] = map[2*n+0];
state->tex_map[2*n+1] = map[2*n+1];
state->tex_handle[n] = bo[n]->handle;
state->tex_delta[n] = bo[n]->delta;
}
state->tex_count = n;
}

/* Write the sampler state, then discard it again if it is
 * identical to the packet recorded at state->last_sampler. */
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
OUT_BATCH((1 << tex_count) - 1);
for (n = 0; n < tex_count; n++) {
OUT_BATCH(sampler[2*n + 0]);
OUT_BATCH(sampler[2*n + 1]);
OUT_BATCH(0);
}
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler+1],
&sna->kgem.batch[rewind + 1],
(3*tex_count + 1)*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
}

gen3_composite_emit_shader(sna, op, op->op);
}
 
/*
 * Emit the second pass for operations flagged need_magic_ca_pass.
 *
 * The vertices from vertex_start up to vertex_index are drawn again with
 * the PictOpAdd blend control and a shader built for PictOpAdd.
 * state->last_blend is then reset to 0.
 *
 * Returns true if the extra pass was emitted, false if the operation
 * does not need it.
 */
static bool gen3_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
if (!op->need_magic_ca_pass)
return false;

DBG(("%s(%d)\n", __FUNCTION__,
sna->render.vertex_index - sna->render.vertex_start));

/* Switch blend control (S6) and shader to PictOpAdd. */
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
gen3_composite_emit_shader(sna, op, PictOpAdd);

/* Replay the same vertex range. */
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
(sna->render.vertex_index - sna->render.vertex_start));
OUT_BATCH(sna->render.vertex_start);

sna->render_state.gen3.last_blend = 0;
return true;
}
 
/*
 * Finalise the pending primitive: overwrite the two placeholder dwords at
 * render.vertex_offset with the rectangle-list command (carrying the
 * number of vertices accumulated since vertex_start) and the index of
 * the first vertex, then mark the primitive as closed by zeroing
 * vertex_offset.
 */
static void gen3_vertex_flush(struct sna *sna)
{
	const int nvertices =
		sna->render.vertex_index - sna->render.vertex_start;
	const unsigned int slot = sna->render.vertex_offset;

	assert(sna->render.vertex_offset);

	DBG(("%s[%x] = %d\n", __FUNCTION__,
	     4*sna->render.vertex_offset,
	     sna->render.vertex_index - sna->render.vertex_start));

	/* command dword: primitive type plus vertex count */
	sna->kgem.batch[slot] =
		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | nvertices;
	/* second dword: first vertex of the run */
	sna->kgem.batch[slot + 1] = sna->render.vertex_start;

	sna->render.vertex_offset = 0;
}
 
/*
 * Retire the current vertex buffer object (if any) and switch to a newly
 * created, mapped one.
 *
 * If a vbo is active, its pending relocation (vertex_reloc[0]) is
 * resolved against it, the vertex counters are reset and the bo is
 * destroyed. A new linear bo is then created and mapped; any vertices
 * still counted in vertex_used are copied into it from vertex_data.
 *
 * Returns the number of floats still free in the new buffer, or 0 if the
 * bo could not be created or mapped (render.vbo and render.vertices are
 * then NULL).
 */
static int gen3_vertex_finish(struct sna *sna)
{
struct kgem_bo *bo;

DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
assert(sna->render.vertex_used <= sna->render.vertex_size);

sna_vertex_wait__locked(&sna->render);

bo = sna->render.vbo;
if (bo) {
DBG(("%s: reloc = %d\n", __FUNCTION__,
sna->render.vertex_reloc[0]));

/* Point the batch's vertex buffer address at the old vbo. */
if (sna->render.vertex_reloc[0]) {
sna->kgem.batch[sna->render.vertex_reloc[0]] =
kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
bo, I915_GEM_DOMAIN_VERTEX << 16, 0);

sna->render.vertex_reloc[0] = 0;
}
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
sna->render.vbo = NULL;

kgem_bo_destroy(&sna->kgem, bo);
}

sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem,
256*1024, CREATE_GTT_MAP);
if (sna->render.vbo)
sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
/* creation or mapping failed: release whatever was created */
if (sna->render.vbo)
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
return 0;
}
assert(sna->render.vbo->snoop == false);

/* Only non-zero here when no vbo was active on entry: carry the
 * vertices accumulated in the embedded array over to the new vbo. */
if (sna->render.vertex_used) {
memcpy(sna->render.vertices,
sna->render.vertex_data,
sizeof(float)*sna->render.vertex_used);
}
sna->render.vertex_size = 64 * 1024 - 1;
return sna->render.vertex_size - sna->render.vertex_used;
}
 
/*
 * Resolve the outstanding vertex buffer relocation for the batch.
 *
 * Does nothing if no relocation is pending (vertex_reloc[0] == 0).
 * Otherwise the vertex data is made reachable in one of three ways:
 *  - an active vbo is used as is (and dropped afterwards if it has fewer
 *    than 64 floats free, or if it has a CPU map that cannot be converted
 *    to a GTT map);
 *  - with no vbo, the vertices are appended to the batch itself when
 *    they fit below kgem.surface;
 *  - otherwise a new linear bo is created and the vertices written to it.
 * When no vbo remains afterwards the vertex counters are reset.
 */
static void gen3_vertex_close(struct sna *sna)
{
struct kgem_bo *bo, *free_bo = NULL;
unsigned int delta = 0;

assert(sna->render.vertex_offset == 0);
if (sna->render.vertex_reloc[0] == 0)
return;

DBG(("%s: used=%d/%d, vbo active? %d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
sna->render.vbo ? sna->render.vbo->handle : 0));

bo = sna->render.vbo;
if (bo) {
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
DBG(("%s: discarding full vbo\n", __FUNCTION__));
/* fall back to the embedded vertex array for later use */
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map)) {
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
if (sna->render.vertices == NULL) {
DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
}
}
} else {
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
memcpy(sna->kgem.batch + sna->kgem.nbatch,
sna->render.vertex_data,
sna->render.vertex_used * 4);
/* byte offset of the copied vertices within the batch */
delta = sna->kgem.nbatch * 4;
/* the relocation below is added with a NULL bo */
bo = NULL;
sna->kgem.nbatch += sna->render.vertex_used;
} else {
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
bo = kgem_create_linear(&sna->kgem,
4*sna->render.vertex_used,
CREATE_NO_THROTTLE);
if (bo) {
assert(bo->snoop == false);
kgem_bo_write(&sna->kgem, bo,
sna->render.vertex_data,
4*sna->render.vertex_used);
}
/* NOTE(review): if creation failed bo is NULL here and the
 * relocation below is added with a NULL bo and delta 0 -
 * confirm that this is the intended failure behaviour. */
free_bo = bo;
}
}

DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
sna->kgem.batch[sna->render.vertex_reloc[0]] =
kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
sna->render.vertex_reloc[0] = 0;

if (sna->render.vbo == NULL) {
DBG(("%s: resetting vbo\n", __FUNCTION__));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
}

if (free_bo)
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
/*
 * Open a rectangle-list primitive in the batch.
 *
 * Emits a LOAD_STATE_IMMEDIATE_1 packet for S0 (vertex buffer address,
 * left as a relocation slot) and/or S1 (vertex width and pitch) when they
 * need updating, then either re-opens the primitive that ends exactly at
 * the current batch position or reserves two fresh placeholder dwords
 * which gen3_vertex_flush() fills in later.
 *
 * Returns false if the batch lacks room, true otherwise (including the
 * case where a primitive is already open after waiting on the vertex
 * lock).
 */
static bool gen3_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	int i1_cmd = 0;
	int i1_len = 0;
	int ndwords;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* two dwords for the primitive placeholder itself */
	ndwords = 2;
	if (op->need_magic_ca_pass)
		ndwords += 100;

	/* S0: no vertex buffer relocation slot reserved yet */
	if (sna->render.vertex_reloc[0] == 0) {
		i1_cmd |= I1_LOAD_S(0);
		i1_len++;
		ndwords++;
	}

	/* S1: vertex layout differs from what was last programmed */
	if (state->floats_per_vertex != op->floats_per_vertex) {
		i1_cmd |= I1_LOAD_S(1);
		i1_len++;
		ndwords++;
	}

	if (!kgem_check_batch(&sna->kgem, ndwords+1))
		return false;

	if (i1_cmd) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));

		if (sna->render.vertex_reloc[0] == 0)
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;

		if (state->floats_per_vertex != op->floats_per_vertex) {
			state->floats_per_vertex = op->floats_per_vertex;
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
		}
	}

	if (sna->kgem.nbatch != 2 + state->last_vertex_offset ||
	    op->need_magic_ca_pass) {
		/* start a new primitive */
		sna->render.vertex_offset = sna->kgem.nbatch;
		OUT_BATCH(MI_NOOP); /* to be filled later */
		OUT_BATCH(MI_NOOP);
		sna->render.vertex_start = sna->render.vertex_index;
		state->last_vertex_offset = sna->render.vertex_offset;
	} else {
		/* nothing was emitted since the last primitive: extend it */
		sna->render.vertex_offset = state->last_vertex_offset;
	}

	return true;
}
 
/*
 * Slow path of gen3_get_rectangles(), taken when the vertex buffer has
 * no room for another rectangle.
 *
 * Closes the open primitive (running the extra component-alpha pass and
 * restoring the operation's own blend control and shader when required)
 * and moves on to a fresh vertex buffer.
 *
 * Returns the number of floats now available, or 0 if the batch has to
 * be submitted first.
 */
static int gen3_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	const int batch_needed = op->need_magic_ca_pass ? 105 : 5;

	/* Avoid discarding a new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int space = vertex_space(sna);
		if (space > op->floats_per_rect)
			return space;
	}

	if (!kgem_check_batch(&sna->kgem, batch_needed) ||
	    !kgem_check_reloc_and_exec(&sna->kgem, 1))
		return 0;

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		if (gen3_magic_ca_pass(sna, op)) {
			/* put back the blend control and shader of op itself */
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
			OUT_BATCH(gen3_get_blend_cntl(op->op,
						      op->has_component_alpha,
						      op->dst.format));
			gen3_composite_emit_shader(sna, op, op->op);
		}
	}

	return gen3_vertex_finish(sna);
}
 
/*
 * Reserve vertex space for up to "want" rectangles.
 *
 * want must be non-zero. The request is trimmed to what fits in the
 * remaining vertex space, and render.vertex_index is advanced by three
 * vertices per rectangle granted.
 *
 * If there is no room for even one rectangle, or no primitive can be
 * opened, the batch is submitted, the composite state re-emitted and the
 * attempt repeated.
 *
 * Returns the number of rectangles actually reserved (at least 1).
 */
inline static int gen3_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want)
{
int rem;

DBG(("%s: want=%d, rem=%d\n",
__FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));

assert(want);
assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);

start:
rem = vertex_space(sna);
if (unlikely(op->floats_per_rect > rem)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen3_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}

/* No primitive open: open one, then re-check the space from the top. */
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen3_rectangle_begin(sna, op))
goto flush;
else
goto start;
}

assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
/* Trim a multi-rectangle request to what fits. */
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
sna->render.vertex_index += 3*want;

assert(want);
assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
return want;

flush:
/* Out of batch space: close the primitive, submit and start over. */
DBG(("%s: flushing batch\n", __FUNCTION__));
if (sna->render.vertex_offset) {
gen3_vertex_flush(sna);
gen3_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
gen3_emit_composite_state(sna, op);
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_reloc[0] == 0);
goto start;
}
 
/*
 * Composite a single rectangle: reserve vertex space for one rectangle
 * and let the operation's prim_emit callback write its vertices.
 */
fastcall static void
gen3_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));

/* The return value is not needed: a request for one rectangle is
 * never trimmed (see the "want > 1" test in gen3_get_rectangles). */
gen3_get_rectangles(sna, op, 1);

op->prim_emit(sna, op, r);
}
 
/*
 * Finish a composite operation: if a primitive is still open, close it
 * and run the extra component-alpha pass when the operation needs one.
 */
static void
gen3_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	/* nothing pending */
	if (sna->render.vertex_offset == 0)
		return;

	gen3_vertex_flush(sna);
	gen3_magic_ca_pass(sna, op);
}
 
/*
 * Drop the current vertex buffer object and return to the vertex array
 * embedded in the render state, with both vertex counters reset.
 */
static void
discard_vbo(struct sna *sna)
{
	/* release the bo before touching any of the bookkeeping */
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;

	/* nothing has been written to the fallback array yet */
	sna->render.vertex_index = 0;
	sna->render.vertex_used = 0;

	/* subsequent vertices go into the embedded array */
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertices = sna->render.vertex_data;
}
 
/*
 * Invalidate all cached hardware state so that the next operation
 * re-emits everything, and drop the pending vertex relocation/primitive.
 * A vbo that kgem reports as not mappable is discarded as well.
 */
static void
gen3_render_reset(struct sna *sna)
{
struct gen3_render_state *state = &sna->render_state.gen3;

state->need_invariant = true;
state->current_dst = 0;
state->tex_count = 0;
state->last_drawrect_limit = ~0U;
state->last_target = 0;
state->last_blend = 0;
state->last_constants = 0;
state->last_sampler = 0;
/* presumably values that never match a real shader id or colour,
 * so the first comparison after a reset fails - TODO confirm */
state->last_shader = 0x7fffffff;
state->last_diffuse = 0xcc00ffee;
state->last_specular = 0xcc00ffee;

state->floats_per_vertex = 0;
state->last_floats_per_vertex = 0;
state->last_vertex_offset = 0;

if (sna->render.vbo != NULL &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding vbo as next access will stall: %d\n",
__FUNCTION__, sna->render.vbo->presumed_offset));
discard_vbo(sna);
}

sna->render.vertex_reloc[0] = 0;
sna->render.vertex_offset = 0;
}
 
/*
 * Retire hook: once the vbo is no longer busy and no relocation against
 * it is pending, rewind the vertex counters so the buffer is reused from
 * its start.
 */
static void
gen3_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* a relocation is still outstanding for the current batch */
	if (sna->render.vertex_reloc[0] != 0)
		return;

	/* no vbo, or it is still in use */
	if (!sna->render.vbo || kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
/*
 * Expire hook: release the vbo when it holds no vertices.
 */
static void
gen3_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* keep the vbo if there is none to drop or it still has data */
	if (!sna->render.vbo || sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo\n", __FUNCTION__));
	discard_vbo(sna);
}
 
/*
 * Look up "format" in gen3_tex_formats[] and, on a match, store the
 * table's card format and red/blue-reversed flag in the channel.
 *
 * Returns true if the format was found, false otherwise (the channel is
 * left untouched in that case).
 */
static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
					      CARD32 format)
{
	unsigned int idx = 0;

	while (idx < ARRAY_SIZE(gen3_tex_formats)) {
		if (gen3_tex_formats[idx].fmt != format) {
			idx++;
			continue;
		}

		channel->card_format = gen3_tex_formats[idx].card_fmt;
		channel->rb_reversed = gen3_tex_formats[idx].rb_reversed;
		return true;
	}

	return false;
}
 
#if 0
/*
 * Prepare a composite channel from a Render picture.
 * (Currently compiled out: this function sits inside an "#if 0" region.)
 *
 * picture:      the source or mask picture.
 * channel:      channel description to fill in.
 * x, y, w, h:   area of the picture that is sampled.
 * dst_x, dst_y: corresponding origin in the destination.
 * precise:      when false, a gradient that cannot be set up natively may
 *               be approximated before falling back to a fixup.
 *
 * The return value is whatever the chosen helper returns; this function
 * itself only tests for -1 to decide whether to try the next fallback.
 */
static int
gen3_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_op *op,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;

DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));

channel->card_format = 0;

/* Pictures without a drawable are procedural sources. */
if (picture->pDrawable == NULL) {
SourcePict *source = picture->pSourcePict;
int ret = -1;

switch (source->type) {
case SourcePictTypeSolidFill:
DBG(("%s: solid fill [%08x], format %08x\n",
__FUNCTION__,
(unsigned)source->solidFill.color,
(unsigned)picture->format));
ret = gen3_init_solid(channel, source->solidFill.color);
break;

case SourcePictTypeLinear:
ret = gen3_init_linear(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;

case SourcePictTypeRadial:
ret = gen3_init_radial(sna, picture, op, channel,
x - dst_x, y - dst_y);
break;
}

/* Native setup failed (or the type is not handled above):
 * approximate if allowed, else use the generic fixup. */
if (ret == -1) {
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
return ret;
}

if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

/* Drawables that reduce to a single colour are treated as solids. */
if (sna_picture_is_solid(picture, &color)) {
DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color);
}

if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
}

if (!gen3_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);

if (!gen3_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);

channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
channel->pict_format = picture->format;

pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

/* Convert the sample origin to pixmap coordinates. */
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;

/* A pure integer translation is folded into the origin, so no
 * transform is needed and nearest filtering is used. */
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else {
channel->transform = picture->transform;
channel->is_affine = sna_transform_is_affine(picture->transform);
}

/* Formats matched by neither lookup must be converted first. */
if (!gen3_composite_channel_set_format(channel, picture->format) &&
!gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
assert(channel->card_format);

if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
__FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height,
x, y, w, h));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
/*
 * Decide whether a source picture is better handled by the BLT paths.
 *
 * True for a solid fill, a repeating 1x1 drawable, a drawable that is
 * too large for the 3D pipe, or a drawable for which is_gpu() returns
 * false.
 */
static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
	/* If it is a solid, try to use the BLT paths */
	if (picture->pDrawable == NULL)
		return picture->pSourcePict->type == SourcePictTypeSolidFill;

	/* a repeating single pixel is a solid as well */
	if (picture->repeat &&
	    picture->pDrawable->width == 1 &&
	    picture->pDrawable->height == 1)
		return true;

	return too_large(picture->pDrawable->width, picture->pDrawable->height) ||
	       !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}
 
/*
 * Decide whether a composite should be attempted with the BLT engine
 * first. (Currently compiled out: inside an "#if 0" region.)
 *
 * Returns true when kgem is not in render mode, when the operation or
 * the destination drawable exceeds the 3D pipe limits, or when
 * source_use_blt() says so for the source picture.
 */
static bool
try_blt(struct sna *sna,
PicturePtr dst,
PicturePtr src,
int width, int height)
{
if (sna->kgem.mode != KGEM_RENDER) {
DBG(("%s: already performing BLT\n", __FUNCTION__));
return true;
}

if (too_large(width, height)) {
DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
__FUNCTION__, width, height));
return true;
}

if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
DBG(("%s: target too large for 3D pipe (%d, %d)\n",
__FUNCTION__,
dst->pDrawable->width, dst->pDrawable->height));
return true;
}

/* is the source picture only in cpu memory e.g. a shm pixmap? */
return source_use_blt(sna, src);
}
#endif
 
static void
gen3_align_vertex(struct sna *sna,
const struct sna_composite_op *op)
{
if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen3_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen3.last_floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
}
}
 
/*
 * Report whether a shader channel type is one of the constant kinds
 * listed below.
 *
 * Be warned: SHADER_NONE counts as constant here too.
 */
static inline bool is_constant_ps(uint32_t type)
{
	return type == SHADER_NONE ||
	       type == SHADER_ZERO ||
	       type == SHADER_BLACK ||
	       type == SHADER_WHITE ||
	       type == SHADER_CONSTANT;
}
 
#if 0
/*
 * Decide whether a composite request should bypass the gen3 render path.
 *
 * Returns true when the caller should fall back (gen3_render_composite()
 * returns false in that case) and false when the GPU path should be tried.
 * The checks run in a fixed order and the first one that matches decides.
 */
static bool
gen3_composite_fallback(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;

/* A destination format rejected by gen3_check_dst_format() always falls back. */
if (!gen3_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}

dst_pixmap = get_drawable_pixmap(dst->pDrawable);

/* Pictures without a drawable have no backing pixmap (NULL). */
src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(src, src_pixmap,
dst->polyMode == PolyModePrecise);

if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}

/* If we are using the destination as a source and need to
 * readback in order to upload the source, do it all
 * on the cpu.
 */
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}

/* Same component-alpha condition that gen3_render_composite() rejects
 * later (its "goto cleanup_mask" for op != PictOpOver). */
if (mask &&
mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
op != PictOpOver) {
DBG(("%s: component-alpha mask with op=%d, should fallback\n",
__FUNCTION__, op));
return true;
}

/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}

if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}

/* However if the dst is not on the GPU and we need to
 * render one of the sources using the CPU, we may
 * as well do the entire operation in place on the CPU.
 */
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}

if (mask && mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}

if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}

/* Nothing above decided: defer to dst_use_cpu() for the destination. */
DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
__FUNCTION__, dst_use_cpu(dst_pixmap)));
return dst_use_cpu(dst_pixmap);
}
 
/*
 * Prepare a composite operation for the gen3 render path.
 *
 * Fills *tmp with the destination, source and mask channels, picks the
 * vertex emitter (prim_emit / emit_boxes) that matches the channel types,
 * sets the blt/box/boxes/done callbacks, and finally emits the composite
 * state.  Returns true when *tmp is ready to use (or when the BLT or
 * tiling helpers took over and succeeded), false when the caller should
 * fall back.  On failure the bos acquired here are released through the
 * cleanup_* labels at the end.
 *
 * NOTE(review): tmp->emit_boxes is tested near the end but is only assigned
 * in some of the branches below; this appears to rely on the caller having
 * cleared *tmp beforehand -- confirm.
 */
static bool
gen3_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s()\n", __FUNCTION__));

/* op is used as an index into gen3_blend_op[] below. */
if (op >= ARRAY_SIZE(gen3_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}

/* Try to use the BLT engine unless it implies a
 * 3D -> 2D context switch.
 */
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna,
op, src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;

if (gen3_composite_fallback(sna, op, src, mask, dst))
return false;

if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
mask_x, mask_y,
dst_x, dst_y,
width, height,
tmp);

if (!gen3_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height)) {
DBG(("%s: unable to set render target\n",
__FUNCTION__));
return false;
}

tmp->op = op;
tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
/* Target fails the size or 3D pitch check: go through
 * sna_render_composite_redirect() instead of rendering to it directly. */
if (too_large(tmp->dst.width, tmp->dst.height) ||
!gen3_check_pitch_3d(tmp->dst.bo)) {
if (!sna_render_composite_redirect(sna, tmp,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;
}

tmp->u.gen3.num_constants = 0;
tmp->src.u.gen3.type = SHADER_TEXTURE;
tmp->src.is_affine = true;
DBG(("%s: preparing source\n", __FUNCTION__));
/* gen3_composite_picture() result: -1 aborts, 0 turns the channel into
 * SHADER_ZERO, 1 keeps the prepared channel (after a format conversion). */
switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
tmp->src.u.gen3.type = SHADER_ZERO;
break;
case 1:
/* Unmasked source with a bo: give the BLT conversion path a chance first. */
if (mask == NULL && tmp->src.bo &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;

gen3_composite_channel_convert(&tmp->src);
break;
}
DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));

tmp->mask.u.gen3.type = SHADER_NONE;
tmp->mask.is_affine = true;
tmp->need_magic_ca_pass = false;
tmp->has_component_alpha = false;
/* The mask is only prepared when the source is not already SHADER_ZERO. */
if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, mask_x, mask_y)) {
tmp->mask.u.gen3.type = SHADER_TEXTURE;
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
mask_x, mask_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
tmp->mask.u.gen3.type = SHADER_ZERO;
break;
case 1:
gen3_composite_channel_convert(&tmp->mask);
break;
}
}
DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
/* A zero mask collapses the operation to a SHADER_ZERO source with no
 * mask, so the source bo is released here. */
if (tmp->mask.u.gen3.type == SHADER_ZERO) {
if (tmp->src.bo) {
kgem_bo_destroy(&sna->kgem,
tmp->src.bo);
tmp->src.bo = NULL;
}
tmp->src.u.gen3.type = SHADER_ZERO;
tmp->mask.u.gen3.type = SHADER_NONE;
}

if (tmp->mask.u.gen3.type != SHADER_NONE) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
/* Check if it's component alpha that relies on a source alpha
 * and on the source value. We can only get one of those
 * into the single source value that we get to blend with.
 */
DBG(("%s: component-alpha mask: %d\n",
__FUNCTION__, tmp->mask.u.gen3.type));
tmp->has_component_alpha = true;
if (tmp->mask.u.gen3.type == SHADER_WHITE) {
tmp->mask.u.gen3.type = SHADER_NONE;
tmp->has_component_alpha = false;
} else if (gen3_blend_op[op].src_alpha &&
gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
/* Only PictOpOver is handled here: it is rewritten as
 * PictOpOutReverse with need_magic_ca_pass set. */
if (op != PictOpOver)
goto cleanup_mask;

tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
} else {
if (tmp->mask.is_opaque) {
tmp->mask.u.gen3.type = SHADER_NONE;
} else if (is_constant_ps(tmp->src.u.gen3.type) &&
is_constant_ps(tmp->mask.u.gen3.type)) {
/* Both channels are constant: fold them into one constant source
 * using multa() at bit offsets 24, 16, 8 and 0, and drop the mask. */
uint32_t v;

v = multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
24);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
16);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
8);
v |= multa(tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
0);

DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
__FUNCTION__,
tmp->src.u.gen3.mode,
tmp->mask.u.gen3.mode,
v));

tmp->src.u.gen3.type = SHADER_CONSTANT;
tmp->src.u.gen3.mode = v;
tmp->src.is_opaque = false;

tmp->mask.u.gen3.type = SHADER_NONE;
}
}
}
}
DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
tmp->src.is_affine, tmp->mask.is_affine));

/* Start from the generic emitter; the branches below replace it with a
 * specialised one when the source/mask combination has one.  The __sse2
 * variants are only compiled in when `sse2` is defined on non-x86_64 builds
 * and are chosen at run time from sna->cpu_features. */
tmp->prim_emit = gen3_emit_composite_primitive;
if (is_constant_ps(tmp->mask.u.gen3.type)) {
switch (tmp->src.u.gen3.type) {
case SHADER_NONE:
case SHADER_ZERO:
case SHADER_BLACK:
case SHADER_WHITE:
case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant;
tmp->emit_boxes = gen3_emit_composite_boxes_constant;
}

break;
case SHADER_LINEAR:
case SHADER_RADIAL:
if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
}
} else if (tmp->src.is_affine) {
/* Scale is set to the reciprocal of the transform's [2][2] element. */
tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
}
}
break;
case SHADER_TEXTURE:
if (tmp->src.transform == NULL) {
/* All four offsets zero selects the cheaper "no_offset" emitters. */
if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
}
}
} else if (tmp->src.is_affine) {
/* Divide the existing scale by the transform's [2][2] element. */
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
}
}
break;
}
} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
/* Textured mask: only prim_emit is specialised in this branch;
 * emit_boxes is not assigned here. */
if (tmp->mask.transform == NULL) {
if (is_constant_ps(tmp->src.u.gen3.type)) {
if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
}
} else {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
}
}
} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
}
} else if (tmp->src.is_affine) {
tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
if (sna->cpu_features & SSE2) {
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
} else
#endif
{
tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
}
}
}
}

/* Two base floats per vertex, plus 2 (affine) or 4 (otherwise) for each
 * channel that is not a constant shader. */
tmp->floats_per_vertex = 2;
if (!is_constant_ps(tmp->src.u.gen3.type))
tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
if (!is_constant_ps(tmp->mask.u.gen3.type))
tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
!is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
!is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
tmp->floats_per_vertex,
tmp->prim_emit != gen3_emit_composite_primitive));
/* A rectangle is emitted as three vertices. */
tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

tmp->blt = gen3_render_composite_blt;
tmp->box = gen3_render_composite_box;
tmp->boxes = gen3_render_composite_boxes__blt;
/* With a specialised box emitter, use the faster boxes paths. */
if (tmp->emit_boxes) {
tmp->boxes = gen3_render_composite_boxes;
tmp->thread_boxes = gen3_render_composite_boxes__thread;
}
tmp->done = gen3_render_composite_done;

/* If kgem_check_bo() rejects the bos, submit the current batch and
 * check once more; a second rejection aborts the operation. */
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}

gen3_emit_composite_state(sna, tmp);
gen3_align_vertex(sna, tmp);
return true;

/* Error unwinding: each label falls through to the next, releasing the
 * mask bo, then the source bo, then the destination bo when
 * tmp->redirect.real_bo is set. */
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Flush hook (installed as render->flush by gen3_render_init()).
 * Closes the current vertex buffer via gen3_vertex_close() and then asserts
 * that no vertex relocation slot or vertex offset is left pending.
 */
static void gen3_render_flush(struct sna *sna)
{
gen3_vertex_close(sna);

assert(sna->render.vertex_reloc[0] == 0);
assert(sna->render.vertex_offset == 0);
}
 
/*
 * Teardown hook (installed as render->fini by gen3_render_init()).
 * Intentionally empty: this function releases nothing.
 */
static void
gen3_render_fini(struct sna *sna)
{
}
 
/*
 * Install the gen3 backend into sna->render and sna->kgem.
 *
 * Only the texture-blit entry point, the reset/flush/fini hooks, the 3D
 * size limits and the kgem retire/expire callbacks are wired up; the
 * composite, spans, video, copy and fill hooks are compiled out (#if 0).
 * The backend argument is not used.  Returns the backend name string.
 */
const char *gen3_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *ops = &sna->render;

#if 0
#if !NO_COMPOSITE
	ops->composite = gen3_render_composite;
	ops->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	ops->check_composite_spans = gen3_check_composite_spans;
	ops->composite_spans = gen3_render_composite_spans;
	ops->prefer_gpu |= PREFER_GPU_SPANS;
#endif

	ops->video = gen3_render_video;

	ops->copy_boxes = gen3_render_copy_boxes;
	ops->copy = gen3_render_copy;

	ops->fill_boxes = gen3_render_fill_boxes;
	ops->fill = gen3_render_fill;
	ops->fill_one = gen3_render_fill_one;
#endif

	/* Limits of the 3D pipeline for this generation. */
	ops->max_3d_size = MAX_3D_SIZE;
	ops->max_3d_pitch = MAX_3D_PITCH;

	/* Capabilities advertised to the caller and the matching entry point. */
	ops->caps = HW_BIT_BLIT | HW_TEX_BLIT;
	ops->blit_tex = gen3_blit_tex;

	/* Lifecycle hooks. */
	ops->reset = gen3_render_reset;
	ops->flush = gen3_render_flush;
	ops->fini = gen3_render_fini;

	/* Buffer-manager callbacks. */
	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;

	return "Alviso (gen3)";
}
 
/*
 * Set up a masked texture blit (source * mask -> destination) in *tmp.
 *
 * The operation is always PictOpSrc with an x8r8g8b8 destination and
 * source and an a8 mask, nearest filtering and no repeat; the op argument
 * and the src/msk/dst x/y arguments are not used here.  When scale is true
 * the source texture coordinates are normalised by the blit width/height,
 * otherwise by the source pixmap size; the mask is always normalised by
 * its own pixmap size.
 *
 * The bos are supplied by the caller and are not released here.
 *
 * Returns true when *tmp is ready (blt/done callbacks set and state
 * emitted).  Returns false when the bos are still rejected by
 * kgem_check_bo() after the current batch has been submitted, matching the
 * retry-then-fail handling in gen3_render_composite(); no state is emitted
 * in that case.
 */
static bool
gen3_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	/* Destination channel. */
	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);

	/* Source channel: plain texture, untransformed. */
	tmp->u.gen3.num_constants = 0;
	tmp->src.u.gen3.type = SHADER_TEXTURE;
	tmp->src.is_affine = true;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;

	gen3_composite_channel_set_format(&tmp->src, tmp->src.pict_format);

	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;

	/* Mask channel: a8 texture, untransformed, no component alpha. */
	tmp->mask.u.gen3.type = SHADER_TEXTURE;
	tmp->mask.is_affine = true;
	tmp->need_magic_ca_pass = false;
	tmp->has_component_alpha = false;

	tmp->mask.repeat = RepeatNone;
	tmp->mask.filter = PictFilterNearest;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	gen3_composite_channel_set_format(&tmp->mask, tmp->mask.pict_format);
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Texture coordinate normalisation factors. */
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;

	/* Two base floats per vertex, plus 2 (affine) or 4 per non-constant
	 * channel; kept in the same form as gen3_render_composite(). */
	tmp->floats_per_vertex = 2;
	if (!is_constant_ps(tmp->src.u.gen3.type))
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
	if (!is_constant_ps(tmp->mask.u.gen3.type))
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
	/* A rectangle is emitted as three vertices. */
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->blt = gen3_render_composite_blt;

	tmp->done = gen3_render_composite_done;

	/* If the bos are rejected, submit the current batch and check once
	 * more; give up rather than emit state for bos that still do not
	 * pass the check. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: bos do not fit even in an empty batch\n",
			     __FUNCTION__));
			return false;
		}
	}

	gen3_emit_composite_state(sna, tmp);
	gen3_align_vertex(sna, tmp);
	return true;
}
/drivers/video/Intel-2D/sna/gen3_render.h
0,0 → 1,1479
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef _I915_REG_H_
#define _I915_REG_H_
 
/* Value 3 placed at bit 29; ORed into the 3D command macros built from it below. */
#define CMD_3D (3 << 29)

/* Clear the bits of `mask` in `var`, then OR in `value`.  `var` is evaluated
 * twice, and `value` is neither masked nor shifted by the macro. */
#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value)

/* PRIM3D is CMD_3D with 0x1f at bit 24.  Each PRIM3D_<type> below adds a
 * type code at bit 18; PRIM3D_MASK is the 5-bit mask at that position.
 * PRIM3D_INDIRECT_SEQUENTIAL sets bit 23 (its bit-17 term is zero). */
#define PRIM3D (CMD_3D | (0x1f<<24))
#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17))
#define PRIM3D_TRILIST (PRIM3D | (0x0<<18))
#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18))
#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18))
#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18))
#define PRIM3D_POLY (PRIM3D | (0x4<<18))
#define PRIM3D_LINELIST (PRIM3D | (0x5<<18))
#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18))
#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18))
#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18))
#define PRIM3D_DIB (PRIM3D | (0x9<<18))
#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18))
#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18))
#define PRIM3D_MASK (0x1f<<18)
 
 
/* p137 */
/* CMD_3D with 0x06 at bit 24.  The ECAAR and REGION width selectors are
 * 2-bit codes at bits 14 and 6; each has its own enable flag (bits 16, 8). */
#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24))
#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16)
#define AA_LINE_ECAAR_WIDTH_0_5 0
#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14)
#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14)
#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14)
#define AA_LINE_REGION_WIDTH_ENABLE (1<<8)
#define AA_LINE_REGION_WIDTH_0_5 0
#define AA_LINE_REGION_WIDTH_1_0 (1<<6)
#define AA_LINE_REGION_WIDTH_2_0 (2<<6)
#define AA_LINE_REGION_WIDTH_4_0 (3<<6)

/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
/* CMD_3D with 0x8 at bit 24.  Each field below comes as a _SHIFT/_MASK pair:
 * an 8-bit reference value at bit 15 and four 3-bit fields at bits 11, 8, 5
 * and 2; the remaining macros are single-bit flags. */
#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24))
#define BFO_ENABLE_STENCIL_REF (1<<23)
#define BFO_STENCIL_REF_SHIFT 15
#define BFO_STENCIL_REF_MASK (0xff<<15)
#define BFO_ENABLE_STENCIL_FUNCS (1<<14)
#define BFO_STENCIL_TEST_SHIFT 11
#define BFO_STENCIL_TEST_MASK (0x7<<11)
#define BFO_STENCIL_FAIL_SHIFT 8
#define BFO_STENCIL_FAIL_MASK (0x7<<8)
#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5
#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5)
#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2
#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2)
#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1)
#define BFO_STENCIL_TWO_SIDE (1<<0)

/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
/* CMD_3D with 0x9 at bit 24.  Two 8-bit mask fields (test mask at bit 8,
 * write mask at bit 0), each with an enable flag (bits 17 and 16). */
#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17)
#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT 8
#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8)
#define BFM_STENCIL_WRITE_MASK_SHIFT 0
#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0)
 
/* 3DSTATE_BIN_CONTROL p141 */
 
/* p143 */
/* CMD_3D with 0x1d at bit 24 and 0x8e at bit 16; the low bits hold the
 * constant 1 (presumably a length field -- confirm against the docs). */
#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
/* Buffer id codes sit at bit 24; fence, tiled and tile-walk flags at bits 23..21. */
#define BUF_3D_ID_COLOR_BACK (0x3<<24)
#define BUF_3D_ID_DEPTH (0x7<<24)
#define BUF_3D_USE_FENCE (1<<23)
#define BUF_3D_TILED_SURFACE (1<<22)
#define BUF_3D_TILE_WALK_X 0
#define BUF_3D_TILE_WALK_Y (1<<21)
/* Dword 2 */
/* Clears the two low bits of x. */
#define BUF_3D_ADDR(x) ((x) & ~0x3)

/* 3DSTATE_CHROMA_KEY */

/* 3DSTATE_CLEAR_PARAMETERS, p150 */
/* CMD_3D with 0x1d at bit 24 and 0x9c at bit 16; the low bits hold the constant 5. */
#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
/* Dword 1 */
/* Bit 16 selects CLEAR_RECT (1) or ZONE_INIT (0); bits 2..0 are the
 * colour/depth/stencil write flags. */
#define CLEARPARAM_CLEAR_RECT (1 << 16)
#define CLEARPARAM_ZONE_INIT (0 << 16)
#define CLEARPARAM_WRITE_COLOR (1 << 2)
#define CLEARPARAM_WRITE_DEPTH (1 << 1)
#define CLEARPARAM_WRITE_STENCIL (1 << 0)

/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
/* CMD_3D with 0x1d at bit 24 and 0x88 at bit 16; no low-bit constant is included. */
#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16))
 
/* 3DSTATE_COORD_SET_BINDINGS, p154 */
#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24))
/* Shift binding value eunit into the slot of unit iunit; slots are 3 bits
 * apart, so slot i starts at bit i*3.  Both arguments are parenthesised so
 * that expressions such as CSB_TCB(i + 1, e) shift by (i + 1) * 3. */
#define CSB_TCB(iunit, eunit) ((eunit)<<((iunit)*3))
 
/* p156 */
/* The three DFLT commands and DEPTH_OFFSET_SCALE below share 0x1d at bit 24
 * and differ only in the value at bit 16 (0x99, 0x9a, 0x98, 0x97). */
#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16))

/* p157 */
#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16))

/* p158 */
#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16))

/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16))
/* scale in dword 1 */

/* The depth subrectangle is not supported, but must be disabled. */
/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
/* CMD_3D with 0x1c at bit 24 and 0x11 at bit 19; bit 1 is set and bit 0 is clear. */
#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0))

/* p161 */
/* CMD_3D with 0x1d at bit 24 and 0x85 at bit 16. */
#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
/* Bit positions as written: texture default colour at 30, early depth at 29,
 * LOD preclamp at 28, dither mode at 26, destination origin biases at 20 and
 * 16, 4:2:2 channel write select at 12, colour buffer format at 8, depth
 * W/Z select at 6, depth format at 2, vertical line stride at 1 and 0. */
#define TEX_DEFAULT_COLOR_OGL (0<<30)
#define TEX_DEFAULT_COLOR_D3D (1<<30)
#define ZR_EARLY_DEPTH (1<<29)
#define LOD_PRECLAMP_OGL (1<<28)
#define LOD_PRECLAMP_D3D (0<<28)
#define DITHER_FULL_ALWAYS (0<<26)
#define DITHER_FULL_ON_FB_BLEND (1<<26)
#define DITHER_CLAMPED_ALWAYS (2<<26)
#define LINEAR_GAMMA_BLEND_32BPP (1<<25)
#define DEBUG_DISABLE_ENH_DITHER (1<<24)
#define DSTORG_HORT_BIAS(x) ((x)<<20)
#define DSTORG_VERT_BIAS(x) ((x)<<16)
#define COLOR_4_2_2_CHNL_WRT_ALL 0
#define COLOR_4_2_2_CHNL_WRT_Y (1<<12)
#define COLOR_4_2_2_CHNL_WRT_CR (2<<12)
#define COLOR_4_2_2_CHNL_WRT_CB (3<<12)
#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12)
#define COLR_BUF_8BIT 0
#define COLR_BUF_RGB555 (1<<8)
#define COLR_BUF_RGB565 (2<<8)
#define COLR_BUF_ARGB8888 (3<<8)
#define COLR_BUF_ARGB4444 (8<<8)
#define COLR_BUF_ARGB1555 (9<<8)
#define COLR_BUF_ARGB2AAA (0xa<<8)
#define DEPTH_IS_Z 0
#define DEPTH_IS_W (1<<6)
#define DEPTH_FRMT_16_FIXED 0
#define DEPTH_FRMT_16_FLOAT (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2)
#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2)
#define VERT_LINE_STRIDE_1 (1<<1)
#define VERT_LINE_STRIDE_0 0
#define VERT_LINE_STRIDE_OFS_1 1
#define VERT_LINE_STRIDE_OFS_0 0
 
/* p166 */
/* CMD_3D with 0x1d at bit 24 and 0x80 at bit 16; the low bits hold the constant 3. */
#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
/* Dword 1 */
#define DRAW_RECT_DIS_DEPTH_OFS (1<<30)
#define DRAW_DITHER_OFS_X(x) ((x)<<26)
#define DRAW_DITHER_OFS_Y(x) ((x)<<24)
/*
 * Dwords 2-4 pack two 16-bit coordinates each: X in the low half, Y in the
 * high half.  The Y macros truncate to uint16_t and then widen to uint32_t
 * before shifting; shifting the int-promoted uint16_t directly would
 * overflow a signed int for values >= 0x8000.
 */
/* Dword 2 */
#define DRAW_YMIN(x) ((uint32_t)(uint16_t)(x)<<16)
#define DRAW_XMIN(x) ((uint16_t)(x))
/* Dword 3 */
#define DRAW_YMAX(x) ((uint32_t)(uint16_t)(x)<<16)
#define DRAW_XMAX(x) ((uint16_t)(x))
/* Dword 4 */
#define DRAW_YORG(x) ((uint32_t)(uint16_t)(x)<<16)
#define DRAW_XORG(x) ((uint16_t)(x))
 
/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */
 
/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */
 
/* _3DSTATE_FOG_COLOR, p173 */
/* CMD_3D with 0x15 at bit 24.  The colour helpers place red at bit 16 and
 * green at bit 8 and leave blue unshifted; none of them masks its argument. */
#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24))
#define FOG_COLOR_RED(x) ((x)<<16)
#define FOG_COLOR_GREEN(x) ((x)<<8)
#define FOG_COLOR_BLUE(x) (x)
 
/* _3DSTATE_FOG_MODE, p174 */
/* CMD_3D with 0x1d at bit 24 and 0x89 at bit 16; the low bits hold the constant 2. */
#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
/* Dword 1 */
/* Bit 31 is written with an unsigned constant: 1<<31 on a signed int
 * shifts into the sign bit. */
#define FMC1_FOGFUNC_MODIFY_ENABLE (1u<<31)
/* Fog function select: 2-bit code at bit 28. */
#define FMC1_FOGFUNC_VERTEX (0<<28)
#define FMC1_FOGFUNC_PIXEL_EXP (1<<28)
#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28)
#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28)
#define FMC1_FOGFUNC_MASK (3<<28)
#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27)
#define FMC1_FOGINDEX_Z (0<<25)
#define FMC1_FOGINDEX_W (1<<25)
#define FMC1_C1_C2_MODIFY_ENABLE (1<<24)
#define FMC1_DENSITY_MODIFY_ENABLE (1<<23)
#define FMC1_C1_ONE (1<<13)
#define FMC1_C1_MASK (0xffff<<4)
/* Dword 2 */
#define FMC2_C2_ONE (1<<16)
/* Dword 3 */
#define FMC3_D_ONE (1<<16)
 
/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
/* CMD_3D with 0x0b at bit 24.  The blend function sits at bit 16, the source
 * factor at bit 6 and the destination factor at bit 0; each has a separate
 * MODIFY flag.  The two *_FACTOR_MASK macros expand BLENDFACT_MASK, which is
 * defined further down; that is fine because macros expand at the point of use. */
#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
#define IAB_MODIFY_ENABLE (1<<23)
#define IAB_ENABLE (1<<22)
#define IAB_MODIFY_FUNC (1<<21)
#define IAB_FUNC_SHIFT 16
#define IAB_MODIFY_SRC_FACTOR (1<<11)
#define IAB_SRC_FACTOR_SHIFT 6
#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6)
#define IAB_MODIFY_DST_FACTOR (1<<5)
#define IAB_DST_FACTOR_SHIFT 0
#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0)

/* Blend factor codes (4-bit, see BLENDFACT_MASK).  Numbering starts at 0x01;
 * the value 0 is not assigned a name here. */
#define BLENDFACT_ZERO 0x01
#define BLENDFACT_ONE 0x02
#define BLENDFACT_SRC_COLR 0x03
#define BLENDFACT_INV_SRC_COLR 0x04
#define BLENDFACT_SRC_ALPHA 0x05
#define BLENDFACT_INV_SRC_ALPHA 0x06
#define BLENDFACT_DST_ALPHA 0x07
#define BLENDFACT_INV_DST_ALPHA 0x08
#define BLENDFACT_DST_COLR 0x09
#define BLENDFACT_INV_DST_COLR 0x0a
#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b
#define BLENDFACT_CONST_COLOR 0x0c
#define BLENDFACT_INV_CONST_COLOR 0x0d
#define BLENDFACT_CONST_ALPHA 0x0e
#define BLENDFACT_INV_CONST_ALPHA 0x0f
#define BLENDFACT_MASK 0x0f

/* Blend function codes (3-bit, see BLENDFUNC_MASK). */
#define BLENDFUNC_ADD 0x0
#define BLENDFUNC_SUBTRACT 0x1
#define BLENDFUNC_REVERSE_SUBTRACT 0x2
#define BLENDFUNC_MIN 0x3
#define BLENDFUNC_MAX 0x4
#define BLENDFUNC_MASK 0x7
 
/* 3DSTATE_LOAD_INDIRECT, p180 */

/* CMD_3D with 0x1d at bit 24 and 0x7 at bit 16.  The LI0_STATE_* values are
 * six single-bit flags occupying bits 8..13. */
#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16))
#define LI0_STATE_STATIC_INDIRECT (0x01<<8)
#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8)
#define LI0_STATE_SAMPLER (0x04<<8)
#define LI0_STATE_MAP (0x08<<8)
#define LI0_STATE_PROGRAM (0x10<<8)
#define LI0_STATE_CONSTANTS (0x20<<8)

/* The six families below (SIS, DIS, SSB, MSB, PSP, PSC) share one layout:
 * <X>0_BUFFER_ADDRESS clears the two low bits of x, bit 1 is FORCE_LOAD
 * (BUFFER_RESET for DIS), bit 0 is BUFFER_VALID, and <X>1_BUFFER_LENGTH
 * keeps the low 8 bits of x.  DIS has no length macro.  Presumably one
 * family per LI0_STATE_* flag above -- confirm against the docs. */
#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SIS0_FORCE_LOAD (1<<1)
#define SIS0_BUFFER_VALID (1<<0)
#define SIS1_BUFFER_LENGTH(x) ((x)&0xff)

#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define DIS0_BUFFER_RESET (1<<1)
#define DIS0_BUFFER_VALID (1<<0)

#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define SSB0_FORCE_LOAD (1<<1)
#define SSB0_BUFFER_VALID (1<<0)
#define SSB1_BUFFER_LENGTH(x) ((x)&0xff)

#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define MSB0_FORCE_LOAD (1<<1)
#define MSB0_BUFFER_VALID (1<<0)
#define MSB1_BUFFER_LENGTH(x) ((x)&0xff)

#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSP0_FORCE_LOAD (1<<1)
#define PSP0_BUFFER_VALID (1<<0)
#define PSP1_BUFFER_LENGTH(x) ((x)&0xff)

#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3)
#define PSC0_FORCE_LOAD (1<<1)
#define PSC0_BUFFER_VALID (1<<0)
#define PSC1_BUFFER_LENGTH(x) ((x)&0xff)

/* _3DSTATE_RASTERIZATION_RULES */
/* CMD_3D with 0x07 at bit 24.  The two *_PROVOKE_VRTX(x) helpers shift x to
 * bits 6 and 3 without masking it. */
#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE (1<<15)
#define OGL_POINT_RASTER_RULE (1<<13)
#define ENABLE_TEXKILL_3D_4D (1<<10)
#define TEXKILL_3D (0<<9)
#define TEXKILL_4D (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5)
#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6)
#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3)

/* _3DSTATE_SCISSOR_ENABLE, p256 */
/* CMD_3D with 0x1c at bit 24 and 0x10 at bit 19.  Bit 1 is set in both the
 * enable and the disable value; only bit 0 differs between them. */
#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT ((1<<1) | 1)
#define DISABLE_SCISSOR_RECT (1<<1)

/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
/* CMD_3D with 0x1d at bit 24 and 0x81 at bit 16; the low bits hold the
 * constant 1.  Y goes in the high half of each dword and X in the low half;
 * unlike the DRAW_* helpers these do not truncate x to 16 bits. */
#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x) ((x)<<16)
#define SCISSOR_RECT_0_XMIN(x) (x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x) ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x) (x)
 
/* p189 */
/* 0x3 at bit 29 (same value as CMD_3D), 0x1d at bit 24 and 0x04 at bit 16. */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
/* Flag selecting state word Sn: bit (4 + n).  The argument is parenthesised
 * so that expressions such as I1_LOAD_S(i & 3) are evaluated before the add. */
#define I1_LOAD_S(n) (1<<(4+(n)))

/* NOTE(review): this mask has seven hex digits (bits 27..2); confirm against
 * the hardware documentation that 0xfffffffc was not intended. */
#define S0_VB_OFFSET_MASK 0xffffffc
#define S0_AUTO_CACHE_INV_DISABLE (1<<0)

/* S1: two 6-bit fields, vertex width at bit 24 and vertex pitch at bit 16. */
#define S1_VERTEX_WIDTH_SHIFT 24
#define S1_VERTEX_WIDTH_MASK (0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT 16
#define S1_VERTEX_PITCH_MASK (0x3f<<16)

/* S2: one 4-bit texture coordinate format code per unit. */
#define TEXCOORDFMT_2D 0x0
#define TEXCOORDFMT_3D 0x1
#define TEXCOORDFMT_4D 0x2
#define TEXCOORDFMT_1D 0x3
#define TEXCOORDFMT_2D_16 0x4
#define TEXCOORDFMT_4D_16 0x5
#define TEXCOORDFMT_NOT_PRESENT 0xf
#define S2_TEXCOORD_FMT0_MASK 0xf
#define S2_TEXCOORD_FMT1_SHIFT 4
/* Shift a format code into the nibble of the given unit (bit unit*4).  Both
 * arguments are parenthesised so that S2_TEXCOORD_FMT(i + 1, t) shifts by
 * (i + 1) * 4. */
#define S2_TEXCOORD_FMT(unit, type) ((type)<<((unit)*4))
/* All bits set, i.e. TEXCOORDFMT_NOT_PRESENT in every nibble. */
#define S2_TEXCOORD_NONE (~0)
 
/* Per-unit flag bits within a 4-bit group: wrap-shortest for X, Y and Z at
 * bits 3, 2 and 1, perspective disable at bit 0. */
#define TEXCOORD_WRAP_SHORTEST_TCX 8
#define TEXCOORD_WRAP_SHORTEST_TCY 4
#define TEXCOORD_WRAP_SHORTEST_TCZ 2
#define TEXCOORD_PERSPECTIVE_DISABLE 1

/* S3: the flags above shifted into the nibble of the given unit (bit unit*4). */
#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4))
#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4))
#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4))
#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4))
 
/* S3 not interesting */
 
/* S4: 9-bit point width at bit 23, 4-bit line width at bit 19, flat-shade
 * flags at bits 18..15, 2-bit cull mode at bit 13, vertex format flags and a
 * 3-bit position format at bit 6, and miscellaneous flags in bits 5..0. */
#define S4_POINT_WIDTH_SHIFT 23
/* Unsigned constant: 0x1ff<<23 reaches bit 31 and would overflow a signed int. */
#define S4_POINT_WIDTH_MASK (0x1ffu<<23)
#define S4_LINE_WIDTH_SHIFT 19
#define S4_LINE_WIDTH_ONE (0x2<<19)
#define S4_LINE_WIDTH_MASK (0xf<<19)
#define S4_FLATSHADE_ALPHA (1<<18)
#define S4_FLATSHADE_FOG (1<<17)
#define S4_FLATSHADE_SPECULAR (1<<16)
#define S4_FLATSHADE_COLOR (1<<15)
#define S4_CULLMODE_BOTH (0<<13)
#define S4_CULLMODE_NONE (1<<13)
#define S4_CULLMODE_CW (2<<13)
#define S4_CULLMODE_CCW (3<<13)
#define S4_CULLMODE_MASK (3<<13)
#define S4_VFMT_POINT_WIDTH (1<<12)
#define S4_VFMT_SPEC_FOG (1<<11)
#define S4_VFMT_COLOR (1<<10)
#define S4_VFMT_DEPTH_OFFSET (1<<9)
#define S4_VFMT_XYZ (1<<6)
#define S4_VFMT_XYZW (2<<6)
#define S4_VFMT_XY (3<<6)
#define S4_VFMT_XYW (4<<6)
#define S4_VFMT_XYZW_MASK (7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE (1<<5)
#define S4_FORCE_DEFAULT_SPECULAR (1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3)
#define S4_VFMT_FOG_PARAM (1<<2)
#define S4_SPRITE_POINT_ENABLE (1<<1)
#define S4_LINE_ANTIALIAS_ENABLE (1<<0)

/* Union of every S4_VFMT_* bit, including the position format field. */
#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \
S4_VFMT_SPEC_FOG | \
S4_VFMT_COLOR | \
S4_VFMT_DEPTH_OFFSET | \
S4_VFMT_XYZW_MASK | \
S4_VFMT_FOG_PARAM)
 
/* S5: per-channel write-disable flags in bits 31..28, assorted enables in
 * bits 27..24, an 8-bit stencil reference at bit 16, four 3-bit stencil
 * fields at bits 13, 10, 7 and 4, and single-bit flags in bits 3..0.
 * The constants that reach bit 31 are unsigned: shifting a signed int into
 * the sign bit overflows and the result sign-extends when widened. */
#define S5_WRITEDISABLE_ALPHA (1u<<31)
#define S5_WRITEDISABLE_RED (1<<30)
#define S5_WRITEDISABLE_GREEN (1<<29)
#define S5_WRITEDISABLE_BLUE (1<<28)
#define S5_WRITEDISABLE_MASK (0xfu<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27)
#define S5_LAST_PIXEL_ENABLE (1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25)
#define S5_FOG_ENABLE (1<<24)
#define S5_STENCIL_REF_SHIFT 16
#define S5_STENCIL_REF_MASK (0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT 13
#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13)
#define S5_STENCIL_FAIL_SHIFT 10
#define S5_STENCIL_FAIL_MASK (0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7
#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT 4
#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4)
#define S5_STENCIL_WRITE_ENABLE (1<<3)
#define S5_STENCIL_TEST_ENABLE (1<<2)
#define S5_COLOR_DITHER_ENABLE (1<<1)
#define S5_LOGICOP_ENABLE (1<<0)
 
/*
 * S6 bit definitions: alpha test, depth test, colour-buffer blend and
 * write enables, tristrip provoking-vertex field.
 *
 * S6_ALPHA_TEST_ENABLE is bit 31 and therefore built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define S6_ALPHA_TEST_ENABLE (1u << 31)
#define S6_ALPHA_TEST_FUNC_SHIFT 28
#define S6_ALPHA_TEST_FUNC_MASK (0x7 << 28)
#define S6_ALPHA_REF_SHIFT 20
#define S6_ALPHA_REF_MASK (0xff << 20)
#define S6_DEPTH_TEST_ENABLE (1 << 19)
#define S6_DEPTH_TEST_FUNC_SHIFT 16
#define S6_DEPTH_TEST_FUNC_MASK (0x7 << 16)
#define S6_CBUF_BLEND_ENABLE (1 << 15)
#define S6_CBUF_BLEND_FUNC_SHIFT 12
#define S6_CBUF_BLEND_FUNC_MASK (0x7 << 12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8
#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf << 8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT 4
#define S6_CBUF_DST_BLEND_FACT_MASK (0xf << 4)
#define S6_DEPTH_WRITE_ENABLE (1 << 3)
#define S6_COLOR_WRITE_ENABLE (1 << 2)
#define S6_TRISTRIP_PV_SHIFT 0
#define S6_TRISTRIP_PV_MASK (0x3 << 0)
 
/* S7: every bit belongs to the depth-offset constant. */
#define S7_DEPTH_OFFSET_CONST_MASK (~0)
 
/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */
/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
 
/* _3DSTATE_MODES_4, p218 */
#define _3DSTATE_MODES_4_CMD (CMD_3D | (0x0d << 24))
#define ENABLE_LOGIC_OP_FUNC (1 << 23)
/*
 * Place a logic-op code in its 4-bit field (see LOGICOP_MASK).  The
 * argument is masked so an out-of-range value cannot spill into the
 * neighbouring bits, matching what STENCIL_WRITE_MASK() does.
 */
#define LOGIC_OP_FUNC(x) (((x) & 0xf) << 18)
#define LOGICOP_MASK (0xf << 18)
#define LOGICOP_COPY 0xc
#define MODE4_ENABLE_STENCIL_TEST_MASK ((1 << 17) | (0xff00))
#define ENABLE_STENCIL_TEST_MASK (1 << 17)
/*
 * Place an 8-bit stencil test mask in bits 8..15.  Masked to 8 bits so a
 * wider argument cannot set ENABLE_STENCIL_WRITE_MASK (bit 16) or
 * ENABLE_STENCIL_TEST_MASK (bit 17) by accident.
 */
#define STENCIL_TEST_MASK(x) (((x) & 0xff) << 8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1 << 16) | (0x00ff))
#define ENABLE_STENCIL_WRITE_MASK (1 << 16)
/* Place an 8-bit stencil write mask in bits 0..7. */
#define STENCIL_WRITE_MASK(x) ((x) & 0xff)
 
/* _3DSTATE_MODES_5, p220: command opcode plus its two cache-flush flags. */
#define _3DSTATE_MODES_5_CMD (CMD_3D | (0x0c << 24))
#define PIPELINE_FLUSH_RENDER_CACHE (1 << 18)
#define PIPELINE_FLUSH_TEXTURE_CACHE (1 << 16)
 
/* p221: pixel shader constants command and its per-dword helpers. */
#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D | (0x1d << 24) | (0x6 << 16))
/* One bit per register number n. */
#define PS1_REG(n) (1 << (n))
/* The X/Y/Z/W constant dwords carry their argument unchanged. */
#define PS2_CONST_X(n) (n)
#define PS3_CONST_Y(n) (n)
#define PS4_CONST_Z(n) (n)
#define PS5_CONST_W(n) (n)
 
/* p222 */
 
/* Upper limits for a pixel shader program, by category. */
#define I915_MAX_TEX_INDIRECT 4 /* texture indirections */
#define I915_MAX_TEX_INSN 32 /* texture instructions */
#define I915_MAX_ALU_INSN 64 /* arithmetic instructions */
#define I915_MAX_DECL_INSN 27 /* declaration instructions */
#define I915_MAX_TEMPORARY 16 /* temporaries */
 
/*
 * Pixel shader program command.
 *
 * Every instruction occupies 3 dwords, although most do not need all of
 * that space.  At most 123 instructions, with smaller per-type maxima.
 */
#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D | (0x1d << 24) | (0x5 << 16))
 
/* Register type selectors. */

/* Temporary regs: no dcl needed, must be written before being read.
 * Preserved between phases.
 */
#define REG_TYPE_R 0

/* Interpolated values; must be dcl'ed before use.
 *   0..7: texture coord,
 *   8:    diffuse spec,
 *   9:    specular color,
 *   10:   fog parameter in w.
 */
#define REG_TYPE_T 1

/* Constants.  Restriction: only one const can be referenced per
 * instruction, though it may be selected for multiple inputs.
 * Constants not initialized default to zero.
 */
#define REG_TYPE_CONST 2

/* Sampler. */
#define REG_TYPE_S 3

/* Output color (rgba). */
#define REG_TYPE_OC 4

/* Output depth (w); xyz are temporaries.  If not written, interpolated
 * depth is used?
 */
#define REG_TYPE_OD 5

/* Unpreserved temporaries. */
#define REG_TYPE_U 6

/* Widths of the register type and register number fields. */
#define REG_TYPE_MASK 0x7
#define REG_NR_MASK 0xf
 
/* Register numbers within REG_TYPE_T. */
#define T_TEX0 0
#define T_TEX1 1
#define T_TEX2 2
#define T_TEX3 3
#define T_TEX4 4
#define T_TEX5 5
#define T_TEX6 6
#define T_TEX7 7
#define T_DIFFUSE 8
#define T_SPECULAR 9
/* Interpolated fog is in the W coord. */
#define T_FOG_W 10
 
/* Arithmetic instructions: opcodes and first-dword (A0) fields. */

/* .replicate_swizzle == selection and replication of a particular
 * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
 */
#define A0_NOP (0x00 << 24) /* no operation */
#define A0_ADD (0x01 << 24) /* dst = src0 + src1 */
#define A0_MOV (0x02 << 24) /* dst = src0 */
#define A0_MUL (0x03 << 24) /* dst = src0 * src1 */
#define A0_MAD (0x04 << 24) /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x05 << 24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3 (0x06 << 24) /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4 (0x07 << 24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC (0x08 << 24) /* dst = src0 - floor(src0) */
#define A0_RCP (0x09 << 24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ (0x0a << 24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP (0x0b << 24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG (0x0c << 24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP (0x0d << 24) /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN (0x0e << 24) /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX (0x0f << 24) /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR (0x10 << 24) /* dst = floor(src0) */
#define A0_MOD (0x11 << 24) /* dst = src0 fmod 1.0 */
#define A0_TRC (0x12 << 24) /* dst = int(src0) */
#define A0_SGE (0x13 << 24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT (0x14 << 24) /* dst = src0 < src1 ? 1.0 : 0.0 */

#define A0_DEST_SATURATE (1 << 22)
/* Destination type.  Allow: R, OC, OD, U */
#define A0_DEST_TYPE_SHIFT 19
/* Destination number.  Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_NR_SHIFT 14
/* Destination write mask, one bit per channel. */
#define A0_DEST_CHANNEL_X (1 << 10)
#define A0_DEST_CHANNEL_Y (2 << 10)
#define A0_DEST_CHANNEL_Z (4 << 10)
#define A0_DEST_CHANNEL_W (8 << 10)
#define A0_DEST_CHANNEL_ALL (0xf << 10)
#define A0_DEST_CHANNEL_SHIFT 10
/* Source 0 register type and number. */
#define A0_SRC0_TYPE_SHIFT 7
#define A0_SRC0_NR_SHIFT 2

/* Convenience combinations of the destination channel bits. */
#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X | A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY | A0_DEST_CHANNEL_Z)
 
/* Source channel selectors: the four components plus constant 0 and 1. */
#define SRC_X 0
#define SRC_Y 1
#define SRC_Z 2
#define SRC_W 3
#define SRC_ZERO 4
#define SRC_ONE 5
 
/*
 * A1_* fields: per-channel select/negate for source 0, register type and
 * number of source 1, and the X/Y channel select/negate of source 1.
 *
 * A1_SRC0_CHANNEL_X_NEGATE is bit 31, so it is built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define A1_SRC0_CHANNEL_X_NEGATE (1u << 31)
#define A1_SRC0_CHANNEL_X_SHIFT 28
#define A1_SRC0_CHANNEL_Y_NEGATE (1 << 27)
#define A1_SRC0_CHANNEL_Y_SHIFT 24
#define A1_SRC0_CHANNEL_Z_NEGATE (1 << 23)
#define A1_SRC0_CHANNEL_Z_SHIFT 20
#define A1_SRC0_CHANNEL_W_NEGATE (1 << 19)
#define A1_SRC0_CHANNEL_W_SHIFT 16
#define A1_SRC1_TYPE_SHIFT 13
#define A1_SRC1_NR_SHIFT 8
#define A1_SRC1_CHANNEL_X_NEGATE (1 << 7)
#define A1_SRC1_CHANNEL_X_SHIFT 4
#define A1_SRC1_CHANNEL_Y_NEGATE (1 << 3)
#define A1_SRC1_CHANNEL_Y_SHIFT 0
 
/*
 * A2_* fields: Z/W channel select/negate of source 1, register type and
 * number of source 2, and per-channel select/negate for source 2.
 *
 * A2_SRC1_CHANNEL_Z_NEGATE is bit 31, so it is built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define A2_SRC1_CHANNEL_Z_NEGATE (1u << 31)
#define A2_SRC1_CHANNEL_Z_SHIFT 28
#define A2_SRC1_CHANNEL_W_NEGATE (1 << 27)
#define A2_SRC1_CHANNEL_W_SHIFT 24
#define A2_SRC2_TYPE_SHIFT 21
#define A2_SRC2_NR_SHIFT 16
#define A2_SRC2_CHANNEL_X_NEGATE (1 << 15)
#define A2_SRC2_CHANNEL_X_SHIFT 12
#define A2_SRC2_CHANNEL_Y_NEGATE (1 << 11)
#define A2_SRC2_CHANNEL_Y_SHIFT 8
#define A2_SRC2_CHANNEL_Z_NEGATE (1 << 7)
#define A2_SRC2_CHANNEL_Z_SHIFT 4
#define A2_SRC2_CHANNEL_W_NEGATE (1 << 3)
#define A2_SRC2_CHANNEL_W_SHIFT 0
 
/* Texture instructions: opcodes and T0/T1/T2 dword fields. */

/* Sample texture using predeclared sampler and address, and output
 * filtered texel data to destination register.
 */
#define T0_TEXLD (0x15 << 24)

/* Same as texld but performs a perspective divide of the texture
 * coordinate .xyz values by .w before sampling.
 */
#define T0_TEXLDP (0x16 << 24)

/* Same as texld but biases the computed LOD by w.  Only S4.6 two's comp
 * is used.  This implies that a float to fixed conversion is done.
 */
#define T0_TEXLDB (0x17 << 24)

/* Does not perform a sampling operation.  Simply kills the pixel if any
 * channel of the address register is < 0.0.
 */
#define T0_TEXKILL (0x18 << 24)

/* Destination type.  Allow: R, OC, OD, U
 *
 * Note: U (unpreserved) regs do not retain their values between phases
 * (cannot be used for feedback)
 *
 * Note: oC and OD registers can only be used as the destination of a
 * texture instruction once per phase (this is an implementation
 * restriction).
 */
#define T0_DEST_TYPE_SHIFT 19

/* Destination number.  Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_DEST_NR_SHIFT 14

/* Sampler number; this field is ignored for TEXKILL. */
#define T0_SAMPLER_NR_SHIFT 0
#define T0_SAMPLER_NR_MASK (0xf << 0)

/* Register to use as texture coord.
 * Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase
 */
#define T1_ADDRESS_REG_TYPE_SHIFT 24
#define T1_ADDRESS_REG_NR_SHIFT 17

/* Third dword: must be zero. */
#define T2_MBZ 0
 
/* Declaration instructions: opcode and D0/D1/D2 dword fields. */

/* Declare a t (interpolated attrib) register or an s (sampler) register. */
#define D0_DCL (0x19 << 24)

/* Sample type field. */
#define D0_SAMPLE_TYPE_SHIFT 22
#define D0_SAMPLE_TYPE_2D (0x0 << 22)
#define D0_SAMPLE_TYPE_CUBE (0x1 << 22)
#define D0_SAMPLE_TYPE_VOLUME (0x2 << 22)
#define D0_SAMPLE_TYPE_MASK (0x3 << 22)

/* Declared register type.  Allow: T, S */
#define D0_TYPE_SHIFT 19
/* Declared register number.  Allow T: 0..10, S: 0..15 */
#define D0_NR_SHIFT 14

/* Declared channels, one bit per component. */
#define D0_CHANNEL_X (1 << 10)
#define D0_CHANNEL_Y (2 << 10)
#define D0_CHANNEL_Z (4 << 10)
#define D0_CHANNEL_W (8 << 10)
#define D0_CHANNEL_ALL (0xf << 10)
#define D0_CHANNEL_NONE (0 << 10)

#define D0_CHANNEL_XY (D0_CHANNEL_X | D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ (D0_CHANNEL_XY | D0_CHANNEL_Z)

/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
 * or specular declarations.
 *
 * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
 *
 * Must be zero for S (sampler) dcls
 */

/* Second and third dwords of a declaration: must be zero. */
#define D1_MBZ 0
#define D2_MBZ 0
 
/* p207.
 * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK
 */
#define _3DSTATE_MAP_STATE (CMD_3D | (0x1d << 24) | (0x0 << 16))

/* NOTE(review): 0x8fff leaves bits 12..14 clear while bit 15 is set;
 * confirm against the hardware documentation that this is intended.
 */
#define MS1_MAPMASK_SHIFT 0
#define MS1_MAPMASK_MASK (0x8fff << 0)

/* Bit 31 is built from an unsigned constant: (1 << 31) on a signed int
 * is undefined behaviour.
 */
#define MS2_UNTRUSTED_SURFACE (1u << 31)
#define MS2_ADDRESS_MASK 0xfffffffc
/* NOTE(review): MS2_VERTICAL_LINE_STRIDE and MS2_VERTICAL_OFFSET share the
 * same value (bit 1); MS2_ADDRESS_MASK leaves bits 0 and 1 free, so one of
 * them may have been meant to be bit 0 -- verify before relying on either.
 */
#define MS2_VERTICAL_LINE_STRIDE (1 << 1)
#define MS2_VERTICAL_OFFSET (1 << 1)
 
/* MS3: surface dimensions, surface format, texel format and tiling bits. */
#define MS3_HEIGHT_SHIFT 21
#define MS3_WIDTH_SHIFT 10
#define MS3_PALETTE_SELECT (1 << 9)

/* Surface format: a three-bit field starting at bit 7. */
#define MS3_MAPSURF_FORMAT_SHIFT 7
#define MS3_MAPSURF_FORMAT_MASK (0x7 << 7)
#define MAPSURF_8BIT (1 << 7)
#define MAPSURF_16BIT (2 << 7)
#define MAPSURF_32BIT (3 << 7)
#define MAPSURF_422 (5 << 7)
#define MAPSURF_COMPRESSED (6 << 7)
#define MAPSURF_4BIT_INDEXED (7 << 7)

/* Texel format field, starting at bit 3.  The MT_* values below are
 * grouped by the surface format they belong to.
 */
#define MS3_MT_FORMAT_MASK (0x7 << 3)
#define MS3_MT_FORMAT_SHIFT 3

/* SURFACE_4BIT_INDEXED */
#define MT_4BIT_IDX_ARGB8888 (7 << 3)

/* SURFACE_8BIT */
#define MT_8BIT_I8 (0 << 3)
#define MT_8BIT_L8 (1 << 3)
#define MT_8BIT_A8 (4 << 3)
#define MT_8BIT_MONO8 (5 << 3)

/* SURFACE_16BIT */
#define MT_16BIT_RGB565 (0 << 3)
#define MT_16BIT_ARGB1555 (1 << 3)
#define MT_16BIT_ARGB4444 (2 << 3)
#define MT_16BIT_AY88 (3 << 3)
#define MT_16BIT_88DVDU (5 << 3)
#define MT_16BIT_BUMP_655LDVDU (6 << 3)
#define MT_16BIT_I16 (7 << 3)
#define MT_16BIT_L16 (8 << 3)
#define MT_16BIT_A16 (9 << 3)

/* SURFACE_32BIT */
#define MT_32BIT_ARGB8888 (0 << 3)
#define MT_32BIT_ABGR8888 (1 << 3)
#define MT_32BIT_XRGB8888 (2 << 3)
#define MT_32BIT_XBGR8888 (3 << 3)
#define MT_32BIT_QWVU8888 (4 << 3)
#define MT_32BIT_AXVU8888 (5 << 3)
#define MT_32BIT_LXVU8888 (6 << 3)
#define MT_32BIT_XLVU8888 (7 << 3)
#define MT_32BIT_ARGB2101010 (8 << 3)
#define MT_32BIT_ABGR2101010 (9 << 3)
#define MT_32BIT_AWVU2101010 (0xA << 3)
#define MT_32BIT_GR1616 (0xB << 3)
#define MT_32BIT_VU1616 (0xC << 3)
#define MT_32BIT_xI824 (0xD << 3)
#define MT_32BIT_xA824 (0xE << 3)
#define MT_32BIT_xL824 (0xF << 3)

/* SURFACE_422 */
#define MT_422_YCRCB_SWAPY (0 << 3)
#define MT_422_YCRCB_NORMAL (1 << 3)
#define MT_422_YCRCB_SWAPUV (2 << 3)
#define MT_422_YCRCB_SWAPUVY (3 << 3)

/* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT1 (0 << 3)
#define MT_COMPRESS_DXT2_3 (1 << 3)
#define MT_COMPRESS_DXT4_5 (2 << 3)
#define MT_COMPRESS_FXT1 (3 << 3)
#define MT_COMPRESS_DXT1_RGB (4 << 3)

/* Fence / tiling flags. */
#define MS3_USE_FENCE_REGS (1 << 2)
#define MS3_TILED_SURFACE (1 << 1)
#define MS3_TILE_WALK (1 << 0)
 
/* MS4: pitch, cube-face enables, max LOD, mip layout and volume depth.
 *
 * The pitch is the pitch measured in DWORDS, minus 1
 */
#define MS4_PITCH_SHIFT 21

/* One enable bit per cube face (bits 15..20). */
#define MS4_CUBE_FACE_ENA_NEGX (1 << 20)
#define MS4_CUBE_FACE_ENA_POSX (1 << 19)
#define MS4_CUBE_FACE_ENA_NEGY (1 << 18)
#define MS4_CUBE_FACE_ENA_POSY (1 << 17)
#define MS4_CUBE_FACE_ENA_NEGZ (1 << 16)
#define MS4_CUBE_FACE_ENA_POSZ (1 << 15)
#define MS4_CUBE_FACE_ENA_MASK (0x3f << 15)

#define MS4_MAX_LOD_SHIFT 9
#define MS4_MAX_LOD_MASK (0x3f << 9)

/* Mip layout selector at bit 8; LEGACY and BELOW_LPT share value 0. */
#define MS4_MIP_LAYOUT_LEGACY (0 << 8)
#define MS4_MIP_LAYOUT_BELOW_LPT (0 << 8)
#define MS4_MIP_LAYOUT_RIGHT_LPT (1 << 8)

#define MS4_VOLUME_DEPTH_SHIFT 0
#define MS4_VOLUME_DEPTH_MASK (0xff << 0)
 
/* p244.
 * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK.
 */
#define _3DSTATE_SAMPLER_STATE (CMD_3D | (0x1d << 24) | (0x1 << 16))

/* NOTE(review): 0x8fff leaves bits 12..14 clear while bit 15 is set;
 * confirm against the hardware documentation that this is intended.
 */
#define SS1_MAPMASK_SHIFT 0
#define SS1_MAPMASK_MASK (0x8fff << 0)
 
/*
 * SS2: sampler filter, LOD bias, shadow and anisotropy fields.
 *
 * SS2_REVERSE_GAMMA_ENABLE is bit 31, so it is built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define SS2_REVERSE_GAMMA_ENABLE (1u << 31)
#define SS2_PACKED_TO_PLANAR_ENABLE (1 << 30)
#define SS2_COLORSPACE_CONVERSION (1 << 29)
#define SS2_CHROMAKEY_SHIFT 27
#define SS2_BASE_MIP_LEVEL_SHIFT 22
#define SS2_BASE_MIP_LEVEL_MASK (0x1f << 22)

/* Mip filter field and its (unshifted) values. */
#define SS2_MIP_FILTER_SHIFT 20
#define SS2_MIP_FILTER_MASK (0x3 << 20)
#define MIPFILTER_NONE 0
#define MIPFILTER_NEAREST 1
#define MIPFILTER_LINEAR 3

/* Mag filter field; the FILTER_* values are unshifted. */
#define SS2_MAG_FILTER_SHIFT 17
#define SS2_MAG_FILTER_MASK (0x7 << 17)
#define FILTER_NEAREST 0
#define FILTER_LINEAR 1
#define FILTER_ANISOTROPIC 2
#define FILTER_4X4_1 3
#define FILTER_4X4_2 4
#define FILTER_4X4_FLAT 5
#define FILTER_6X5_MONO 6 /* XXX - check */

/* Min filter field. */
#define SS2_MIN_FILTER_SHIFT 14
#define SS2_MIN_FILTER_MASK (0x7 << 14)

/* LOD bias field. */
#define SS2_LOD_BIAS_SHIFT 5
#define SS2_LOD_BIAS_ONE (0x10 << 5)
#define SS2_LOD_BIAS_MASK (0x1ff << 5)

/* Shadow requires:
 * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
 * FILTER_4X4_x MIN and MAG filters
 */
#define SS2_SHADOW_ENABLE (1 << 4)
#define SS2_MAX_ANISO_MASK (1 << 3)
#define SS2_MAX_ANISO_2 (0 << 3)
#define SS2_MAX_ANISO_4 (1 << 3)
#define SS2_SHADOW_FUNC_SHIFT 0
#define SS2_SHADOW_FUNC_MASK (0x7 << 0)
/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */
 
/*
 * SS3: min LOD, texture-coordinate address modes and map index.
 *
 * SS3_MIN_LOD_MASK reaches bit 31, so it is built from an unsigned
 * constant; (0xff << 24) overflows a signed int, which is undefined
 * behaviour and yields a value that sign-extends when shifted back down.
 */
#define SS3_MIN_LOD_SHIFT 24
#define SS3_MIN_LOD_ONE (0x10 << 24)
#define SS3_MIN_LOD_MASK (0xffu << 24)
#define SS3_KILL_PIXEL_ENABLE (1 << 17)

/* X address mode field; the TEXCOORDMODE_* values are unshifted. */
#define SS3_TCX_ADDR_MODE_SHIFT 12
#define SS3_TCX_ADDR_MODE_MASK (0x7 << 12)
#define TEXCOORDMODE_WRAP 0
#define TEXCOORDMODE_MIRROR 1
#define TEXCOORDMODE_CLAMP_EDGE 2
#define TEXCOORDMODE_CUBE 3
#define TEXCOORDMODE_CLAMP_BORDER 4
#define TEXCOORDMODE_MIRROR_ONCE 5

/* Y and Z address mode fields. */
#define SS3_TCY_ADDR_MODE_SHIFT 9
#define SS3_TCY_ADDR_MODE_MASK (0x7 << 9)
#define SS3_TCZ_ADDR_MODE_SHIFT 6
#define SS3_TCZ_ADDR_MODE_MASK (0x7 << 6)

#define SS3_NORMALIZED_COORDS (1 << 5)
#define SS3_TEXTUREMAP_INDEX_SHIFT 1
#define SS3_TEXTUREMAP_INDEX_MASK (0xf << 1)
#define SS3_DEINTERLACER_ENABLE (1 << 0)

/* SS4: every bit belongs to the border color. */
#define SS4_BORDER_COLOR_MASK (~0)
 
/* 3DSTATE_SPAN_STIPPLE, p258: command opcode, enable bit and 16-bit mask. */
#define _3DSTATE_STIPPLE ((0x3 << 29) | (0x1d << 24) | (0x83 << 16))
#define ST1_ENABLE (1 << 16)
#define ST1_MASK (0xffff)

/* Cache flush selectors. */
#define FLUSH_MAP_CACHE (1 << 0)
#define FLUSH_RENDER_CACHE (1 << 1)
 
#endif
/* -*- c-basic-offset: 4 -*- */
/*
* Copyright © 2006,2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
/*
 * Pixel shader program command.
 *
 * Every instruction occupies 3 dwords, although most do not need all of
 * that space.  At most 123 instructions, with smaller per-type maxima.
 */
#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D | (0x1d << 24) | (0x5 << 16))
 
/* Register type selectors. */

/* Temporary regs: no dcl needed, must be written before being read.
 * Preserved between phases.
 */
#define REG_TYPE_R 0

/* Interpolated values; must be dcl'ed before use.
 *   0..7: texture coord,
 *   8:    diffuse spec,
 *   9:    specular color,
 *   10:   fog parameter in w.
 */
#define REG_TYPE_T 1

/* Constants.  Restriction: only one const can be referenced per
 * instruction, though it may be selected for multiple inputs.
 * Constants not initialized default to zero.
 */
#define REG_TYPE_CONST 2

/* Sampler. */
#define REG_TYPE_S 3

/* Output color (rgba). */
#define REG_TYPE_OC 4

/* Output depth (w); xyz are temporaries.  If not written, interpolated
 * depth is used?
 */
#define REG_TYPE_OD 5

/* Unpreserved temporaries. */
#define REG_TYPE_U 6

/* Packed register descriptor layout: the type is REG_TYPE_MASK wide and
 * sits REG_TYPE_SHIFT bits up; the register number occupies the low bits
 * covered by REG_NR_MASK.
 */
#define REG_TYPE_MASK 0x7
#define REG_TYPE_SHIFT 4
#define REG_NR_MASK 0xf
 
/* Register numbers within REG_TYPE_T. */
#define T_TEX0 0
#define T_TEX1 1
#define T_TEX2 2
#define T_TEX3 3
#define T_TEX4 4
#define T_TEX5 5
#define T_TEX6 6
#define T_TEX7 7
#define T_DIFFUSE 8
#define T_SPECULAR 9
/* Interpolated fog is in the W coord. */
#define T_FOG_W 10
 
/* Arithmetic instructions: opcodes and first-dword (A0) fields. */

/* .replicate_swizzle == selection and replication of a particular
 * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
 */
#define A0_NOP (0x00 << 24) /* no operation */
#define A0_ADD (0x01 << 24) /* dst = src0 + src1 */
#define A0_MOV (0x02 << 24) /* dst = src0 */
#define A0_MUL (0x03 << 24) /* dst = src0 * src1 */
#define A0_MAD (0x04 << 24) /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x05 << 24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3 (0x06 << 24) /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4 (0x07 << 24) /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC (0x08 << 24) /* dst = src0 - floor(src0) */
#define A0_RCP (0x09 << 24) /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ (0x0a << 24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP (0x0b << 24) /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG (0x0c << 24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP (0x0d << 24) /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN (0x0e << 24) /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX (0x0f << 24) /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR (0x10 << 24) /* dst = floor(src0) */
#define A0_MOD (0x11 << 24) /* dst = src0 fmod 1.0 */
#define A0_TRC (0x12 << 24) /* dst = int(src0) */
#define A0_SGE (0x13 << 24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT (0x14 << 24) /* dst = src0 < src1 ? 1.0 : 0.0 */

#define A0_DEST_SATURATE (1 << 22)
/* Destination type.  Allow: R, OC, OD, U */
#define A0_DEST_TYPE_SHIFT 19
/* Destination number.  Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_NR_SHIFT 14
/* Destination write mask, one bit per channel. */
#define A0_DEST_CHANNEL_X (1 << 10)
#define A0_DEST_CHANNEL_Y (2 << 10)
#define A0_DEST_CHANNEL_Z (4 << 10)
#define A0_DEST_CHANNEL_W (8 << 10)
#define A0_DEST_CHANNEL_ALL (0xf << 10)
#define A0_DEST_CHANNEL_SHIFT 10
/* Source 0 register type and number. */
#define A0_SRC0_TYPE_SHIFT 7
#define A0_SRC0_NR_SHIFT 2

/* Convenience combinations of the destination channel bits. */
#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X | A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY | A0_DEST_CHANNEL_Z)
 
/* Source channel selectors: the four components plus constant 0 and 1. */
#define SRC_X 0
#define SRC_Y 1
#define SRC_Z 2
#define SRC_W 3
#define SRC_ZERO 4
#define SRC_ONE 5
 
/*
 * A1_* fields (second dword of an arithmetic instruction): per-channel
 * select/negate for source 0, register type and number of source 1, and
 * the X/Y channel select/negate of source 1.
 *
 * A1_SRC0_CHANNEL_X_NEGATE is bit 31, so it is built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define A1_SRC0_CHANNEL_X_NEGATE (1u << 31)
#define A1_SRC0_CHANNEL_X_SHIFT 28
#define A1_SRC0_CHANNEL_Y_NEGATE (1 << 27)
#define A1_SRC0_CHANNEL_Y_SHIFT 24
#define A1_SRC0_CHANNEL_Z_NEGATE (1 << 23)
#define A1_SRC0_CHANNEL_Z_SHIFT 20
#define A1_SRC0_CHANNEL_W_NEGATE (1 << 19)
#define A1_SRC0_CHANNEL_W_SHIFT 16
#define A1_SRC1_TYPE_SHIFT 13
#define A1_SRC1_NR_SHIFT 8
#define A1_SRC1_CHANNEL_X_NEGATE (1 << 7)
#define A1_SRC1_CHANNEL_X_SHIFT 4
#define A1_SRC1_CHANNEL_Y_NEGATE (1 << 3)
#define A1_SRC1_CHANNEL_Y_SHIFT 0
 
/*
 * A2_* fields (third dword of an arithmetic instruction): Z/W channel
 * select/negate of source 1, register type and number of source 2, and
 * per-channel select/negate for source 2.
 *
 * A2_SRC1_CHANNEL_Z_NEGATE is bit 31, so it is built from an unsigned
 * constant; (1 << 31) on a signed int is undefined behaviour.
 */
#define A2_SRC1_CHANNEL_Z_NEGATE (1u << 31)
#define A2_SRC1_CHANNEL_Z_SHIFT 28
#define A2_SRC1_CHANNEL_W_NEGATE (1 << 27)
#define A2_SRC1_CHANNEL_W_SHIFT 24
#define A2_SRC2_TYPE_SHIFT 21
#define A2_SRC2_NR_SHIFT 16
#define A2_SRC2_CHANNEL_X_NEGATE (1 << 15)
#define A2_SRC2_CHANNEL_X_SHIFT 12
#define A2_SRC2_CHANNEL_Y_NEGATE (1 << 11)
#define A2_SRC2_CHANNEL_Y_SHIFT 8
#define A2_SRC2_CHANNEL_Z_NEGATE (1 << 7)
#define A2_SRC2_CHANNEL_Z_SHIFT 4
#define A2_SRC2_CHANNEL_W_NEGATE (1 << 3)
#define A2_SRC2_CHANNEL_W_SHIFT 0
 
/* Texture instructions: opcodes and T0/T1/T2 dword fields. */

/* Sample texture using predeclared sampler and address, and output
 * filtered texel data to destination register.
 */
#define T0_TEXLD (0x15 << 24)

/* Same as texld but performs a perspective divide of the texture
 * coordinate .xyz values by .w before sampling.
 */
#define T0_TEXLDP (0x16 << 24)

/* Same as texld but biases the computed LOD by w.  Only S4.6 two's comp
 * is used.  This implies that a float to fixed conversion is done.
 */
#define T0_TEXLDB (0x17 << 24)

/* Does not perform a sampling operation.  Simply kills the pixel if any
 * channel of the address register is < 0.0.
 */
#define T0_TEXKILL (0x18 << 24)

/* Destination type.  Allow: R, OC, OD, U
 *
 * Note: U (unpreserved) regs do not retain their values between phases
 * (cannot be used for feedback)
 *
 * Note: oC and OD registers can only be used as the destination of a
 * texture instruction once per phase (this is an implementation
 * restriction).
 */
#define T0_DEST_TYPE_SHIFT 19

/* Destination number.  Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_DEST_NR_SHIFT 14

/* Sampler number; this field is ignored for TEXKILL. */
#define T0_SAMPLER_NR_SHIFT 0
#define T0_SAMPLER_NR_MASK (0xf << 0)

/* Register to use as texture coord.
 * Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase
 */
#define T1_ADDRESS_REG_TYPE_SHIFT 24
#define T1_ADDRESS_REG_NR_SHIFT 17

/* Third dword: must be zero. */
#define T2_MBZ 0
 
/* Declaration instructions: opcode and D0/D1/D2 dword fields. */

/* Declare a t (interpolated attrib) register or an s (sampler) register. */
#define D0_DCL (0x19 << 24)

/* Sample type field. */
#define D0_SAMPLE_TYPE_SHIFT 22
#define D0_SAMPLE_TYPE_2D (0x0 << 22)
#define D0_SAMPLE_TYPE_CUBE (0x1 << 22)
#define D0_SAMPLE_TYPE_VOLUME (0x2 << 22)
#define D0_SAMPLE_TYPE_MASK (0x3 << 22)

/* Declared register type.  Allow: T, S */
#define D0_TYPE_SHIFT 19
/* Declared register number.  Allow T: 0..10, S: 0..15 */
#define D0_NR_SHIFT 14

/* Declared channels, one bit per component. */
#define D0_CHANNEL_X (1 << 10)
#define D0_CHANNEL_Y (2 << 10)
#define D0_CHANNEL_Z (4 << 10)
#define D0_CHANNEL_W (8 << 10)
#define D0_CHANNEL_ALL (0xf << 10)
#define D0_CHANNEL_NONE (0 << 10)

#define D0_CHANNEL_XY (D0_CHANNEL_X | D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ (D0_CHANNEL_XY | D0_CHANNEL_Z)

/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
 * or specular declarations.
 *
 * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
 *
 * Must be zero for S (sampler) dcls
 */

/* Second and third dwords of a declaration: must be zero. */
#define D1_MBZ 0
#define D2_MBZ 0
 
 
/* Unshifted destination write-mask bits for arithmetic operations: one
 * bit per channel, plus MASK_SATURATE as an extra flag above them.
 */
#define MASK_X 0x1
#define MASK_Y 0x2
#define MASK_Z 0x4
#define MASK_W 0x8
#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z)
#define MASK_XYZW (MASK_XYZ | MASK_W)
#define MASK_SATURATE 0x10
 
/* Pack a register type and register number into one descriptor:
 * the type goes REG_TYPE_SHIFT bits up, the number into the low bits.
 */
#define GEN3_FS_REG(type, nr) (((type) << REG_TYPE_SHIFT) | (nr))

/* Temporary, undeclared regs. Preserved between phases */
#define FS_R0 GEN3_FS_REG(REG_TYPE_R, 0)
#define FS_R1 GEN3_FS_REG(REG_TYPE_R, 1)
#define FS_R2 GEN3_FS_REG(REG_TYPE_R, 2)
#define FS_R3 GEN3_FS_REG(REG_TYPE_R, 3)

/* Texture coordinate regs. Must be declared. */
#define FS_T0 GEN3_FS_REG(REG_TYPE_T, 0)
#define FS_T1 GEN3_FS_REG(REG_TYPE_T, 1)
#define FS_T2 GEN3_FS_REG(REG_TYPE_T, 2)
#define FS_T3 GEN3_FS_REG(REG_TYPE_T, 3)
#define FS_T4 GEN3_FS_REG(REG_TYPE_T, 4)
#define FS_T5 GEN3_FS_REG(REG_TYPE_T, 5)
#define FS_T6 GEN3_FS_REG(REG_TYPE_T, 6)
#define FS_T7 GEN3_FS_REG(REG_TYPE_T, 7)
#define FS_T8 GEN3_FS_REG(REG_TYPE_T, 8)
#define FS_T9 GEN3_FS_REG(REG_TYPE_T, 9)
#define FS_T10 GEN3_FS_REG(REG_TYPE_T, 10)

/* Constant values */
#define FS_C0 GEN3_FS_REG(REG_TYPE_CONST, 0)
#define FS_C1 GEN3_FS_REG(REG_TYPE_CONST, 1)
#define FS_C2 GEN3_FS_REG(REG_TYPE_CONST, 2)
#define FS_C3 GEN3_FS_REG(REG_TYPE_CONST, 3)
#define FS_C4 GEN3_FS_REG(REG_TYPE_CONST, 4)
#define FS_C5 GEN3_FS_REG(REG_TYPE_CONST, 5)
#define FS_C6 GEN3_FS_REG(REG_TYPE_CONST, 6)
#define FS_C7 GEN3_FS_REG(REG_TYPE_CONST, 7)

/* Sampler regs */
#define FS_S0 GEN3_FS_REG(REG_TYPE_S, 0)
#define FS_S1 GEN3_FS_REG(REG_TYPE_S, 1)
#define FS_S2 GEN3_FS_REG(REG_TYPE_S, 2)
#define FS_S3 GEN3_FS_REG(REG_TYPE_S, 3)

/* Output color */
#define FS_OC GEN3_FS_REG(REG_TYPE_OC, 0)

/* Output depth */
#define FS_OD GEN3_FS_REG(REG_TYPE_OD, 0)

/* Unpreserved temporary regs.
 * NOTE(review): the A0/T0 destination comments list U as 0..2, yet FS_U3
 * is defined here -- confirm whether U3 is usable.
 */
#define FS_U0 GEN3_FS_REG(REG_TYPE_U, 0)
#define FS_U1 GEN3_FS_REG(REG_TYPE_U, 1)
#define FS_U2 GEN3_FS_REG(REG_TYPE_U, 2)
#define FS_U3 GEN3_FS_REG(REG_TYPE_U, 3)
 
/* Bit positions of the four per-channel swizzle fields inside an operand
 * descriptor.  The X field starts 3 bits above REG_TYPE_SHIFT and each
 * following field sits 4 bits higher than the previous one.
 */
#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3)
#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4)
#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4)
#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4)

/* Width of one channel field. */
#define REG_CHANNEL_MASK 0xf

/* Field extractors for a packed register/operand descriptor. */
#define REG_NR(reg) ((reg) & REG_NR_MASK)
#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK)
#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK)
 
/* Channel selector values used when building operand swizzles.  The low
 * three bits pick a component (or the constants zero/one); bit 3 (0x8)
 * marks the negated variant.
 */
enum gen3_fs_channel {
	X_CHANNEL_VAL = 0,
	Y_CHANNEL_VAL = 1,
	Z_CHANNEL_VAL = 2,
	W_CHANNEL_VAL = 3,
	ZERO_CHANNEL_VAL = 4,
	ONE_CHANNEL_VAL = 5,

	NEG_X_CHANNEL_VAL = 0x8 | X_CHANNEL_VAL,
	NEG_Y_CHANNEL_VAL = 0x8 | Y_CHANNEL_VAL,
	NEG_Z_CHANNEL_VAL = 0x8 | Z_CHANNEL_VAL,
	NEG_W_CHANNEL_VAL = 0x8 | W_CHANNEL_VAL,
	NEG_ONE_CHANNEL_VAL = 0x8 | ONE_CHANNEL_VAL
};
 
/**
 * Build an operand descriptor from a register and four channel selectors.
 *
 * x, y, z and w are bare prefixes (X, NEG_Y, ZERO, ...) that get pasted
 * onto _CHANNEL_VAL and placed at the matching *_CHANNEL_SHIFT position.
 *
 * The whole expansion is parenthesised so the result behaves as a single
 * value when combined with higher-precedence operators such as & or <<.
 */
#define gen3_fs_operand(reg, x, y, z, w) \
	((reg) | \
	 (x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \
	 (y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \
	 (z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \
	 (w##_CHANNEL_VAL << W_CHANNEL_SHIFT))
 
/**
 * Operand that reads a register as-is: each channel selects itself
 * (no swizzling).
 */
#define gen3_fs_operand_reg(reg) gen3_fs_operand(reg, X, Y, Z, W)

/**
 * Operand that reads a register with every channel negated, again
 * without swizzling.
 */
#define gen3_fs_operand_reg_negate(reg) \
	gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W)

/**
 * Operand whose four channels are the constant 0.0.
 */
#define gen3_fs_operand_zero() \
	gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO)

/**
 * Placeholder for an operand slot that is not used; same encoding as the
 * all-zero operand.
 */
#define gen3_fs_operand_none() gen3_fs_operand_zero()

/**
 * Operand whose four channels are the constant 1.0.
 */
#define gen3_fs_operand_one() \
	gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE)
 
/**
 * Convert one channel selector into its instruction-word encoding.
 *
 * The low three bits of val are placed at bit position shift; if bit 3
 * (0x8) of val is set, the negate flag is OR-ed in as well.
 *
 * Every argument is parenthesised so that compound expressions (for
 * example `a | b` as val) are evaluated as a unit.  val is expanded twice.
 */
#define gen3_get_hardware_channel_val(val, shift, negate) \
	((((val) & 0x7) << (shift)) | (((val) & 0x8) ? (negate) : 0))
 
/**
 * Emit a three-dword declaration (D0_DCL) for a sampler or texture
 * coordinate register through OUT_BATCH().
 *
 * The first dword carries the opcode plus the register's type and number.
 * All channels are declared unless the register is a sampler
 * (REG_TYPE_S), in which case the channel field is left zero.  The
 * remaining two dwords are zero.  reg is expanded several times.
 */
#define gen3_fs_dcl(reg) \
do { \
	OUT_BATCH(D0_DCL | \
		  ((REG_TYPE(reg) == REG_TYPE_S) ? 0 : D0_CHANNEL_ALL) | \
		  (REG_NR(reg) << D0_NR_SHIFT) | \
		  (REG_TYPE(reg) << D0_TYPE_SHIFT)); \
	OUT_BATCH(0); \
	OUT_BATCH(0); \
} while (0)
 
/*
 * Shared body of the texture-load emitters: writes one three-dword
 * texture instruction through OUT_BATCH().
 *
 *   dword 0: opcode, destination register type/number, sampler number
 *   dword 1: type/number of the register holding the texture coordinate
 *   dword 2: zero
 *
 * Register arguments are expanded more than once.
 */
#define gen3_fs_emit_tex(opcode, dest_reg, sampler_reg, address_reg) \
do { \
	OUT_BATCH(opcode | \
		  (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \
		  (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \
		  (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \
	OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \
		  (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \
	OUT_BATCH(0); \
} while (0)

/* Emit a T0_TEXLD instruction. */
#define gen3_fs_texld(dest_reg, sampler_reg, address_reg) \
	gen3_fs_emit_tex(T0_TEXLD, dest_reg, sampler_reg, address_reg)

/* Emit a T0_TEXLDP instruction; same operands as gen3_fs_texld(). */
#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg) \
	gen3_fs_emit_tex(T0_TEXLDP, dest_reg, sampler_reg, address_reg)
 
/* Front ends for the arithmetic emitters: `op` is a bare opcode suffix
 * (MOV, ADD, ...) that gets pasted onto A0_ before being handed to the
 * underscore-prefixed worker macro.
 */
#define gen3_fs_arith_masked(op, dst, mask, src0, src1, src2) \
	_gen3_fs_arith_masked(A0_##op, dst, mask, src0, src1, src2)

#define gen3_fs_arith(op, dst, src0, src1, src2) \
	_gen3_fs_arith(A0_##op, dst, src0, src1, src2)
 
/*
 * Emit one three-dword arithmetic instruction through OUT_BATCH().
 *
 *   cmd        complete A0_* opcode value
 *   dest_reg   packed destination register (type + number)
 *   dest_mask  MASK_* bits selecting the channels to write; MASK_SATURATE
 *              may be OR-ed in and is translated to A0_DEST_SATURATE
 *   operand0..2  packed source operands (register + per-channel swizzle)
 *
 * cmd is parenthesised so a compound expression (for example a
 * conditional) is combined with the other fields as a single value.
 * Register and operand arguments are expanded several times, so they
 * should be free of side effects.
 */
#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \
do { \
	/* Dword 0: opcode, destination register, write mask and saturate */ \
	OUT_BATCH((cmd) | \
		  (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \
		  (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \
		  (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \
		  (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \
		  /* Set up operand 0 */ \
		  (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \
		  (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \
	/* Dword 1: operand 0 swizzle, operand 1 register and X/Y swizzle */ \
	OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \
						A1_SRC0_CHANNEL_X_SHIFT, \
						A1_SRC0_CHANNEL_X_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_Y(operand0), \
						A1_SRC0_CHANNEL_Y_SHIFT, \
						A1_SRC0_CHANNEL_Y_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_Z(operand0), \
						A1_SRC0_CHANNEL_Z_SHIFT, \
						A1_SRC0_CHANNEL_Z_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_W(operand0), \
						A1_SRC0_CHANNEL_W_SHIFT, \
						A1_SRC0_CHANNEL_W_NEGATE) | \
		  /* Set up operand 1 */ \
		  (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \
		  (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \
		  gen3_get_hardware_channel_val(REG_X(operand1), \
						A1_SRC1_CHANNEL_X_SHIFT, \
						A1_SRC1_CHANNEL_X_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_Y(operand1), \
						A1_SRC1_CHANNEL_Y_SHIFT, \
						A1_SRC1_CHANNEL_Y_NEGATE)); \
	/* Dword 2: operand 1 Z/W swizzle, operand 2 register and swizzle */ \
	OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \
						A2_SRC1_CHANNEL_Z_SHIFT, \
						A2_SRC1_CHANNEL_Z_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_W(operand1), \
						A2_SRC1_CHANNEL_W_SHIFT, \
						A2_SRC1_CHANNEL_W_NEGATE) | \
		  /* Set up operand 2 */ \
		  (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \
		  (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \
		  gen3_get_hardware_channel_val(REG_X(operand2), \
						A2_SRC2_CHANNEL_X_SHIFT, \
						A2_SRC2_CHANNEL_X_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_Y(operand2), \
						A2_SRC2_CHANNEL_Y_SHIFT, \
						A2_SRC2_CHANNEL_Y_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_Z(operand2), \
						A2_SRC2_CHANNEL_Z_SHIFT, \
						A2_SRC2_CHANNEL_Z_NEGATE) | \
		  gen3_get_hardware_channel_val(REG_W(operand2), \
						A2_SRC2_CHANNEL_W_SHIFT, \
						A2_SRC2_CHANNEL_W_NEGATE)); \
} while (0)

/*
 * Unmasked variant: writes all four destination channels without
 * saturation.  MASK_XYZW shifted by A0_DEST_CHANNEL_SHIFT equals
 * A0_DEST_CHANNEL_ALL and carries no MASK_SATURATE bit, so delegating to
 * the masked emitter produces the same three dwords as a separate copy of
 * the body would.
 */
#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) \
	_gen3_fs_arith_masked(cmd, dest_reg, MASK_XYZW, operand0, operand1, operand2)
 
/** Copy operand0 into every channel of dest_reg (MOV). */
#define gen3_fs_mov(dest_reg, operand0) \
	gen3_fs_arith(MOV, dest_reg, operand0, \
		      gen3_fs_operand_none(), gen3_fs_operand_none())

/** Copy operand0 into the channels of dest_reg selected by dest_mask (MOV). */
#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \
	gen3_fs_arith_masked(MOV, dest_reg, dest_mask, operand0, \
			     gen3_fs_operand_none(), gen3_fs_operand_none())

/** Emit FRC with operand0 as the only source, writing dest_reg. */
#define gen3_fs_frc(dest_reg, operand0) \
	gen3_fs_arith(FRC, dest_reg, operand0, \
		      gen3_fs_operand_none(), gen3_fs_operand_none())
 
/** dest_reg = operand0 + operand1 (ADD); the third source is unused. */
#define gen3_fs_add(dest_reg, operand0, operand1) \
	gen3_fs_arith(ADD, dest_reg, operand0, operand1, gen3_fs_operand_none())

/** dest_reg = operand0 * operand1 (MUL); the third source is unused. */
#define gen3_fs_mul(dest_reg, operand0, operand1) \
	gen3_fs_arith(MUL, dest_reg, operand0, operand1, gen3_fs_operand_none())
 
/**
 * dest_reg = 1/(operand0.replicate_swizzle) (RCP).
 * A zero dest_mask selects the unmasked emitter; any other value is
 * passed on as the write mask.
 */
#define gen3_fs_rcp(dest_reg, dest_mask, operand0) \
do { \
	if ((dest_mask) == 0) { \
		gen3_fs_arith(RCP, dest_reg, operand0, \
			      gen3_fs_operand_none(), \
			      gen3_fs_operand_none()); \
	} else { \
		gen3_fs_arith_masked(RCP, dest_reg, dest_mask, operand0, \
				     gen3_fs_operand_none(), \
				     gen3_fs_operand_none()); \
	} \
} while (0)

/**
 * dest_reg = 1/sqrt(operand0.replicate_swizzle) (RSQ).
 * A zero dest_mask selects the unmasked emitter; any other value is
 * passed on as the write mask.
 */
#define gen3_fs_rsq(dest_reg, dest_mask, operand0) \
do { \
	if ((dest_mask) == 0) { \
		gen3_fs_arith(RSQ, dest_reg, operand0, \
			      gen3_fs_operand_none(), \
			      gen3_fs_operand_none()); \
	} else { \
		gen3_fs_arith_masked(RSQ, dest_reg, dest_mask, operand0, \
				     gen3_fs_operand_none(), \
				     gen3_fs_operand_none()); \
	} \
} while (0)
 
/**
 * Puts the minimum of operand0 and operand1 in dest_reg.
 * The third source slot is unused (gen3_fs_operand_none()).
 */
#define gen3_fs_min(dest_reg, operand0, operand1) \
gen3_fs_arith (MIN, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
/**
 * Puts the maximum of operand0 and operand1 in dest_reg.
 * The third source slot is unused (gen3_fs_operand_none()).
 */
#define gen3_fs_max(dest_reg, operand0, operand1) \
gen3_fs_arith (MAX, dest_reg, \
operand0, operand1, \
gen3_fs_operand_none())
 
/**
 * Emit a CMP arithmetic instruction using all three source operands.
 * NOTE(review): presumably selects between operand1 and operand2 based on
 * operand0 -- confirm the exact condition against the hardware docs.
 */
#define gen3_fs_cmp(dest_reg, operand0, operand1, operand2) \
gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2)
 
/**
 * Perform op0 * op1 + op2 and put the result in dest_reg.
 *
 * A non-zero dest_mask selects the masked emitter (gen3_fs_arith_masked);
 * a zero dest_mask falls back to the unmasked gen3_fs_arith.
 */
#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \
} else { \
gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \
} \
} while (0)
 
/**
 * Emit a DP2ADD arithmetic instruction with sources op0, op1 and op2
 * (presumably a 2-component dot product of op0 and op1 plus op2 --
 * confirm against the hardware opcode definition).
 *
 * A non-zero dest_mask selects the masked emitter (gen3_fs_arith_masked);
 * a zero dest_mask falls back to the unmasked gen3_fs_arith.
 */
#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \
} else { \
gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \
} \
} while (0)
 
/**
 * Perform a 3-component dot-product of op0 and op1 and put the
 * resulting scalar in the channels of dest_reg specified by the dest_mask.
 *
 * When dest_mask is zero the unmasked gen3_fs_arith emitter is used
 * instead of gen3_fs_arith_masked.
 */
#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \
op0, op1,\
gen3_fs_operand_none()); \
} else { \
gen3_fs_arith (DP3, dest_reg, op0, op1,\
gen3_fs_operand_none()); \
} \
} while (0)
 
/**
 * Perform a 4-component dot-product of op0 and op1 and put the
 * resulting scalar in the channels of dest_reg specified by the dest_mask.
 *
 * When dest_mask is zero the unmasked gen3_fs_arith emitter is used
 * instead of gen3_fs_arith_masked.
 */
#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1) \
do { \
if (dest_mask) { \
gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \
op0, op1,\
gen3_fs_operand_none()); \
} else { \
gen3_fs_arith (DP4, dest_reg, op0, op1,\
gen3_fs_operand_none()); \
} \
} while (0)
 
/* Flag occupying bit 24. NOTE(review): its consumer is not visible in this
 * part of the file -- confirm where it is OR-ed in before relying on it. */
#define SHADER_TRAPEZOIDS (1 << 24)
/drivers/video/Intel-2D/sna/gen4_render.c
0,0 → 1,2414
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen4_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/* gen4 has a serious issue with its shaders that we need to flush
 * after every rectangle... So until that is resolved, prefer
 * the BLT engine.
 */
/* NOTE(review): FORCE_SPANS and FORCE_NONRECTILINEAR_SPANS are not
 * referenced in this part of the file; confirm their meaning at the use. */
#define FORCE_SPANS 0
#define FORCE_NONRECTILINEAR_SPANS -1
/* Non-zero enables the periodic MI_FLUSH in gen4_get_rectangles() and
 * enlarges the batch-space reservations made for it. */
#define FORCE_FLUSH 1 /* https://bugs.freedesktop.org/show_bug.cgi?id=55500 */

/* NOTE(review): presumably debug switches that disable the named
 * acceleration paths when set; they are not referenced in this part of
 * the file. */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0
#define NO_VIDEO 0

/* With FORCE_FLUSH, the number of rectangles (vertex count / 3) that
 * gen4_get_rectangles() allows in one primitive before emitting a flush. */
#define MAX_FLUSH_VERTICES 6
 
/* Rounds nreg up to a whole number of 16-register blocks and returns that
 * block count minus one. The argument is parenthesized so that expressions
 * with lower-precedence operators (e.g. `a << b`, `a | b`) expand correctly. */
#define GEN4_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
 * allow anything we would want to do, at potentially lower performance.
 *
 * gen4_emit_urb() multiplies each *_ENTRIES by its *_ENTRY_SIZE and lays
 * the sections out back to back in the order VS, GS, CLIP, SF, CS.
 */
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 32

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64
 
/*
 * This program computes dA/dx and dA/dy for the texture coordinates along
 * with the base texture coordinate. It was extracted from the Mesa driver.
 */

/* NOTE(review): presumably the GRF register counts requested for the SF
 * and PS (WM) kernels; their consumers are outside this part of the file. */
#define SF_KERNEL_NUM_GRF 16
#define PS_KERNEL_NUM_GRF 32

/* NOTE(review): presumably per-unit hardware thread limits (G4X being the
 * later gen4 variant); confirm at the state-setup code that uses them. */
#define GEN4_MAX_SF_THREADS 24
#define GEN4_MAX_WM_THREADS 32
#define G4X_MAX_WM_THREADS 50
 
/* Pixel-shader program built by concatenating the listed .g4b fragments;
 * each row holds four 32-bit words. Registered below in wm_kernels[] as
 * WM_KERNEL_VIDEO_PACKED. */
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_argb.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
/* Same fragment chain as ps_kernel_packed_static except that the sampling
 * stage uses the planar variant. Registered below in wm_kernels[] as
 * WM_KERNEL_VIDEO_PLANAR. */
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g4b"
#include "exa_wm_src_affine.g4b"
#include "exa_wm_src_sample_planar.g4b"
#include "exa_wm_yuv_rgb.g4b"
#include "exa_wm_write.g4b"
};
 
/* Designated-initializer helpers for wm_kernels[]:
 * NOKERNEL stores a symbol with size 0, KERNEL stores the address and
 * sizeof of a static array.
 * NOTE(review): size 0 presumably tells the consumer that `data` is a
 * generator function rather than a binary blob -- confirm where the table
 * is consumed. */
#define NOKERNEL(kernel_enum, func, masked) \
[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
[kernel_enum] = {&kernel, sizeof(kernel), masked}
/* Table of WM (pixel shader) kernels indexed by the WM_KERNEL_* enum.
 * Each affine kernel is listed together with its projective (_P) twin. */
static const struct wm_kernel_info {
const void *data; /* kernel symbol or static program array */
unsigned int size; /* sizeof the array for KERNEL entries, 0 for NOKERNEL */
bool has_mask; /* whether the kernel takes a mask channel */
} wm_kernels[] = {
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
#undef KERNEL
 
/* Blend factors per compositing operator; the row comments name the
 * operator each entry belongs to, and gen4_choose_composite_kernel()
 * indexes the table with its `op` argument. */
static const struct blendinfo {
bool src_alpha; /* set on the rows whose dst_blend is (INV_)SRC_ALPHA */
uint32_t src_blend; /* GEN4_BLENDFACTOR_* applied to the source */
uint32_t dst_blend; /* GEN4_BLENDFACTOR_* applied to the destination */
} gen4_blend_op[] = {
/* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO},
/* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE},
/* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE},
/* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE},
};
 
/**
 * Number of blend-factor values covered by the blend state table: one more
 * than GEN4_BLENDFACTOR_INV_DST_ALPHA, taken to be the highest-valued
 * BLENDFACTOR used in gen4_blend_op.
 *
 * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR,
 * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1)
 
/* Offset of the entry for (src factor s, dst factor d) in a table laid out
 * row-major by source factor, with a stride of 64 between entries.
 * gen4_emit_pipelined_pointers() adds the result to the cc state base. */
#define BLEND_OFFSET(s, d) \
(((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64)

/* Offset of the entry for (source filter, source extend, mask filter,
 * mask extend, kernel), flattened in that order with a stride of 64.
 * gen4_emit_pipelined_pointers() adds the result to the wm state base. */
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel);
 
/* Emission shorthands; all of them expect a variable named `sna` to be in
 * scope at the point of use. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
#define GEN4_MAX_3D_SIZE 8192

/* Reports whether either dimension exceeds GEN4_MAX_3D_SIZE. */
static inline bool too_large(int width, int height)
{
	if (width > GEN4_MAX_3D_SIZE)
		return true;

	return height > GEN4_MAX_3D_SIZE;
}
 
/*
 * Pick the WM kernel id for a composite operation.
 *
 * Without a mask the plain kernel is used; with a non-component-alpha mask
 * the MASK kernel; with component alpha either MASKSA or MASKCA depending
 * on whether gen4_blend_op[op] uses the source alpha. Non-affine
 * operations get the chosen id plus one.
 */
static int
gen4_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int kernel;

	if (!has_mask)
		kernel = WM_KERNEL;
	else if (!is_ca)
		kernel = WM_KERNEL_MASK;
	else if (gen4_blend_op[op].src_alpha)
		kernel = WM_KERNEL_MASKSA;
	else
		kernel = WM_KERNEL_MASKCA;

	if (!is_affine)
		kernel += 1;

	return kernel;
}
 
/*
 * Second pass for component-alpha operations flagged need_magic_ca_pass:
 * switches the pipelined pointers to the PictOpAdd blend with the matching
 * component-alpha kernel and replays the vertices of the current primitive.
 *
 * Returns false without emitting anything when op->need_magic_ca_pass is
 * unset; otherwise emits the commands and returns true. The op's own
 * pipelined pointers are NOT restored here; the callers that keep using
 * the batch re-emit them.
 */
static bool gen4_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen4_render_state *state = &sna->render_state.gen4;

if (!op->need_magic_ca_pass)
return false;

/* there must be at least one vertex to replay */
assert(sna->render.vertex_index > sna->render.vertex_start);

DBG(("%s: CA fixup\n", __FUNCTION__));
assert(op->mask.bo != NULL);
assert(op->has_component_alpha);

gen4_emit_pipelined_pointers(sna, op, PictOpAdd,
gen4_choose_composite_kernel(PictOpAdd,
true, true, op->is_affine));

/* 6-dword 3DPRIMITIVE covering [vertex_start, vertex_index) */
OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */

/* remember where this primitive ended in the batch */
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
/*
 * Return the BLEND_OFFSET() of the blend state to use.
 *
 * NOTE: in this port the factors are hard-coded to
 * (ONE, INV_SRC_ALPHA); `op` and `has_component_alpha` do not influence
 * the result, and `dst_format` is only used by the debug message. The
 * per-operator lookup and its adjustments are compiled out under `#if 0`.
 */
static uint32_t gen4_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;

src = GEN4_BLENDFACTOR_ONE; //gen4_blend_op[op].src_blend;
dst = GEN4_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
#if 0
/* If there's no dst alpha channel, adjust the blend op so that we'll treat
* it as always 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN4_BLENDFACTOR_DST_ALPHA)
src = GEN4_BLENDFACTOR_ONE;
else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA)
src = GEN4_BLENDFACTOR_ZERO;
}

/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen4_blend_op[op].src_alpha) {
if (dst == GEN4_BLENDFACTOR_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN4_BLENDFACTOR_INV_SRC_COLOR;
}
#endif
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, BLEND_OFFSET(src, dst)));
return BLEND_OFFSET(src, dst);
}
 
/*
 * Translate a picture format into the GEN4 surface format used when the
 * picture is sampled. Formats without a mapping yield (uint32_t)-1.
 */
static uint32_t gen4_get_card_format(PictFormat format)
{
	uint32_t card = -1;

	switch (format) {
	case PICT_a8r8g8b8:
		card = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
		break;
	case PICT_x8r8g8b8:
		card = GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
		break;
	case PICT_a8b8g8r8:
		card = GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
		break;
	case PICT_x8b8g8r8:
		card = GEN4_SURFACEFORMAT_R8G8B8X8_UNORM;
		break;
	case PICT_a2r10g10b10:
		card = GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
		break;
	case PICT_x2r10g10b10:
		card = GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
		break;
	case PICT_r8g8b8:
		card = GEN4_SURFACEFORMAT_R8G8B8_UNORM;
		break;
	case PICT_r5g6b5:
		card = GEN4_SURFACEFORMAT_B5G6R5_UNORM;
		break;
	case PICT_a1r5g5b5:
		card = GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
		break;
	case PICT_a8:
		card = GEN4_SURFACEFORMAT_A8_UNORM;
		break;
	case PICT_a4r4g4b4:
		card = GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
		break;
	default:
		break;
	}

	return card;
}
 
/*
 * Translate a picture format into the GEN4 surface format used when the
 * picture is a render target. The alpha-less (x) variants share the
 * format of their alpha counterparts. Formats without a mapping yield
 * (uint32_t)-1.
 */
static uint32_t gen4_get_dest_format(PictFormat format)
{
	uint32_t card = -1;

	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		card = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
		break;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		card = GEN4_SURFACEFORMAT_R8G8B8A8_UNORM;
		break;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		card = GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
		break;
	case PICT_r5g6b5:
		card = GEN4_SURFACEFORMAT_B5G6R5_UNORM;
		break;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		card = GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
		break;
	case PICT_a8:
		card = GEN4_SURFACEFORMAT_A8_UNORM;
		break;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		card = GEN4_SURFACEFORMAT_B4G4R4A4_UNORM;
		break;
	default:
		break;
	}

	return card;
}
 
/* A gen4_surface_state padded out to exactly 32 bytes. The surface-state
 * and binding-table allocators in this file carve space from the batch in
 * units of this size. */
typedef struct gen4_surface_state_padded {
struct gen4_surface_state state;
char pad[32 - sizeof(struct gen4_surface_state)];
} gen4_surface_state_padded;
 
/* Reserve a 64-byte, 64-aligned region in the static stream; the returned
 * mapping is not written to here. */
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
/*
 * Fill in a sampler state for the given filter and extend (repeat) mode.
 *
 * Unknown filters are treated as nearest; unknown extend modes are treated
 * as SAMPLER_EXTEND_NONE (clamp to border). The same filter is used for
 * minification and magnification, and the same wrap mode on all three
 * texture coordinates.
 */
static void
sampler_state_init(struct gen4_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	uint32_t map_filter;
	uint32_t wrap_mode;

	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* Legacy border colour mode gives the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY;

	if (filter == SAMPLER_FILTER_BILINEAR)
		map_filter = GEN4_MAPFILTER_LINEAR;
	else
		map_filter = GEN4_MAPFILTER_NEAREST;

	sampler_state->ss0.min_filter = map_filter;
	sampler_state->ss0.mag_filter = map_filter;

	switch (extend) {
	case SAMPLER_EXTEND_REPEAT:
		wrap_mode = GEN4_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		wrap_mode = GEN4_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		wrap_mode = GEN4_TEXCOORDMODE_MIRROR;
		break;
	case SAMPLER_EXTEND_NONE:
	default:
		wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER;
		break;
	}

	sampler_state->ss1.r_wrap_mode = wrap_mode;
	sampler_state->ss1.s_wrap_mode = wrap_mode;
	sampler_state->ss1.t_wrap_mode = wrap_mode;
}
 
/*
 * Map an I915_TILING_* mode to the surface-state tiling bits.
 * Unknown modes trip an assertion and, when assertions are compiled out,
 * are treated like I915_TILING_NONE.
 */
static uint32_t
gen4_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	case I915_TILING_X:
		return GEN4_SURFACE_TILED;
	case I915_TILING_Y:
		return GEN4_SURFACE_TILED | GEN4_SURFACE_TILED_Y;
	case I915_TILING_NONE:
		return 0;
	default:
		assert(0);
		return 0;
	}
}
 
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * A surface state already recorded on the bo for the same
 * (format, is_dst) pair is reused; otherwise a new padded surface state
 * is carved from the top of the batch, filled in and recorded on the bo.
 *
 * @param sna     driver state owning the batch
 * @param bo      buffer object to bind
 * @param width   surface width in pixels (stored as width - 1)
 * @param height  surface height in pixels (stored as height - 1)
 * @param format  GEN4_SURFACEFORMAT_* value
 * @param is_dst  true to bind as a render target, false as a sampler source
 * @return offset of the surface state, in bytes (dword index * 4)
 */
static uint32_t
gen4_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	/* Bit 31 of the cache key separates render-target bindings from
	 * sampler bindings. The shift is done in uint32_t: shifting the
	 * int-promoted bool into the sign bit would be undefined behaviour. */
	const uint32_t key = format | (uint32_t)is_dst << 31;
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	assert(sna->kgem.gen != 040 || !kgem_bo_is_snoop(bo));

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, key);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* surface states grow downwards from the end of the batch */
	offset = sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN4_SURFACE_2D << GEN4_SURFACE_TYPE_SHIFT |
		 GEN4_SURFACE_BLEND_ENABLED |
		 format << GEN4_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN4_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN4_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN4_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen4_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN4_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, key, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
 
/*
 * Emit the 3DSTATE_VERTEX_BUFFERS packet (5 dwords) for the op's vertex
 * element id and mark that id as bound in sna->render.vb_id.
 *
 * The buffer address is not known yet: the batch position of the address
 * dword is appended to sna->render.vertex_reloc so it can be patched later.
 * Must only be called while the id is not already marked in vb_id.
 */
static void gen4_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = op->u.gen4.ve_id;

assert((sna->render.vb_id & (1 << id)) == 0);

OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
/* pitch is the vertex size in bytes: 4 bytes per float */
OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
(4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
/* record where the placeholder dword emitted next lives in the batch */
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);

sna->render.vb_id |= 1 << id;
}
 
/*
 * Open a RECTLIST 3DPRIMITIVE whose vertex count is filled in later.
 *
 * If nothing has been emitted since the previous primitive
 * (nbatch == last_primitive) that primitive is simply reopened by pointing
 * vertex_offset back at its vertex-count dword instead of emitting a new
 * packet.
 */
static void gen4_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) {
/* the count dword sits 5 dwords before the end of the 6-dword packet */
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}

OUT_BATCH(GEN4_3DPRIMITIVE |
GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL |
(_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) |
(0 << 9) |
4);
/* remember the batch position of the vertex-count dword */
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;

sna->render_state.gen4.last_primitive = sna->kgem.nbatch;
}
 
/*
 * Make sure a vertex buffer is bound for the op's vertex element id and
 * that a primitive is open, so rectangles can be appended.
 *
 * Returns true when the batch is ready: either a primitive was found open
 * after sna_vertex_wait__locked() reported true, or the required packets
 * were emitted. Returns false, emitting nothing, when the batch lacks room.
 */
static bool gen4_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
unsigned int id = 1 << op->u.gen4.ve_id;
int ndwords;

if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;

/* 7xpipelined pointers + 6xprimitive + 1xflush */
/* NOTE(review): the CA budget below (20) is larger than the 14 dwords
 * itemised above -- confirm what the extra headroom covers. */
ndwords = op->need_magic_ca_pass? 20 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5; /* size of the vertex buffer packet */
ndwords += 2*FORCE_FLUSH;

if (!kgem_check_batch(&sna->kgem, ndwords))
return false;

if ((sna->render.vb_id & id) == 0)
gen4_emit_vertex_buffer(sna, op);
if (sna->render.vertex_offset == 0)
gen4_emit_primitive(sna);

return true;
}
 
/*
 * Slow path of gen4_get_rectangles() taken when the vertex buffer is full:
 * close the current primitive (running the CA pass if the op needs it) and
 * finish the vertex buffer.
 *
 * Returns the vertex space available afterwards, or 0 when the batch has
 * no room for the required commands/relocations (or a CA pass is needed
 * while a vbo is in use), in which case the caller submits the batch.
 */
static int gen4_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Prevent discarding a new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}

if (!kgem_check_batch(&sna->kgem,
2*FORCE_FLUSH + (op->need_magic_ca_pass ? 25 : 6)))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;

if (op->need_magic_ca_pass && sna->render.vbo)
return 0;

if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
/* the CA pass switched blend/kernel; restore the op's own state */
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}

return gen4_vertex_finish(sna);
}
 
/*
 * Reserve vertex space for up to `want` rectangles (3 vertices each) of
 * the given op and return how many were actually reserved (at least 1,
 * at most `want`).
 *
 * When neither the vertex buffer nor the batch can take more, the batch is
 * submitted, `emit_state` is called to re-emit the op's state into the new
 * batch, and the reservation is retried.
 *
 * With FORCE_FLUSH enabled the number of rectangles in one primitive is
 * capped at MAX_FLUSH_VERTICES, with an MI_FLUSH emitted between
 * primitives once the cap is reached.
 */
inline static int gen4_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
int rem;

assert(want);
#if FORCE_FLUSH
/* non-zero rem here means a primitive is open or can be reopened */
rem = sna->render.vertex_offset;
if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive)
rem = sna->kgem.nbatch - 5;
if (rem) {
/* rectangles still allowed in the current primitive */
rem = MAX_FLUSH_VERTICES - (sna->render.vertex_index - sna->render.vertex_start) / 3;
if (rem <= 0) {
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen4_magic_ca_pass(sna, op))
gen4_emit_pipelined_pointers(sna, op, op->op,
op->u.gen4.wm_kernel);
}
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);
rem = MAX_FLUSH_VERTICES;
}
} else
rem = MAX_FLUSH_VERTICES;
if (want > rem)
want = rem;
#endif

start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen4_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}

/* no primitive open: open one, then re-check the vertex space */
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen4_rectangle_begin(sna, op))
goto flush;
else
goto start;
}

assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
/* trim the request to what fits in the remaining vertex space */
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;

sna->render.vertex_index += 3*want;
return want;

flush:
/* close the primitive, submit the batch and start over in a new one */
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen4_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
/*
 * Carve a zeroed binding table (one padded surface-state slot in size)
 * from the top of the batch.
 *
 * Stores the table's dword index within the batch in *offset and returns
 * a pointer to its first entry.
 */
static uint32_t *
gen4_composite_get_binding_table(struct sna *sna, uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	table = sna->kgem.batch + sna->kgem.surface;

	/* Clear all surplus entries to zero in case of prefetch */
	memset(table, 0, sizeof(struct gen4_surface_state_padded));
	return table;
}
 
static void
gen4_emit_urb(struct sna *sna)
{
int urb_vs_start, urb_vs_size;
int urb_gs_start, urb_gs_size;
int urb_clip_start, urb_clip_size;
int urb_sf_start, urb_sf_size;
int urb_cs_start, urb_cs_size;
 
if (!sna->render_state.gen4.needs_urb)
return;
 
urb_vs_start = 0;
urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
urb_gs_start = urb_vs_start + urb_vs_size;
urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
urb_clip_start = urb_gs_start + urb_gs_size;
urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
urb_sf_start = urb_clip_start + urb_clip_size;
urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
while ((sna->kgem.nbatch & 15) > 12)
OUT_BATCH(MI_NOOP);
 
OUT_BATCH(GEN4_URB_FENCE |
UF0_CS_REALLOC |
UF0_SF_REALLOC |
UF0_CLIP_REALLOC |
UF0_GS_REALLOC |
UF0_VS_REALLOC |
1);
OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
 
/* Constant buffer state */
OUT_BATCH(GEN4_CS_URB_STATE | 0);
OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
 
sna->render_state.gen4.needs_urb = false;
}
 
/*
 * Emit STATE_BASE_ADDRESS (6 dwords): the general state base is relocated
 * against general_bo and the surface state base is relocated with a NULL
 * bo; the media base and upper bounds are left at zero / disabled.
 */
static void
gen4_emit_state_base_address(struct sna *sna)
{
assert(sna->render_state.gen4.general_bo->proxy == NULL);
OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4);
/* each relocation is recorded at the batch position it is emitted to */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
sna->kgem.nbatch,
sna->render_state.gen4.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* media */

/* upper bounds, all disabled */
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
}
 
/*
 * Emit the once-per-batch setup: select the 3D pipeline (using the newer
 * PIPELINE_SELECT opcode from gen 045 onwards) and program the state base
 * addresses. Expects that no surface state has been allocated in the
 * batch yet. Clears needs_invariant.
 */
static void
gen4_emit_invariant(struct sna *sna)
{
	uint32_t select;

	assert(sna->kgem.surface == sna->kgem.batch_size);

	select = sna->kgem.gen >= 045 ?
		NEW_PIPELINE_SELECT : GEN4_PIPELINE_SELECT;
	OUT_BATCH(select | PIPELINE_SELECT_3D);

	gen4_emit_state_base_address(sna);

	sna->render_state.gen4.needs_invariant = false;
}
 
/*
 * Switch the batch to render mode for op->dst.bo and make sure it has room
 * for a full state emission (150 + 50*FORCE_FLUSH dwords and 4 surfaces),
 * submitting the current batch first if it does not. Emits the invariant
 * state when it is pending.
 */
static void
gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

if (!kgem_check_batch_with_surfaces(&sna->kgem, 150 + 50*FORCE_FLUSH, 4)) {
/* NOTE(review): the figures printed here (150, 4*8) do not include
 * the FORCE_FLUSH allowance used in the check above. */
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (sna->render_state.gen4.needs_invariant)
gen4_emit_invariant(sna);
}
 
/*
 * When the op's vertex size differs from the one last used, realign the
 * vertex buffer cursor: vertex_index becomes vertex_used rounded up to a
 * whole number of new-sized vertices, and vertex_used is moved to match.
 * The vertex buffer is finished first if fewer than two rectangles' worth
 * of floats remain.
 */
static void
gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert(op->floats_per_rect == 3*op->floats_per_vertex);
if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);

DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen4.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
/* round up to the next whole vertex of the new size */
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex;
}
}
 
static void
gen4_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen4.surface_table == offset)
return;
 
sna->render_state.gen4.surface_table = offset;
 
/* Binding table pointers */
OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
}
 
/*
 * Emit 3DSTATE_PIPELINED_POINTERS selecting the sampler/kernel state
 * (wm + SAMPLER_OFFSET) and blend state (cc + gen4_get_blend) for the op,
 * followed by any pending URB setup.
 *
 * The (sampler offset, blend offset) pair is cached in
 * last_pipelined_pointers; if it is unchanged nothing is emitted, and the
 * URB setup is not attempted either.
 */
static void
gen4_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel)
{
uint16_t sp, bp;
uint32_t key;

DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
__FUNCTION__, op->u.gen4.ve_id & 2,
op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel, blend, op->has_component_alpha, (int)op->dst.format));

sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
op->mask.filter, op->mask.repeat,
kernel);
bp = gen4_get_blend(blend, op->has_component_alpha, op->dst.format);

DBG(("%s: sp=%d, bp=%d\n", __FUNCTION__, sp, bp));
/* cache key: sampler offset in the low half, blend offset in the high half */
key = sp | (uint32_t)bp << 16;
if (key == sna->render_state.gen4.last_pipelined_pointers)
return;

OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5);
OUT_BATCH(sna->render_state.gen4.vs);
OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */
OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */
OUT_BATCH(sna->render_state.gen4.sf);
OUT_BATCH(sna->render_state.gen4.wm + sp);
OUT_BATCH(sna->render_state.gen4.cc + bp);

sna->render_state.gen4.last_pipelined_pointers = key;
gen4_emit_urb(sna);
}
 
/*
 * Program the drawing rectangle from the op's destination: the limit is
 * (width - 1, height - 1) and the offset is (dst.x, dst.y), each packed as
 * y << 16 | x.
 *
 * Returns true when the cached limit and offset already match and nothing
 * was emitted, false when the packet was emitted.
 */
static bool
gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));

if (sna->render_state.gen4.drawrect_limit == limit &&
sna->render_state.gen4.drawrect_offset == offset)
return true;

sna->render_state.gen4.drawrect_offset = offset;
sna->render_state.gen4.drawrect_limit = limit;

OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
return false;
}
 
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS describing the op's vertex layout; skipped
 * when the vertex element id is the one already programmed.
 *
 * Three elements are always emitted: the position, the source channel
 * (format selected by id & 3) and either the mask channel (format selected
 * by id >> 2) or, when there is no mask, a filler element storing
 * (0, 0, 0, 1).
 */
static void
gen4_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen4_render_state *render = &sna->render_state.gen4;
uint32_t src_format, dw;
int id = op->u.gen4.ve_id;

if (render->ve_id == id)
return;
render->ve_id = id;

/* The VUE layout
* dword 0-3: position (x, y, 1.0, 1.0),
* dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0)
* [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0)
*/
/* header length: three elements of two dwords each */
OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + 2) - 1));

/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
(1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
/* the fourth component is always stored as 1.0 */
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
/* falls through to case 0 when assertions are compiled out */
case 0:
src_format = GEN4_SURFACEFORMAT_R16G16_SSCALED;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
/* the source channel starts 4 bytes in, right after the packed x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 8 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);

/* u1, v1, w1 */
if (id >> 2) {
/* skip the position (4 bytes) and the source channel; a source id of 0
 * counts as one float-sized slot */
unsigned src_offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__,
id >> 2, src_offset));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN4_SURFACEFORMAT_R32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
/* falls through to case 2 when assertions are compiled out */
case 2:
src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
src_offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw | 12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
} else {
/* no mask: emit a filler element that stores the constants (0, 0, 0, 1) */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT |
12 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT);
}
}
 
/*
 * Emit all per-operation state: cache flushes as needed, the drawing
 * rectangle, the binding table pointer, the pipelined pointers and the
 * vertex elements.
 *
 * @param wm_binding_table  binding table dword index with bit 0 doubling
 *                          as a "destination was dirty" flag (see the
 *                          caller, which passes offset | dirty)
 */
static void
gen4_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool flush;

assert(op->dst.bo->exec);

flush = wm_binding_table & 1;
/* a dirty source or mask needs a full flush, which makes the lighter
 * flush below unnecessary */
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d), forced? %d\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo),
flush));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
/* the pending flush is dropped when the drawing rectangle was re-emitted */
flush &= gen4_emit_drawing_rectangle(sna, op);
if (flush && op->op > PictOpSrc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

gen4_emit_binding_table(sna, wm_binding_table & ~1);
gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel);
gen4_emit_vertex_elements(sna, op);
}
 
/*
 * Bind the destination, source and (optional) mask surfaces of a composite
 * operation and emit the state that references them.
 *
 * A fresh binding table is allocated and filled. If filling it allocated
 * no new surface states (every surface was already bound) and its entries
 * equal those of the previously emitted table, the fresh table is released
 * again and the previous one is reused.
 */
static void
gen4_bind_surfaces(struct sna *sna,
		   const struct sna_composite_op *op)
{
	/* sampled before binding, which may mark the destination dirty */
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen4_get_batch(sna, op);

	binding_table = gen4_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen4_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen4_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen4_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen4.ve_id >> 2);
		binding_table[2] =
			gen4_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	if (sna->kgem.surface == offset) {
		/* Compare entry by entry: reading the uint32_t batch storage
		 * through a uint64_t pointer would violate strict aliasing. */
		const uint32_t *last =
			sna->kgem.batch + sna->render_state.gen4.surface_table;

		if (last[0] == binding_table[0] &&
		    last[1] == binding_table[1] &&
		    (op->mask.bo == NULL || last[2] == binding_table[2])) {
			/* give the unused table back and reuse the old one */
			sna->kgem.surface +=
				sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
			offset = sna->render_state.gen4.surface_table;
		}
	}

	/* bit 0 carries the "destination was dirty" flag to gen4_emit_state */
	gen4_emit_state(sna, op, offset | dirty);
}
 
/*
 * Composite a single rectangle: reserve vertex space for one rectangle
 * (re-binding surfaces via gen4_bind_surfaces if a new batch had to be
 * started) and let the op's prim_emit hook write its vertices.
 */
fastcall static void
gen4_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));

gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
op->prim_emit(sna, op, r);
}
 
#if 0
fastcall static void
gen4_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen4_get_rectangles(sna, op, 1, gen4_bind_surfaces);
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.mask = r.src = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
/*
 * Emit a list of boxes, one rectangle at a time, through op->prim_emit().
 *
 * Each pass asks gen4_get_rectangles() how many rectangles can be emitted
 * now (at most nbox) and converts that many boxes into
 * sna_composite_rectangles whose source and mask origins equal the
 * destination origin.
 *
 * The inner do/while requires the count to be non-zero, so the result is
 * asserted, as the sibling box emitters in this file already do.
 */
static void
gen4_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
	     __FUNCTION__, nbox, op->dst.x, op->dst.y,
	     op->src.offset[0], op->src.offset[1],
	     op->src.width, op->src.height,
	     op->mask.offset[0], op->mask.offset[1],
	     op->mask.width, op->mask.height));

	do {
		int nbox_this_time;

		nbox_this_time = gen4_get_rectangles(sna, op, nbox,
						     gen4_bind_surfaces);
		/*
		 * Zero would make "--nbox_this_time" below go negative and
		 * the loop would run far past the end of the box array.
		 */
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG((" %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.mask = r.src = r.dst;
			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
 
/*
 * Emit a list of boxes using the op's bulk emit_boxes hook.
 *
 * Each pass reserves vertex space for as many rectangles as
 * gen4_get_rectangles() grants, advances vertex_used by
 * floats_per_rect for each of them, and then lets emit_boxes fill the
 * reserved region starting at v.
 */
static void
gen4_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

do {
int nbox_this_time;
float *v;

nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;

/* Claim the vertex range before it is written. */
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
#if !FORCE_FLUSH
/*
 * Variant of gen4_render_composite_boxes() that takes the vertex lock.
 *
 * The lock is held while vertex space is reserved and dropped while
 * emit_boxes writes into the already reserved range; the
 * acquire__locked/release__locked pair brackets that unlocked window.
 * NOTE(review): presumably acquire/release keep the vertex buffer from
 * being flushed while it is being written — confirm against the
 * sna_vertex_* helpers.
 */
static void
gen4_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;

nbox_this_time = gen4_get_rectangles(sna, op, nbox,
gen4_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;

/* Reserve the range under the lock... */
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);

/* ...and fill it with the lock dropped. */
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;

sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
#endif
 
/*
 * Fallback MAX() for builds that do not already provide one.
 * The selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
/*
 * Allocate and fill one surface state describing a video source plane.
 *
 * src_bo/src_offset locate the plane, src_width/src_height/src_pitch
 * describe it (each is stored minus one) and src_surf_format is written
 * verbatim into the surface format field.
 *
 * Returns kgem.surface scaled by sizeof(uint32_t), i.e. the byte offset
 * of the new surface state, for storing in a binding table.
 */
static uint32_t gen4_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen4_surface_state *ss;

/* Carve one padded surface-state slot off the surface area. */
sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);

ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN4_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;

/* The relocation targets dword 1 of the slot, i.e. ss1.base_addr. */
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);

ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;

return sna->kgem.surface * sizeof(uint32_t);
}
 
/*
 * Build the binding table for a video blit and emit the pipeline state.
 *
 * The frame is taken from op->priv. Entry 0 is the destination; entries
 * 1..n_src are source surfaces created by gen4_bind_video_source():
 *  - planar fourcc: six R8 surfaces — two full-size at offset 0 using
 *    pitch[1], then two half-size at VBufOffset and two half-size at
 *    UBufOffset using pitch[0];
 *  - otherwise: a single packed YCrCb surface at offset 0.
 */
static void gen4_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;

src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;

if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN4_SURFACEFORMAT_YCRCB_NORMAL;

/* Only index 0 is initialised here; the loop below reads just that one. */
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}

gen4_get_batch(sna, op);

binding_table = gen4_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen4_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen4_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen4_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}

/* Bit 0 of the argument carries the destination's dirty state. */
gen4_emit_state(sna, op, offset | dirty);
}
 
/*
 * Render a video frame into every box of dstRegion on pixmap.
 *
 * Returns false if the pixmap cannot be forced onto the GPU, true once
 * all boxes have been emitted and the vertices flushed.
 *
 * NOTE(review): the do/while loops assume dstRegion holds at least one
 * box and that its extents are non-empty (dst_width/dst_height are used
 * as divisors) — confirm callers guarantee this.
 */
static bool
gen4_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;

DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));

priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;

memset(&tmp, 0, sizeof(tmp));

tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;

/* Nearest sampling is used only when no scaling takes place. */
if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen4.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
tmp.u.gen4.ve_id = 2;
tmp.is_affine = true;
/* Matches the emission below: 3 vertices per box, 3 values per vertex. */
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;

/* Submit the current batch first if the buffers do not fit into it. */
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}

gen4_video_bind_surfaces(sna, &tmp);
gen4_align_vertex(sna, &tmp);

/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif

/*
 * scale/offset map a destination coordinate to a source coordinate
 * expressed as a fraction of the frame size: the region extents map
 * onto frame->src.
 */
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
do {
int n;

n = gen4_get_rectangles(sna, &tmp, nbox,
gen4_video_bind_surfaces);
assert(n);
nbox -= n;

do {
BoxRec r;

/* Box translated into pixmap coordinates. */
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;

/* Corner (x2, y2) */
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

/* Corner (x1, y2) */
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

/* Corner (x1, y1) */
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

/* Record that this area now lives on the GPU copy. */
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
} while (--n);
} while (nbox);
gen4_vertex_flush(sna);

return true;
}
 
/*
 * Prepare a composite channel (source or mask) from a Picture.
 *
 * Picks, in order: a solid channel, a linear gradient channel, a
 * software fixup for other gradients / alpha maps / unsupported repeat or
 * filter, a format conversion, an extraction for oversized pixmaps, or
 * the pixmap's bo directly.
 *
 * The return value is whatever the chosen helper returns;
 * gen4_render_composite() in this file treats -1 as failure, 0 as
 * "substitute a solid 0 channel" and 1 as "channel ready".
 */
static int
gen4_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;

DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));

channel->is_solid = false;
channel->card_format = -1;

if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);

/* No drawable: the picture is a source picture (gradient). */
if (picture->pDrawable == NULL) {
int ret;

if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);

DBG(("%s -- fixup, gradient\n", __FUNCTION__));
/* Try the approximation only when precision is not required. */
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}

if (picture->alphaMap) {
DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

if (!gen4_check_repeat(picture)) {
DBG(("%s: unknown repeat mode fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

if (!gen4_check_filter(picture)) {
DBG(("%s: unhandled filter fixup\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;

pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

/* Translate from drawable coordinates to pixmap coordinates. */
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;

channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
/* Fold the translation into x/y and drop the transform. */
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;

channel->pict_format = picture->format;
channel->card_format = gen4_get_card_format(picture->format);
/* No hardware format for this picture format: convert it. */
if (channel->card_format == -1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);

if (too_large(pixmap->drawable.width, pixmap->drawable.height))
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);

return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
/*
 * Convert a channel's repeat and filter values with gen4_repeat() and
 * gen4_filter(), and fill in card_format from pict_format when it is
 * still unset ((unsigned)-1).
 *
 * The debug line reports the filter conversion through gen4_filter();
 * it previously passed the filter value to gen4_repeat() and so logged
 * a misleading "after" value.
 */
static void gen4_composite_channel_convert(struct sna_composite_channel *channel)
{
	DBG(("%s: repeat %d -> %d, filter %d -> %d\n",
	     __FUNCTION__,
	     channel->repeat, gen4_repeat(channel->repeat),
	     channel->filter, gen4_filter(channel->filter)));
	channel->repeat = gen4_repeat(channel->repeat);
	channel->filter = gen4_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen4_get_card_format(channel->pict_format);
}
#endif
 
/*
 * Finish a composite operation: if a vertex run is still open, flush it
 * and then run the component-alpha pass helper.
 */
static void
gen4_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	/* No open vertex run, nothing to finish */
	if (!sna->render.vertex_offset)
		return;

	gen4_vertex_flush(sna);
	gen4_magic_ca_pass(sna, op);
}
 
#if 0
/*
 * Fill in op->dst for rendering to the Picture dst.
 *
 * The area of interest is (x, y, w, h), or the picture extents when w or
 * h is zero. Returns false when no bo can be obtained for the drawable,
 * or when the target is too large and the redirect helper fails.
 */
static bool
gen4_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;

op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);

/* Also hands back the damage pointer for the box through op->damage. */
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;

get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);

DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));

assert(op->dst.bo->proxy == NULL);

/* Oversized targets must be redirected; give up if that fails. */
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;

return true;
}
 
/*
 * Decide whether the blitter should be tried before the 3D pipeline.
 *
 * True when the batch is not in render mode, when the operation or the
 * destination drawable is too large, when the source is solid, or when
 * picture_is_cpu() reports true for the source.
 */
static bool
try_blt(struct sna *sna,
	PicturePtr dst, PicturePtr src,
	int width, int height)
{
	if (sna->kgem.mode != KGEM_RENDER) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}

	/*
	 * Remaining reasons, checked in order: oversized destination,
	 * solid source (the blitter is much faster for those), or a
	 * source for which picture_is_cpu() is true, e.g. a shm pixmap.
	 */
	return too_large(dst->pDrawable->width, dst->pDrawable->height) ||
	       sna_picture_is_solid(src, NULL) ||
	       picture_is_cpu(sna, src);
}
 
/*
 * Report whether a source picture forces a fallback: solid fills and
 * linear gradients never do, every other type does only when precise
 * rendering was requested.
 */
static bool
check_gradient(PicturePtr picture, bool precise)
{
	if (picture->pSourcePict->type == SourcePictTypeSolidFill ||
	    picture->pSourcePict->type == SourcePictTypeLinear)
		return false;

	return precise;
}
 
/* True when the picture has an alpha map attached. */
static bool
has_alphamap(PicturePtr p)
{
	return p->alphaMap ? true : false;
}
 
/*
 * True when the picture has a drawable, is untransformed, and is_gpu()
 * reports false for that drawable under PREFER_GPU_RENDER.
 */
static bool
need_upload(struct sna *sna, PicturePtr p)
{
	if (!p->pDrawable)
		return false;

	if (!untransformed(p))
		return false;

	return !is_gpu(sna, p->pDrawable, PREFER_GPU_RENDER);
}
 
/*
 * Report whether a source pixmap counts as busy.
 *
 * Pixmaps without sna private data, or marked clear, are never busy.
 * Otherwise a busy GPU or CPU bo makes the pixmap busy; failing that it
 * is busy when it has GPU damage but no CPU damage.
 */
static bool
source_is_busy(PixmapPtr pixmap)
{
	struct sna_pixmap *priv = sna_pixmap(pixmap);

	if (priv == NULL || priv->clear)
		return false;

	if ((priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) ||
	    (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))
		return true;

	return !priv->cpu_damage && priv->gpu_damage;
}
 
/*
 * Decide whether using picture p as a source or mask forces a fallback.
 *
 * Solid pictures never do. Source pictures defer to check_gradient().
 * An unsupported repeat mode or format always does. After that, a busy
 * pixmap never does; otherwise an alpha map, an unsupported filter or a
 * needed upload does.
 */
static bool
source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise)
{
	if (sna_picture_is_solid(p, NULL))
		return false;

	if (p->pSourcePict)
		return check_gradient(p, precise);

	if (!(gen4_check_repeat(p) && gen4_check_format(p->format)))
		return true;

	/* soft errors: prefer to upload/compute rather than readback */
	if (pixmap && source_is_busy(pixmap))
		return false;

	if (has_alphamap(p))
		return true;

	if (!gen4_check_filter(p))
		return true;

	return need_upload(sna, p);
}
 
/*
 * Decide whether a composite of src (and optional mask) onto dst should
 * be left to the software fallback.
 *
 * Returns true to fall back, false to attempt the operation on the GPU.
 * The per-picture verdicts come from source_fallback(); "precise" for
 * those is taken from dst->polyMode.
 */
static bool
gen4_composite_fallback(struct sna *sna,
PicturePtr src,
PicturePtr mask,
PicturePtr dst)
{
PixmapPtr src_pixmap;
PixmapPtr mask_pixmap;
PixmapPtr dst_pixmap;
bool src_fallback, mask_fallback;

if (!gen4_check_dst_format(dst->format)) {
DBG(("%s: unknown destination format: %d\n",
__FUNCTION__, dst->format));
return true;
}

dst_pixmap = get_drawable_pixmap(dst->pDrawable);

src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
src_fallback = source_fallback(sna, src, src_pixmap,
dst->polyMode == PolyModePrecise);

if (mask) {
mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
mask_fallback = source_fallback(sna, mask, mask_pixmap,
dst->polyMode == PolyModePrecise);
} else {
mask_pixmap = NULL;
mask_fallback = false;
}

/* If we are using the destination as a source and need to
* readback in order to upload the source, do it all
* on the cpu.
*/
if (src_pixmap == dst_pixmap && src_fallback) {
DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
return true;
}
if (mask_pixmap == dst_pixmap && mask_fallback) {
DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
return true;
}

/* If anything is on the GPU, push everything out to the GPU */
if (dst_use_gpu(dst_pixmap)) {
DBG(("%s: dst is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}

if (src_pixmap && !src_fallback) {
DBG(("%s: src is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}
if (mask_pixmap && !mask_fallback) {
DBG(("%s: mask is already on the GPU, try to use GPU\n",
__FUNCTION__));
return false;
}

/* However if the dst is not on the GPU and we need to
* render one of the sources using the CPU, we may
* as well do the entire operation in place on the CPU.
*/
if (src_fallback) {
DBG(("%s: dst is on the CPU and src will fallback\n",
__FUNCTION__));
return true;
}

if (mask_fallback) {
DBG(("%s: dst is on the CPU and mask will fallback\n",
__FUNCTION__));
return true;
}

if (too_large(dst_pixmap->drawable.width,
dst_pixmap->drawable.height) &&
dst_is_cpu(dst_pixmap)) {
DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
return true;
}

/* No rule decided it: fall back exactly when dst_use_cpu() says so. */
DBG(("%s: dst is not on the GPU and the operation should not fallback\n",
__FUNCTION__));
return dst_use_cpu(dst_pixmap);
}
 
/*
 * Try to set up the mask channel mc from the already prepared source
 * channel sc instead of preparing the mask picture from scratch.
 *
 * Returns non-zero when mc has been initialised (either as a copy of sc
 * holding its own bo reference, or as a solid channel) and zero when the
 * caller must prepare the mask itself. The function is declared int but
 * is used as a boolean by gen4_render_composite().
 */
static int
reuse_source(struct sna *sna,
PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
{
uint32_t color;

/* Reuse is only possible when both sample from the same origin. */
if (src_x != msk_x || src_y != msk_y)
return false;

if (src == mask) {
DBG(("%s: mask is source\n", __FUNCTION__));
*mc = *sc;
/* The copy shares the bo, so take a second reference for it. */
mc->bo = kgem_bo_reference(mc->bo);
return true;
}

if (sna_picture_is_solid(mask, &color))
return gen4_channel_init_solid(sna, mc, color);

if (sc->is_solid)
return false;

if (src->pDrawable == NULL || mask->pDrawable != src->pDrawable)
return false;

DBG(("%s: mask reuses source drawable\n", __FUNCTION__));

/* Same drawable: everything else must match or be supported. */
if (!sna_transform_equal(src->transform, mask->transform))
return false;

if (!sna_picture_alphamap_equal(src, mask))
return false;

if (!gen4_check_repeat(mask))
return false;

if (!gen4_check_filter(mask))
return false;

if (!gen4_check_format(mask->format))
return false;

DBG(("%s: reusing source channel for mask with a twist\n",
__FUNCTION__));

/* Copy the source channel, then override the mask-specific fields. */
*mc = *sc;
mc->repeat = gen4_repeat(mask->repeat ? mask->repeatType : RepeatNone);
mc->filter = gen4_filter(mask->filter);
mc->pict_format = mask->format;
mc->card_format = gen4_get_card_format(mask->format);
mc->bo = kgem_bo_reference(mc->bo);
return true;
}
 
/*
 * Prepare a Render composite operation for the gen4 3D pipeline.
 *
 * On success tmp is fully initialised (target, source, optional mask,
 * kernel, emitter and the blt/box/boxes/done hooks), the surfaces are
 * bound and true is returned. true is also returned when one of the
 * blitter helpers accepted the operation. false means the caller has to
 * handle the operation some other way; any bo acquired here has been
 * released through the cleanup labels at the end.
 */
static bool
gen4_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));

/* op indexes gen4_blend_op[] below, so reject anything out of range. */
if (op >= ARRAY_SIZE(gen4_blend_op))
return false;

/* Unmasked operations may be handed to the blitter first. */
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;

if (gen4_composite_fallback(sna, src, mask, dst))
return false;

if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);

/* "partial" is set for blending ops and when the clip carries data. */
if (!gen4_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}

tmp->op = op;
/* -1: failure, 0: replace with solid 0 then convert, 1: convert. */
switch (gen4_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
/* With the source resolved, give the blitter one more chance. */
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;

gen4_composite_channel_convert(&tmp->src);
break;
}

tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;

if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;

/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen4_blend_op[op].src_alpha &&
(gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s -- fallback: unhandled component alpha blend\n",
__FUNCTION__));

goto cleanup_src;
}

/* PictOpOver is emitted as OutReverse plus the extra CA pass. */
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}

/* Prepare the mask from scratch only if the source cannot be reused. */
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen4_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen4_composite_channel_convert(&tmp->mask);
break;
}
}

tmp->is_affine &= tmp->mask.is_affine;
}

tmp->u.gen4.wm_kernel =
gen4_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

/* Install the emission hooks; bulk box paths need tmp->emit_boxes. */
tmp->blt = gen4_render_composite_blt;
tmp->box = gen4_render_composite_box;
tmp->boxes = gen4_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen4_render_composite_boxes;
#if !FORCE_FLUSH
tmp->thread_boxes = gen4_render_composite_boxes__thread;
#endif
}
tmp->done = gen4_render_composite_done;

/* If the bos do not fit, submit and retry once before giving up. */
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
}

gen4_bind_surfaces(sna, tmp);
gen4_align_vertex(sna, tmp);
return true;

/* Labels fall through, releasing in reverse order of acquisition. */
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
/* dst.bo is destroyed only when a redirect left a real_bo behind. */
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Render flush hook (installed as sna->render.flush by
 * gen4_render_init): closes the vertex buffer and checks that no vertex
 * buffer id or open vertex run is left behind.
 */
static void
gen4_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);

assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
/*
 * Drop the vertex buffer object and switch vertex storage back to the
 * embedded sna->render.vertex_data array, with the used/index counters
 * reset. Callers in this file only invoke it when render.vbo is set.
 */
static void
discard_vbo(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
/* Fall back to the built-in vertex array. */
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
 
/*
 * kgem retire hook: once the batch is empty and the vertex bo is no
 * longer busy, rewind the vertex counters so the vbo is reused from its
 * start.
 */
static void
gen4_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Only an empty batch allows the rewind */
	if (kgem->nbatch != 0)
		return;

	/* Needs a vbo, and one that is idle */
	if (!sna->render.vbo)
		return;
	if (kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
/*
 * kgem expire hook: release the vertex bo when it exists and holds no
 * vertices.
 */
static void
gen4_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Keep a vbo that is absent (nothing to do) or still in use */
	if (!sna->render.vbo || sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo\n", __FUNCTION__));
	discard_vbo(sna);
}
 
/*
 * Render reset hook (installed as sna->render.reset): invalidate every
 * piece of cached gen4 state so that it is emitted again, discard a
 * vertex bo that is no longer mappable, and clear the per-batch vertex
 * bookkeeping.
 */
static void gen4_render_reset(struct sna *sna)
{
sna->render_state.gen4.needs_invariant = true;
sna->render_state.gen4.needs_urb = true;
/* -1 marks each cached value as "nothing emitted yet". */
sna->render_state.gen4.ve_id = -1;
sna->render_state.gen4.last_primitive = -1;
sna->render_state.gen4.last_pipelined_pointers = -1;

sna->render_state.gen4.drawrect_offset = -1;
sna->render_state.gen4.drawrect_limit = -1;
sna->render_state.gen4.surface_table = -1;

if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}

sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
/*
 * Render teardown hook (installed as sna->render.fini): releases the
 * general state bo created by gen4_render_setup().
 */
static void gen4_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo);
}
 
/*
 * Append a vertex shader unit state to the static stream, 32-byte
 * aligned, with the shader disabled, and return its stream offset.
 */
static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen4_vs_unit_state *unit;

	unit = sna_static_stream_map(stream, sizeof(*unit), 32);

	/* Vertex shader off (passthrough), vertex cache disabled */
	unit->vs6.vs_enable = 0;
	unit->vs6.vert_cache_disable = 1;

	/* URB allocation for the VS stage */
	unit->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
	unit->thread4.nr_urb_entries = URB_VS_ENTRIES;

	return sna_static_stream_offsetof(stream, unit);
}
 
/*
 * Append a strips-and-fans (SF) unit state to the static stream,
 * 32-byte aligned, and return its stream offset.
 *
 * kernel is the stream offset of the SF kernel; it is stored shifted
 * right by 6, so it is expected to be 64-byte aligned.
 */
static uint32_t gen4_create_sf_state(struct sna_static_stream *stream,
uint32_t kernel)
{
struct gen4_sf_unit_state *sf;

sf = sna_static_stream_map(stream, sizeof(*sf), 32);

sf->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
sf->thread0.kernel_start_pointer = kernel >> 6;
sf->thread3.const_urb_entry_read_length = 0; /* no const URBs */
sf->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
sf->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
/* don't smash vertex header, read start from dw8 */
sf->thread3.urb_entry_read_offset = 1;
sf->thread3.dispatch_grf_start_reg = 3;
/* Counts and sizes below are stored minus one where the code subtracts 1. */
sf->thread4.max_threads = GEN4_MAX_SF_THREADS - 1;
sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
sf->thread4.nr_urb_entries = URB_SF_ENTRIES;
sf->sf5.viewport_transform = false; /* skip viewport */
sf->sf6.cull_mode = GEN4_CULLMODE_NONE;
sf->sf6.scissor = 0;
sf->sf7.trifan_pv = 2;
/* NOTE(review): 0x8 presumably selects a half-pixel origin bias — confirm against the hardware docs. */
sf->sf6.dest_org_vbias = 0x8;
sf->sf6.dest_org_hbias = 0x8;

return sna_static_stream_offsetof(stream, sf);
}
 
/*
 * Append a pair of sampler states to the static stream, 32-byte aligned:
 * element 0 for the source and element 1 for the mask. Returns the
 * stream offset of the pair.
 */
static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen4_sampler_state *pair =
		sna_static_stream_map(stream, sizeof(*pair) * 2, 32);

	sampler_state_init(pair, src_filter, src_extend);
	sampler_state_init(pair + 1, mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, pair);
}
 
/*
 * Fill in one windower (WM) unit state.
 *
 * kernel and sampler are stream offsets that are stored shifted (by 6
 * and 5 bits respectively), hence the alignment asserts. has_mask
 * selects the binding table entry count and URB read length. gen selects
 * the thread limit: G4X_MAX_WM_THREADS from 045 (an octal literal)
 * upwards, GEN4_MAX_WM_THREADS below.
 */
static void gen4_init_wm_state(struct gen4_wm_unit_state *wm,
			       int gen,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	/* Kernel */
	assert((kernel & 63) == 0);
	wm->thread0.kernel_start_pointer = kernel >> 6;
	wm->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF);

	wm->thread1.single_program_flow = 0;
	/* dst + src, plus one more entry for the mask */
	wm->thread1.binding_table_entry_count = has_mask ? 3 : 2;

	/* URB reads: no constants */
	wm->thread3.const_urb_entry_read_length = 0;
	wm->thread3.const_urb_entry_read_offset = 0;
	wm->thread3.urb_entry_read_offset = 0;
	/* Each pair of attributes (src/mask coords) is two URB entries */
	wm->thread3.urb_entry_read_length = has_mask ? 4 : 2;
	wm->thread3.dispatch_grf_start_reg = 3;

	/* Sampler */
	assert((sampler & 31) == 0);
	wm->wm4.sampler_state_pointer = sampler >> 5;
	wm->wm4.sampler_count = 1;

	/* Dispatch */
	if (gen >= 045)
		wm->wm5.max_threads = G4X_MAX_WM_THREADS - 1;
	else
		wm->wm5.max_threads = GEN4_MAX_WM_THREADS - 1;
	wm->wm5.transposed_urb_read = 0;
	wm->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change
	 * kernel start point
	 */
	wm->wm5.enable_16_pix = 1;
	wm->wm5.enable_8_pix = 0;
	wm->wm5.early_depth_test = 1;
}
 
/*
 * Append a table of colour calculator (CC) unit states to the static
 * stream, one 64-byte slot for every (source factor, destination factor)
 * pair, and return the stream offset of the first slot.
 *
 * The slot for a pair lives at index src * GEN4_BLENDFACTOR_COUNT + dst.
 */
static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *base;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64,
				     64);

	for (src = 0; src < GEN4_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN4_BLENDFACTOR_COUNT; dst++) {
			struct gen4_cc_unit_state *state =
				(struct gen4_cc_unit_state *)
				(base + 64 * (src * GEN4_BLENDFACTOR_COUNT + dst));

			/* Blending is off only for src=ONE, dst=ZERO */
			state->cc3.blend_enable =
				(dst != GEN4_BLENDFACTOR_ZERO || src != GEN4_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc; /* COPY */
			state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD;

			/* Alpha blend factors mirror the colour ones, for the future. */
			state->cc5.ia_src_blend_factor = src;
			state->cc5.ia_dest_blend_factor = dst;

			state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = src;
			state->cc6.dest_blend_factor = dst;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
/*
 * Build the static "general state" stream for gen4 and turn it into a
 * bo: the SF kernel, every WM kernel, the VS and SF unit states, one WM
 * unit state per (filter, extend, filter, extend, kernel) combination
 * with its sampler pair, and the CC state table.
 *
 * The stream offsets of the VS, SF, WM and CC sections are recorded in
 * sna->render_state.gen4. Returns false if the stream could not be
 * turned into a bo.
 */
static bool gen4_render_setup(struct sna *sna)
{
struct gen4_render_state *state = &sna->render_state.gen4;
struct sna_static_stream general;
struct gen4_wm_unit_state_padded *wm_state;
uint32_t sf, wm[KERNEL_COUNT];
int i, j, k, l, m;

sna_static_stream_init(&general);

/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);

sf = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);
/* Kernels with a non-zero size are copied in as-is; the rest are compiled. */
for (m = 0; m < KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
wm[m] = sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
wm[m] = sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data,
16);
}
}

state->vs = gen4_create_vs_unit_state(&general);
state->sf = gen4_create_sf_state(&general, sf);

wm_state = sna_static_stream_map(&general,
sizeof(*wm_state) * KERNEL_COUNT *
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT,
64);
state->wm = sna_static_stream_offsetof(&general, wm_state);
/*
 * The WM state array is filled in loop-nest order, so it is indexed as
 * [src filter i][src extend j][mask filter k][mask extend l][kernel m],
 * with the kernel index varying fastest.
 */
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
uint32_t sampler_state;

/* One sampler pair is shared by all kernels of this combination. */
sampler_state =
gen4_create_sampler_state(&general,
i, j,
k, l);

for (m = 0; m < KERNEL_COUNT; m++) {
gen4_init_wm_state(&wm_state->state,
sna->kgem.gen,
wm_kernels[m].has_mask,
wm[m], sampler_state);
wm_state++;
}
}
}
}
}

state->cc = gen4_create_cc_unit_state(&general);

state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
/*
 * Initialise the gen4 render backend.
 *
 * Returns the incoming backend name unchanged when the static state
 * cannot be set up; otherwise installs the gen4 hooks and limits in
 * sna->render / sna->kgem and returns a name chosen by
 * sna->kgem.gen (>= 045, an octal literal, selects the gen4.5 name).
 *
 * The composite, spans, video, copy and fill hooks are compiled out by
 * the "#if 0" block below; only blit_tex, flush, reset and fini are
 * installed.
 */
const char *gen4_render_init(struct sna *sna, const char *backend)
{
if (!gen4_render_setup(sna))
return backend;

sna->kgem.retire = gen4_render_retire;
sna->kgem.expire = gen4_render_expire;

#if 0
#if !NO_COMPOSITE
sna->render.composite = gen4_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen4_check_composite_spans;
sna->render.composite_spans = gen4_render_composite_spans;
if (0)
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif

#if !NO_VIDEO
sna->render.video = gen4_render_video;
#endif

#if !NO_COPY_BOXES
sna->render.copy_boxes = gen4_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen4_render_copy;
#endif

#if !NO_FILL_BOXES
sna->render.fill_boxes = gen4_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen4_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen4_render_fill_one;
#endif

#endif

/* NOTE(review): gen4_blit_tex is defined after this function, so a prior declaration must exist elsewhere in the file — confirm. */
sna->render.blit_tex = gen4_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

sna->render.flush = gen4_render_flush;
sna->render.reset = gen4_render_reset;
sna->render.fini = gen4_render_fini;

sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->kgem.gen >= 045 ? "Eaglelake (gen4.5)" : "Broadwater (gen4)";
}
 
/*
 * Prepare a masked texture blit from src to dst.
 *
 * Fills tmp for a PictOpSrc composite with an x8r8g8b8 source, an a8
 * mask and an a8r8g8b8 destination, using the fixed WM_KERNEL_MASK
 * kernel, then binds the surfaces. The caller afterwards drives the
 * blit through tmp->blt() and tmp->done().
 *
 * Parameters:
 *  op     - ignored; tmp->op is always PictOpSrc.
 *  scale  - when true the source coordinates are normalised by
 *           width/height, otherwise by the source pixmap size.
 *  src, mask, dst and their bos - must all be non-NULL; the pixmaps
 *           are dereferenced unconditionally.
 *  src_x .. dst_y - not used here; the src/mask offsets in tmp are
 *           left as the caller initialised them.
 *
 * Returns true when tmp is ready. Returns false when the three bos do
 * not fit even after the pending batch has been submitted; this mirrors
 * the re-check that gen4_render_composite() performs. Previously that
 * result was not re-checked and the surfaces were bound regardless.
 */
static bool
gen4_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{

	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	/* Destination */
	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_a8r8g8b8;

	/* Source channel */
	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen4_get_card_format(tmp->src.pict_format);
	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	/*
	 * Mask channel.
	 * NOTE(review): the mask uses SAMPLER_* constants while the source
	 * uses the Render ones (RepeatNone / PictFilterNearest) — confirm
	 * that both sets have the same numeric values.
	 */
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen4_get_card_format(tmp->mask.pict_format);
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Normalisation factors for the texture coordinates */
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	/* The kernel is fixed rather than chosen from the op. */
	tmp->u.gen4.wm_kernel = WM_KERNEL_MASK;
	tmp->u.gen4.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt = gen4_render_composite_blt;
	tmp->done = gen4_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		/* The bos are owned by the caller, so there is nothing to release. */
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: buffers do not fit after submit\n",
			     __FUNCTION__));
			return false;
		}
	}

	gen4_bind_surfaces(sna, tmp);
	gen4_align_vertex(sna, tmp);
	return true;
}
 
/drivers/video/Intel-2D/sna/gen4_render.h
0,0 → 1,2693
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef GEN4_RENDER_H
#define GEN4_RENDER_H
 
/*
 * Compose the 32-bit value behind the GEN4_* command definitions:
 * a constant 3 at bit 29, the pipeline at bit 27, the opcode at bit 24
 * and the sub-opcode at bit 16.
 */
#define GEN4_3D(pipeline, opcode, subopcode) \
	(((subopcode) << 16) | ((opcode) << 24) | ((pipeline) << 27) | (3 << 29))
 
/*
 * Command opcode values, each built with GEN4_3D(pipeline, opcode,
 * sub-opcode).  The first argument is 0 for the general state commands,
 * 2 for the MEDIA commands and 3 for the 3DSTATE/3DPRIMITIVE commands.
 */
#define GEN4_URB_FENCE GEN4_3D(0, 0, 0)
#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1)
#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2)
#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3)

#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1)
#define GEN4_STATE_SIP GEN4_3D(0, 1, 2)
#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4)

/* Same opcode and sub-opcode as GEN4_PIPELINE_SELECT, but pipeline field 1. */
#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4)

#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0)
#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0)

#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0)
#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1)

#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8)
#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9)
#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa)
#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb)

#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0)
#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1)
#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2)
#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4)
#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5)
/* Shift positions of the type and format fields used with the depth buffer command. */
# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18

#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6)
#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7)
#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8)
#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa)
#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb)

#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0)

#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0)

/* Listed out of numeric order: this belongs with the other (3, 1, x) commands. */
#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10)
/* DW1 */
# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/*
 * Pipeline selector values.
 * NOTE(review): presumably the operand of the PIPELINE_SELECT commands - confirm.
 */
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1

/*
 * UF0_xxx are single-bit flags in bits 8..13; UF1_xxx and UF2_xxx are
 * shift positions (0, 10, 20) of three fields each.
 * NOTE(review): the UF0/UF1/UF2 prefixes presumably name dwords 0-2 of
 * GEN4_URB_FENCE - confirm against the hardware documentation.
 */
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0

/* for GEN4_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)

/* for GEN4_3DSTATE_PIPELINED_POINTERS */
/* Enable/disable values (0 or 1) for the GS and CLIP stages. */
#define GEN4_GS_DISABLE 0
#define GEN4_GS_ENABLE 1
#define GEN4_CLIP_DISABLE 0
#define GEN4_CLIP_ENABLE 1
 
/* for GEN4_PIPE_CONTROL */
/*
 * The NOWRITE/WRITE_xxx values are the four settings (0..3) of a field at
 * bit 14.  The remaining names are single-bit flags; the GTT selection is
 * bit 2, where LOCAL_PGTT is the zero value.
 */
#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
/* VB0_xxx: buffer index at bit 27, vertex/instance data select at bit 26, pitch at bit 0. */
#define VB0_BUFFER_INDEX_SHIFT 27
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define VB0_BUFFER_PITCH_SHIFT 0

/* VERTEX_ELEMENT_STATE Structure */
/*
 * VE0_xxx: buffer index at bit 27, valid flag at bit 26, format at bit 16,
 * offset at bit 0.  VE1_xxx: four component fields at bits 28, 24, 20 and
 * 16, and the destination element offset at bit 0.
 */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define VE0_VALID (1 << 26)
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0

/* 3DPRIMITIVE bits */
/* Vertex access mode is bit 15 (sequential = 0, random = 1); topology starts at bit 10. */
#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen4_defines.h */
#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
/*
 * GEN4_SVG_xxx: three numeric offsets (0x7400, 0x7404, 0x7408) plus six
 * selector values (0..5) placed at bit 8 of GEN4_SVG_CTL.
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_SVG_CTL 0x7400

#define GEN4_SVG_CTL_GS_BA (0 << 8)
#define GEN4_SVG_CTL_SS_BA (1 << 8)
#define GEN4_SVG_CTL_IO_BA (2 << 8)
#define GEN4_SVG_CTL_GS_AUB (3 << 8)
#define GEN4_SVG_CTL_IO_AUB (4 << 8)
#define GEN4_SVG_CTL_SIP (5 << 8)

#define GEN4_SVG_RDATA 0x7404
#define GEN4_SVG_WORK_CTL 0x7408
 
/*
 * GEN4_VF_xxx: numeric offsets 0x7500..0x7518 and the bit fields of
 * GEN4_VF_CTL (mux select at bit 8, snapshot type at bit 4, flags in
 * bits 0..3 and bit 31).
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_VF_CTL 0x7500

/*
 * Bit 31 is built from an unsigned literal: 1 << 31 is not representable
 * in a 32-bit signed int, so the signed shift is undefined behaviour in C.
 */
#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN4_VF_STRG_VAL 0x7504
#define GEN4_VF_STR_VL_OVR 0x7508
#define GEN4_VF_VC_OVR 0x750c
#define GEN4_VF_STR_PSKIP 0x7510
#define GEN4_VF_MAX_PRIM 0x7514
#define GEN4_VF_RDATA 0x7518
 
/*
 * GEN4_VS_xxx: numeric offsets 0x7600..0x7608 and the bit fields of
 * GEN4_VS_CTL (mux select at bit 8, flags in bits 0..2 and bit 31).
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_VS_CTL 0x7600
/* Unsigned literal: a signed 1 << 31 overflows int, which is undefined behaviour. */
#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN4_VS_STRG_VAL 0x7604
#define GEN4_VS_RDATA 0x7608
 
/*
 * GEN4_SF_xxx: numeric offsets 0x7b00..0x7b18 and the bit fields of
 * GEN4_SF_CTL (eight mux selections at bit 8, flags in bits 0..4 and
 * bit 31).
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_SF_CTL 0x7b00
/* Unsigned literal: a signed 1 << 31 overflows int, which is undefined behaviour. */
#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN4_SF_STRG_VAL 0x7b04
#define GEN4_SF_RDATA 0x7b18
 
/*
 * GEN4_WIZ_xxx: numeric offsets 0x7c00..0x7c18 and the bit fields of
 * GEN4_WIZ_CTL (subspan instance at bit 16, mux select at bit 8, flags in
 * bits 0..6 and bit 31).
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_WIZ_CTL 0x7c00
/* Unsigned literal: a signed 1 << 31 overflows int, which is undefined behaviour. */
#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN4_WIZ_STRG_VAL 0x7c04
#define GEN4_WIZ_RDATA 0x7c18
 
/*
 * GEN4_TS_xxx: numeric offsets 0x7e00..0x7e08 and the bit fields of
 * GEN4_TS_CTL (snapshot select at bit 8, flags in bits 0..2 and bit 31).
 * NOTE(review): presumably debug/snapshot register offsets - confirm.
 */
#define GEN4_TS_CTL 0x7e00
/* Unsigned literal: a signed 1 << 31 overflows int, which is undefined behaviour. */
#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN4_TS_STRG_VAL 0x7e04
#define GEN4_TS_RDATA 0x7e08
 
/*
 * GEN4_TD_xxx: numeric offsets 0x8000..0x8030.  GEN4_TD_CTL_xxx and
 * GEN4_TD_CTL2_xxx are the flag bits and shift positions used with the
 * first two offsets; the remaining names are plain offsets.
 * NOTE(review): presumably debug register offsets - confirm.
 */
#define GEN4_TD_CTL 0x8000
#define GEN4_TD_CTL_MUX_SHIFT 8
#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN4_TD_CTL2 0x8004
#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN4_TD_VF_VS_EMSK 0x8008
#define GEN4_TD_GS_EMSK 0x800c
#define GEN4_TD_CLIP_EMSK 0x8010
#define GEN4_TD_SF_EMSK 0x8014
#define GEN4_TD_WIZ_EMSK 0x8018
#define GEN4_TD_0_6_EHTRG_VAL 0x801c
#define GEN4_TD_0_7_EHTRG_VAL 0x8020
#define GEN4_TD_0_6_EHTRG_MSK 0x8024
#define GEN4_TD_0_7_EHTRG_MSK 0x8028
#define GEN4_TD_RDATA 0x802c
#define GEN4_TD_TS_EMSK 0x8030
 
/*
 * GEN4_EU_xxx: numeric offsets 0x8800..0x8840 and two shift positions
 * (select at bit 16, data mux at bit 8) used with GEN4_EU_CTL.
 * NOTE(review): presumably debug register offsets - confirm.
 */
#define GEN4_EU_CTL 0x8800
#define GEN4_EU_CTL_SELECT_SHIFT 16
#define GEN4_EU_CTL_DATA_MUX_SHIFT 8
#define GEN4_EU_ATT_0 0x8810
#define GEN4_EU_ATT_1 0x8814
#define GEN4_EU_ATT_DATA_0 0x8820
#define GEN4_EU_ATT_DATA_1 0x8824
#define GEN4_EU_ATT_CLR_0 0x8830
#define GEN4_EU_ATT_CLR_1 0x8834
#define GEN4_EU_RDATA 0x8840
 
/* 3D state:
 *
 * Raw opcode and sub-opcode numbers.  They mirror the second and third
 * arguments of the GEN4_3D(3, ...) command definitions in this file:
 * for example _3DOP_3DSTATE_NONPIPELINED (1) with _3DSTATE_DEPTH_BUFFER
 * (5) matches GEN4_3DSTATE_DEPTH_BUFFER = GEN4_3D(3, 1, 5).
 */
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3

/* Sub-opcodes 0x00..0x0B here pair with _3DOP_3DSTATE_PIPELINED. */
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
/* The numbering restarts at 0x00: these pair with _3DOP_3DSTATE_NONPIPELINED. */
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
/* Single sub-opcode each for the 3DCONTROL and 3DPRIMITIVE opcodes. */
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
/*
 * Primitive topology numbers, 0x01..0x15 (0x00 is not defined here).
 * NOTE(review): presumably the value placed at
 * GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT - confirm against the emitters.
 */
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15

/* Unshifted vertex-buffer access modes (same 0/1 meaning as the GEN4_3DPRIMITIVE_VERTEX_ bits). */
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
/* Anisotropy ratio encodings: ratio R is stored as R/2 - 1. */
#define GEN4_ANISORATIO_2 0
#define GEN4_ANISORATIO_4 1
#define GEN4_ANISORATIO_6 2
#define GEN4_ANISORATIO_8 3
#define GEN4_ANISORATIO_10 4
#define GEN4_ANISORATIO_12 5
#define GEN4_ANISORATIO_14 6
#define GEN4_ANISORATIO_16 7

/*
 * Blend factor encodings.  Each INV_ variant equals its plain counterpart
 * plus 0x10, and ZERO (0x11) is ONE (0x1) plus 0x10.
 * SRC_ALPHA_SATURATE has no inverted form.
 */
#define GEN4_BLENDFACTOR_ONE 0x1
#define GEN4_BLENDFACTOR_SRC_COLOR 0x2
#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN4_BLENDFACTOR_DST_ALPHA 0x4
#define GEN4_BLENDFACTOR_DST_COLOR 0x5
#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN4_BLENDFACTOR_CONST_COLOR 0x7
#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN4_BLENDFACTOR_ZERO 0x11
#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A

/* Blend function encodings. */
#define GEN4_BLENDFUNCTION_ADD 0
#define GEN4_BLENDFUNCTION_SUBTRACT 1
#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN4_BLENDFUNCTION_MIN 3
#define GEN4_BLENDFUNCTION_MAX 4

/* Alpha-test reference value formats. */
#define GEN4_ALPHATEST_FORMAT_UNORM8 0
#define GEN4_ALPHATEST_FORMAT_FLOAT32 1
 
/* Chroma-key mode encodings. */
#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN4_CHROMAKEY_REPLACE_BLACK 1

/* Clipper API mode encodings. */
#define GEN4_CLIP_API_OGL 0
#define GEN4_CLIP_API_DX 1

/* Clipper mode encodings. */
#define GEN4_CLIPMODE_NORMAL 0
#define GEN4_CLIPMODE_CLIP_ALL 1
#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN4_CLIPMODE_REJECT_ALL 3
#define GEN4_CLIPMODE_ACCEPT_ALL 4

/* Coordinate space selection for the clipper. */
#define GEN4_CLIP_NDCSPACE 0
#define GEN4_CLIP_SCREENSPACE 1

/* Comparison function encodings (same ordering as GEN4_PREFILTER_ in this file). */
#define GEN4_COMPAREFUNCTION_ALWAYS 0
#define GEN4_COMPAREFUNCTION_NEVER 1
#define GEN4_COMPAREFUNCTION_LESS 2
#define GEN4_COMPAREFUNCTION_EQUAL 3
#define GEN4_COMPAREFUNCTION_LEQUAL 4
#define GEN4_COMPAREFUNCTION_GREATER 5
#define GEN4_COMPAREFUNCTION_NOTEQUAL 6
#define GEN4_COMPAREFUNCTION_GEQUAL 7

/* Coverage encodings: half, 1, 2 and 4 pixels. */
#define GEN4_COVERAGE_PIXELS_HALF 0
#define GEN4_COVERAGE_PIXELS_1 1
#define GEN4_COVERAGE_PIXELS_2 2
#define GEN4_COVERAGE_PIXELS_4 3

/* Cull mode encodings; note that 0 is BOTH and 1 is NONE. */
#define GEN4_CULLMODE_BOTH 0
#define GEN4_CULLMODE_NONE 1
#define GEN4_CULLMODE_FRONT 2
#define GEN4_CULLMODE_BACK 3
 
/* Default colour format encodings. */
#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1

/* Depth buffer format encodings; values 3 and 4 are not defined here. */
#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN4_DEPTHFORMAT_D32_FLOAT 1
#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN4_DEPTHFORMAT_D16_UNORM 5

/* Floating-point mode encodings. */
#define GEN4_FLOATING_POINT_IEEE_754 0
#define GEN4_FLOATING_POINT_NON_IEEE_754 1

/* Front-face winding encodings. */
#define GEN4_FRONTWINDING_CW 0
#define GEN4_FRONTWINDING_CCW 1

/* Index element size encodings. */
#define GEN4_INDEX_BYTE 0
#define GEN4_INDEX_WORD 1
#define GEN4_INDEX_DWORD 2

/*
 * Logic op encodings, 0..15.  The values are consistent with a 4-bit
 * truth table taking source = 0b1100 and destination = 0b1010, e.g.
 * AND = 8, OR = 14, XOR = 6, COPY = 12, NOOP = 10.
 */
#define GEN4_LOGICOPFUNCTION_CLEAR 0
#define GEN4_LOGICOPFUNCTION_NOR 1
#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN4_LOGICOPFUNCTION_INVERT 5
#define GEN4_LOGICOPFUNCTION_XOR 6
#define GEN4_LOGICOPFUNCTION_NAND 7
#define GEN4_LOGICOPFUNCTION_AND 8
#define GEN4_LOGICOPFUNCTION_EQUIV 9
#define GEN4_LOGICOPFUNCTION_NOOP 10
#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN4_LOGICOPFUNCTION_COPY 12
#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN4_LOGICOPFUNCTION_OR 14
#define GEN4_LOGICOPFUNCTION_SET 15
 
/* Texture map filter encodings. */
#define GEN4_MAPFILTER_NEAREST 0x0
#define GEN4_MAPFILTER_LINEAR 0x1
#define GEN4_MAPFILTER_ANISOTROPIC 0x2

/* Mip filter encodings; value 2 is not defined here. */
#define GEN4_MIPFILTER_NONE 0
#define GEN4_MIPFILTER_NEAREST 1
#define GEN4_MIPFILTER_LINEAR 3

/* Polygon facing encodings. */
#define GEN4_POLYGON_FRONT_FACING 0
#define GEN4_POLYGON_BACK_FACING 1

/* Prefilter comparison encodings (same ordering as GEN4_COMPAREFUNCTION_ in this file). */
#define GEN4_PREFILTER_ALWAYS 0x0
#define GEN4_PREFILTER_NEVER 0x1
#define GEN4_PREFILTER_LESS 0x2
#define GEN4_PREFILTER_EQUAL 0x3
#define GEN4_PREFILTER_LEQUAL 0x4
#define GEN4_PREFILTER_GREATER 0x5
#define GEN4_PREFILTER_NOTEQUAL 0x6
#define GEN4_PREFILTER_GEQUAL 0x7

/* Provoking vertex index. */
#define GEN4_PROVOKING_VERTEX_0 0
#define GEN4_PROVOKING_VERTEX_1 1
#define GEN4_PROVOKING_VERTEX_2 2

/* Rasterisation rule encodings. */
#define GEN4_RASTRULE_UPPER_LEFT 0
#define GEN4_RASTRULE_UPPER_RIGHT 1

/* Render target clamp range encodings. */
#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2

/* Stencil operation encodings. */
#define GEN4_STENCILOP_KEEP 0
#define GEN4_STENCILOP_ZERO 1
#define GEN4_STENCILOP_REPLACE 2
#define GEN4_STENCILOP_INCRSAT 3
#define GEN4_STENCILOP_DECRSAT 4
#define GEN4_STENCILOP_INCR 5
#define GEN4_STENCILOP_DECR 6
#define GEN4_STENCILOP_INVERT 7

/* Mipmap layout encodings. */
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
/*
 * Surface format codes.  Judging by the names, the values are grouped in
 * ranges of 0x40 by bits per element: 0x000 = 128-bit, 0x040 = 96-bit,
 * 0x080 = 64-bit, 0x0C0 = 32-bit, 0x100 = 16-bit, 0x140 = 8-bit, with
 * 0x180 and up holding the remaining (1-bit, YCrCb, compressed, 24/48/192/
 * 256-bit) formats.  Gaps in the numbering are codes not defined here.
 */
#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN4_SURFACEFORMAT_R16_SINT 0x10C
#define GEN4_SURFACEFORMAT_R16_UINT 0x10D
#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN4_SURFACEFORMAT_I16_UNORM 0x111
#define GEN4_SURFACEFORMAT_L16_UNORM 0x112
#define GEN4_SURFACEFORMAT_A16_UNORM 0x113
#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN4_SURFACEFORMAT_R8_UNORM 0x140
#define GEN4_SURFACEFORMAT_R8_SNORM 0x141
#define GEN4_SURFACEFORMAT_R8_SINT 0x142
#define GEN4_SURFACEFORMAT_R8_UINT 0x143
#define GEN4_SURFACEFORMAT_A8_UNORM 0x144
#define GEN4_SURFACEFORMAT_I8_UNORM 0x145
#define GEN4_SURFACEFORMAT_L8_UNORM 0x146
#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN4_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN4_SURFACEFORMAT_R1_UINT 0x181
#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN4_SURFACEFORMAT_MONO8 0x18E
#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN4_SURFACEFORMAT_FXT1 0x192
#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
/* Surface return format encodings. */
#define GEN4_SURFACERETURNFORMAT_FLOAT32 0
#define GEN4_SURFACERETURNFORMAT_S1 1

/* Surface type encodings; values 5 and 6 are not defined here. */
#define GEN4_SURFACE_1D 0
#define GEN4_SURFACE_2D 1
#define GEN4_SURFACE_3D 2
#define GEN4_SURFACE_CUBE 3
#define GEN4_SURFACE_BUFFER 4
#define GEN4_SURFACE_NULL 7

/* Border colour mode encodings. */
#define GEN4_BORDER_COLOR_MODE_DEFAULT 0
#define GEN4_BORDER_COLOR_MODE_LEGACY 1

/* Texture coordinate addressing mode encodings. */
#define GEN4_TEXCOORDMODE_WRAP 0
#define GEN4_TEXCOORDMODE_MIRROR 1
#define GEN4_TEXCOORDMODE_CLAMP 2
#define GEN4_TEXCOORDMODE_CUBE 3
#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5

/* Thread priority encodings. */
#define GEN4_THREAD_PRIORITY_NORMAL 0
#define GEN4_THREAD_PRIORITY_HIGH 1

/* Tile walk direction encodings. */
#define GEN4_TILEWALK_XMAJOR 0
#define GEN4_TILEWALK_YMAJOR 1

/* Vertex sub-pixel precision encodings; note that 8 bits is 0 and 4 bits is 1. */
#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1

/* Vertex buffer access type encodings. */
#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1

/*
 * Vertex fetch component control encodings.
 * NOTE(review): presumably the values placed at the VE1_VFCOMPONENT_n_SHIFT
 * positions - confirm against the vertex element emitters.
 */
#define VFCOMPONENT_NOSTORE 0
#define VFCOMPONENT_STORE_SRC 1
#define VFCOMPONENT_STORE_0 2
#define VFCOMPONENT_STORE_1_FLT 3
#define VFCOMPONENT_STORE_1_INT 4
#define VFCOMPONENT_STORE_VID 5
#define VFCOMPONENT_STORE_IID 6
#define VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
 *
 * Small enumerations used when encoding EU instructions; each group
 * below is an independent value space.
 */

/* Access (alignment) mode encodings. */
#define GEN4_ALIGN_1 0
#define GEN4_ALIGN_16 1

/* Register addressing mode encodings. */
#define GEN4_ADDRESS_DIRECT 0
#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1

/* Channel indices X..W = 0..3. */
#define GEN4_CHANNEL_X 0
#define GEN4_CHANNEL_Y 1
#define GEN4_CHANNEL_Z 2
#define GEN4_CHANNEL_W 3

/* Compression control encodings. */
#define GEN4_COMPRESSION_NONE 0
#define GEN4_COMPRESSION_2NDHALF 1
#define GEN4_COMPRESSION_COMPRESSED 2

/* Conditional modifier encodings; EQ and NEQ are aliases of Z and NZ. */
#define GEN4_CONDITIONAL_NONE 0
#define GEN4_CONDITIONAL_Z 1
#define GEN4_CONDITIONAL_NZ 2
#define GEN4_CONDITIONAL_EQ 1 /* Z */
#define GEN4_CONDITIONAL_NEQ 2 /* NZ */
#define GEN4_CONDITIONAL_G 3
#define GEN4_CONDITIONAL_GE 4
#define GEN4_CONDITIONAL_L 5
#define GEN4_CONDITIONAL_LE 6
#define GEN4_CONDITIONAL_C 7
#define GEN4_CONDITIONAL_O 8

/* Debug control encodings. */
#define GEN4_DEBUG_NONE 0
#define GEN4_DEBUG_BREAKPOINT 1

/* Dependency control encodings. */
#define GEN4_DEPENDENCY_NORMAL 0
#define GEN4_DEPENDENCY_NOTCLEARED 1
#define GEN4_DEPENDENCY_NOTCHECKED 2
#define GEN4_DEPENDENCY_DISABLE 3

/* Execution size encodings: size N is stored as log2(N). */
#define GEN4_EXECUTE_1 0
#define GEN4_EXECUTE_2 1
#define GEN4_EXECUTE_4 2
#define GEN4_EXECUTE_8 3
#define GEN4_EXECUTE_16 4
#define GEN4_EXECUTE_32 5

/* Horizontal stride encodings: strides 0, 1, 2, 4 map to 0..3. */
#define GEN4_HORIZONTAL_STRIDE_0 0
#define GEN4_HORIZONTAL_STRIDE_1 1
#define GEN4_HORIZONTAL_STRIDE_2 2
#define GEN4_HORIZONTAL_STRIDE_4 3

/* Saturation control encodings. */
#define GEN4_INSTRUCTION_NORMAL 0
#define GEN4_INSTRUCTION_SATURATE 1

/* Mask control encodings; note that 0 means the mask is enabled. */
#define _MASK_ENABLE 0
#define _MASK_DISABLE 1
 
/*
 * EU instruction opcode numbers.  Going by the names, the numbering
 * falls into ranges: 1..16 move/logic/shift/compare, 32..49 flow
 * control and messaging, 64..74 arithmetic, 80..89 SAD and
 * dot-product/line operations, and 126 for NOP.
 */
#define GEN4_OPCODE_MOV 1
#define GEN4_OPCODE_SEL 2
#define GEN4_OPCODE_NOT 4
#define GEN4_OPCODE_AND 5
#define GEN4_OPCODE_OR 6
#define GEN4_OPCODE_XOR 7
#define GEN4_OPCODE_SHR 8
#define GEN4_OPCODE_SHL 9
#define GEN4_OPCODE_RSR 10
#define GEN4_OPCODE_RSL 11
#define GEN4_OPCODE_ASR 12
#define GEN4_OPCODE_CMP 16
#define GEN4_OPCODE_JMPI 32
#define GEN4_OPCODE_IF 34
#define GEN4_OPCODE_IFF 35
#define GEN4_OPCODE_ELSE 36
#define GEN4_OPCODE_ENDIF 37
#define GEN4_OPCODE_DO 38
#define GEN4_OPCODE_WHILE 39
#define GEN4_OPCODE_BREAK 40
#define GEN4_OPCODE_CONTINUE 41
#define GEN4_OPCODE_HALT 42
#define GEN4_OPCODE_MSAVE 44
#define GEN4_OPCODE_MRESTORE 45
#define GEN4_OPCODE_PUSH 46
#define GEN4_OPCODE_POP 47
#define GEN4_OPCODE_WAIT 48
#define GEN4_OPCODE_SEND 49
#define GEN4_OPCODE_ADD 64
#define GEN4_OPCODE_MUL 65
#define GEN4_OPCODE_AVG 66
#define GEN4_OPCODE_FRC 67
#define GEN4_OPCODE_RNDU 68
#define GEN4_OPCODE_RNDD 69
#define GEN4_OPCODE_RNDE 70
#define GEN4_OPCODE_RNDZ 71
#define GEN4_OPCODE_MAC 72
#define GEN4_OPCODE_MACH 73
#define GEN4_OPCODE_LZD 74
#define GEN4_OPCODE_SAD2 80
#define GEN4_OPCODE_SADA2 81
#define GEN4_OPCODE_DP4 84
#define GEN4_OPCODE_DPH 85
#define GEN4_OPCODE_DP3 86
#define GEN4_OPCODE_DP2 87
#define GEN4_OPCODE_DPA2 88
#define GEN4_OPCODE_LINE 89
#define GEN4_OPCODE_NOP 126
 
/*
 * Predicate control encodings.  NONE and NORMAL are shared; values 2 and
 * up are defined twice, once per ALIGN1 and once per ALIGN16 naming, so
 * the same number has a different name in each of the two sets.
 */
#define GEN4_PREDICATE_NONE 0
#define GEN4_PREDICATE_NORMAL 1
#define GEN4_PREDICATE_ALIGN1_ANYV 2
#define GEN4_PREDICATE_ALIGN1_ALLV 3
#define GEN4_PREDICATE_ALIGN1_ANY2H 4
#define GEN4_PREDICATE_ALIGN1_ALL2H 5
#define GEN4_PREDICATE_ALIGN1_ANY4H 6
#define GEN4_PREDICATE_ALIGN1_ALL4H 7
#define GEN4_PREDICATE_ALIGN1_ANY8H 8
#define GEN4_PREDICATE_ALIGN1_ALL8H 9
#define GEN4_PREDICATE_ALIGN1_ANY16H 10
#define GEN4_PREDICATE_ALIGN1_ALL16H 11
#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN4_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN4_PREDICATE_ALIGN16_ANY4H 6
#define GEN4_PREDICATE_ALIGN16_ALL4H 7

/* Register file encodings. */
#define GEN4_ARCHITECTURE_REGISTER_FILE 0
#define GEN4_GENERAL_REGISTER_FILE 1
#define GEN4_MESSAGE_REGISTER_FILE 2
#define GEN4_IMMEDIATE_VALUE 3

/*
 * Register data type encodings.  Values 5 and 6 each carry two names
 * (B/VF and HF/V); per the comments on those lines, VF and V apply to
 * immediates.
 */
#define GEN4_REGISTER_TYPE_UD 0
#define GEN4_REGISTER_TYPE_D 1
#define GEN4_REGISTER_TYPE_UW 2
#define GEN4_REGISTER_TYPE_W 3
#define GEN4_REGISTER_TYPE_UB 4
#define GEN4_REGISTER_TYPE_B 5
#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN4_REGISTER_TYPE_HF 6
#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN4_REGISTER_TYPE_F 7

/* Architecture register numbers; each is a multiple of 0x10. */
#define GEN4_ARF_NULL 0x00
#define GEN4_ARF_ADDRESS 0x10
#define GEN4_ARF_ACCUMULATOR 0x20
#define GEN4_ARF_FLAG 0x30
#define GEN4_ARF_MASK 0x40
#define GEN4_ARF_MASK_STACK 0x50
#define GEN4_ARF_MASK_STACK_DEPTH 0x60
#define GEN4_ARF_STATE 0x70
#define GEN4_ARF_CONTROL 0x80
#define GEN4_ARF_NOTIFICATION_COUNT 0x90
#define GEN4_ARF_IP 0xA0

/* Mask selectors 0..3. */
#define GEN4_AMASK 0
#define GEN4_IMASK 1
#define GEN4_LMASK 2
#define GEN4_CMASK 3
 
 
 
/* Thread control encodings. */
#define GEN4_THREAD_NORMAL 0
#define GEN4_THREAD_ATOMIC 1
#define GEN4_THREAD_SWITCH 2

/*
 * Vertical stride encodings: stride 0 is 0, and a power-of-two stride N
 * is stored as log2(N) + 1.  0xF selects the one-dimensional form.
 */
#define GEN4_VERTICAL_STRIDE_0 0
#define GEN4_VERTICAL_STRIDE_1 1
#define GEN4_VERTICAL_STRIDE_2 2
#define GEN4_VERTICAL_STRIDE_4 3
#define GEN4_VERTICAL_STRIDE_8 4
#define GEN4_VERTICAL_STRIDE_16 5
#define GEN4_VERTICAL_STRIDE_32 6
#define GEN4_VERTICAL_STRIDE_64 7
#define GEN4_VERTICAL_STRIDE_128 8
#define GEN4_VERTICAL_STRIDE_256 9
#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF

/* Region width encodings: width N is stored as log2(N). */
#define GEN4_WIDTH_1 0
#define GEN4_WIDTH_2 1
#define GEN4_WIDTH_4 2
#define GEN4_WIDTH_8 3
#define GEN4_WIDTH_16 4

/* Stateless buffer boundary encodings: value n stands for 1K << n. */
#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN4_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN4_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN4_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN4_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN4_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN4_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN4_STATELESS_BUFFER_BOUNDARY_2M 11

/* Same values as GEN4_POLYGON_FRONT_FACING / GEN4_POLYGON_BACK_FACING defined earlier in this file. */
#define GEN4_POLYGON_FACING_FRONT 0
#define GEN4_POLYGON_FACING_BACK 1
 
/* Message target (shared function) identifiers 0..7. */
#define GEN4_MESSAGE_TARGET_NULL 0
#define GEN4_MESSAGE_TARGET_MATH 1
#define GEN4_MESSAGE_TARGET_SAMPLER 2
#define GEN4_MESSAGE_TARGET_GATEWAY 3
#define GEN4_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN4_MESSAGE_TARGET_URB 6
#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER 7

/* Sampler return format encodings; value 1 is not defined here. */
#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN4_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN4_SAMPLER_RETURN_FORMAT_SINT32 3

/*
 * Sampler message type numbers.  Only the values 0..3 are used, and the
 * same number is shared by several names with different SIMD prefixes,
 * so a value is only meaningful together with the SIMD mode in its name.
 */
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN4_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN4_MATH_FUNCTION_* selectors, numbered 1..13. */
#define GEN4_MATH_FUNCTION_INV                              1
#define GEN4_MATH_FUNCTION_LOG                              2
#define GEN4_MATH_FUNCTION_EXP                              3
#define GEN4_MATH_FUNCTION_SQRT                             4
#define GEN4_MATH_FUNCTION_RSQ                              5
#define GEN4_MATH_FUNCTION_SIN                              6   /* was 7 */
#define GEN4_MATH_FUNCTION_COS                              7   /* was 8 */
#define GEN4_MATH_FUNCTION_SINCOS                           8   /* was 6 */
#define GEN4_MATH_FUNCTION_TAN                              9
#define GEN4_MATH_FUNCTION_POW                              10
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER                13

/* Single-bit GEN4_MATH_* option encodings. */
#define GEN4_MATH_INTEGER_UNSIGNED      0
#define GEN4_MATH_INTEGER_SIGNED        1

#define GEN4_MATH_PRECISION_FULL        0
#define GEN4_MATH_PRECISION_PARTIAL     1

#define GEN4_MATH_SATURATE_NONE         0
#define GEN4_MATH_SATURATE_SATURATE     1

#define GEN4_MATH_DATA_VECTOR           0
#define GEN4_MATH_DATA_SCALAR           1

/* GEN4_URB_* opcode and swizzle encodings. */
#define GEN4_URB_OPCODE_WRITE           0

#define GEN4_URB_SWIZZLE_NONE           0
#define GEN4_URB_SWIZZLE_INTERLEAVE     1
#define GEN4_URB_SWIZZLE_TRANSPOSE      2

/* GEN4_SCRATCH_SPACE_SIZE_<size> encodings: code k names 1K << k. */
#define GEN4_SCRATCH_SPACE_SIZE_1K      0
#define GEN4_SCRATCH_SPACE_SIZE_2K      1
#define GEN4_SCRATCH_SPACE_SIZE_4K      2
#define GEN4_SCRATCH_SPACE_SIZE_8K      3
#define GEN4_SCRATCH_SPACE_SIZE_16K     4
#define GEN4_SCRATCH_SPACE_SIZE_32K     5
#define GEN4_SCRATCH_SPACE_SIZE_64K     6
#define GEN4_SCRATCH_SPACE_SIZE_128K    7
#define GEN4_SCRATCH_SPACE_SIZE_256K    8
#define GEN4_SCRATCH_SPACE_SIZE_512K    9
#define GEN4_SCRATCH_SPACE_SIZE_1M      10
#define GEN4_SCRATCH_SPACE_SIZE_2M      11
 
 
 
 
/*
 * Command opcodes.  Each value is the 16-bit quantity stored in the
 * "opcode" bit-field of the command header structs declared below.
 */
#define CMD_URB_FENCE                   0x6000
#define CMD_CONST_BUFFER_STATE          0x6001
#define CMD_CONST_BUFFER                0x6002

#define CMD_STATE_BASE_ADDRESS          0x6101
#define CMD_STATE_INSN_POINTER          0x6102
#define CMD_PIPELINE_SELECT             0x6104

#define CMD_PIPELINED_STATE_POINTERS    0x7800
#define CMD_BINDING_TABLE_PTRS          0x7801
#define CMD_VERTEX_BUFFER               0x7808
#define CMD_VERTEX_ELEMENT              0x7809
#define CMD_INDEX_BUFFER                0x780a
#define CMD_VF_STATISTICS               0x780b

#define CMD_DRAW_RECT                   0x7900
#define CMD_BLEND_CONSTANT_COLOR        0x7901
#define CMD_CHROMA_KEY                  0x7904
#define CMD_DEPTH_BUFFER                0x7905
#define CMD_POLY_STIPPLE_OFFSET         0x7906
#define CMD_POLY_STIPPLE_PATTERN        0x7907
#define CMD_LINE_STIPPLE_PATTERN        0x7908
/*
 * Was 0x7908, which collided with CMD_LINE_STIPPLE_PATTERN; the global
 * depth offset clamp packet uses the next sub-opcode (09h).
 */
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP   0x7909

#define CMD_PIPE_CONTROL                0x7a00

#define CMD_3D_PRIM                     0x7b00

#define CMD_MI_FLUSH                    0x0200
 
 
/* Various values from the R0 vertex header: */
#define R02_PRIM_END                    0x1
#define R02_PRIM_START                  0x2

/* media pipeline */

/* GEN4_VFE_MODE_* encodings. */
#define GEN4_VFE_MODE_GENERIC           0x0
#define GEN4_VFE_MODE_VLD_MPEG2         0x1
#define GEN4_VFE_MODE_IS                0x2
#define GEN4_VFE_MODE_AVC_MC            0x4
#define GEN4_VFE_MODE_AVC_IT            0x7
#define GEN4_VFE_MODE_VC1_IT            0xB

/* GEN4_VFE_DEBUG_COUNTER_* encodings. */
#define GEN4_VFE_DEBUG_COUNTER_FREE     0
#define GEN4_VFE_DEBUG_COUNTER_FROZEN   1
#define GEN4_VFE_DEBUG_COUNTER_ONCE     2
#define GEN4_VFE_DEBUG_COUNTER_ALWAYS   3

/* VLD_STATE */
#define GEN4_MPEG_TOP_FIELD             1
#define GEN4_MPEG_BOTTOM_FIELD          2
#define GEN4_MPEG_FRAME                 3

#define GEN4_MPEG_QSCALE_LINEAR         0
#define GEN4_MPEG_QSCALE_NONLINEAR      1

#define GEN4_MPEG_ZIGZAG_SCAN           0
#define GEN4_MPEG_ALTER_VERTICAL_SCAN   1

#define GEN4_MPEG_I_PICTURE             1
#define GEN4_MPEG_P_PICTURE             2
#define GEN4_MPEG_B_PICTURE             3
 
/* Command packets:
*/
/*
 * Generic command header: a 16-bit length field followed by a 16-bit
 * opcode field, together filling one unsigned int.
 */
struct header {
    unsigned int length:16;
    unsigned int opcode:16;
};

/*
 * Overlay that exposes a command header either field by field ("bits")
 * or as a single unsigned int ("dword").
 */
union header_union {
    struct header bits;
    unsigned int dword;
};
 
/*
 * Four-word packet: a bit-field header, a destination-address word and
 * two plain data words.
 */
struct gen4_3d_control {
    struct {
        unsigned int length:8;
        unsigned int notify_enable:1;
        unsigned int pad:3;
        unsigned int wc_flush_enable:1;
        unsigned int depth_stall_enable:1;
        unsigned int operation:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int pad:2;
        unsigned int dest_addr_type:1;
        unsigned int dest_addr:29;
    } dest;

    unsigned int dword2;
    unsigned int dword3;
};
 
 
/*
 * Six-word packet: a bit-field header followed by five plain unsigned
 * parameters describing the vertices/instances to process.
 */
struct gen4_3d_primitive {
    struct {
        unsigned int length:8;
        unsigned int pad:2;
        unsigned int topology:5;
        unsigned int indexed:1;
        unsigned int opcode:16;
    } header;

    unsigned int verts_per_instance;
    unsigned int start_vert_location;
    unsigned int instance_count;
    unsigned int start_instance_location;
    unsigned int base_vert_location;
};
 
/*
 * These seem to be passed around as function args, so it works out
 * better to keep them as #defines:
 */
#define GEN4_FLUSH_READ_CACHE               0x1
#define GEN4_FLUSH_STATE_CACHE              0x2
#define GEN4_INHIBIT_FLUSH_RENDER_CACHE     0x4
#define GEN4_FLUSH_SNAPSHOT_COUNTERS        0x8

/*
 * One-word packet: a 4-bit flags field (wide enough for the four
 * GEN4_*FLUSH* bits above), padding, and a 16-bit opcode.
 */
struct gen4_mi_flush {
    unsigned int flags:4;
    unsigned int pad:12;
    unsigned int opcode:16;
};
 
/* One-word packet: a single enable bit, padding, and a 16-bit opcode. */
struct gen4_vf_statistics {
    unsigned int statistics_enable:1;
    unsigned int pad:15;
    unsigned int opcode:16;
};
 
 
 
/*
 * Generic command header followed by one unsigned word per stage
 * (vs, gs, clp, sf, wm).
 */
struct gen4_binding_table_pointers {
    struct header header;

    unsigned int vs;
    unsigned int gs;
    unsigned int clp;
    unsigned int sf;
    unsigned int wm;
};
 
 
/* Generic command header followed by four float colour components. */
struct gen4_blend_constant_color {
    struct header header;

    float blend_constant_color[4];
};
 
 
/*
 * Depth buffer packet.  The header and words 1, 3 and 4 are unions so
 * each can be filled in field by field ("bits") or written as a raw
 * unsigned int ("dword"); word 2 is a plain base address.
 */
struct gen4_depthbuffer {
    union header_union header;

    union {
        struct {
            unsigned int pitch:18;
            unsigned int format:3;
            unsigned int pad:4;
            unsigned int depth_offset_disable:1;
            unsigned int tile_walk:1;
            unsigned int tiled_surface:1;
            unsigned int pad2:1;
            unsigned int surface_type:3;
        } bits;
        unsigned int dword;
    } dword1;

    unsigned int dword2_base_addr;

    union {
        struct {
            unsigned int pad:1;
            unsigned int mipmap_layout:1;
            unsigned int lod:4;
            unsigned int width:13;
            unsigned int height:13;
        } bits;
        unsigned int dword;
    } dword3;

    union {
        struct {
            unsigned int pad:12;
            unsigned int min_array_element:9;
            unsigned int depth:11;
        } bits;
        unsigned int dword;
    } dword4;
};
 
/*
 * Generic command header followed by three pairs of 16-bit fields:
 * (xmin, ymin), (xmax, ymax) and (xorg, yorg).
 */
struct gen4_drawrect {
    struct header header;

    unsigned int xmin:16;
    unsigned int ymin:16;

    unsigned int xmax:16;
    unsigned int ymax:16;

    unsigned int xorg:16;
    unsigned int yorg:16;
};
 
 
 
 
/* Generic command header followed by a single float clamp value. */
struct gen4_global_depth_offset_clamp {
    struct header header;

    float depth_offset_clamp;
};
 
/*
 * Index buffer packet: a header that can be accessed by field ("bits")
 * or as a raw unsigned int ("dword"), then the buffer start and end
 * words.
 */
struct gen4_indexbuffer {
    union {
        struct {
            unsigned int length:8;
            unsigned int index_format:2;
            unsigned int cut_index_enable:1;
            unsigned int pad:5;
            unsigned int opcode:16;
        } bits;
        unsigned int dword;
    } header;

    unsigned int buffer_start;
    unsigned int buffer_end;
};
 
 
/*
 * Generic command header followed by two bit-field words: the 16-bit
 * pattern, then the repeat count and its 16-bit inverse.
 */
struct gen4_line_stipple {
    struct header header;

    struct {
        unsigned int pattern:16;
        unsigned int pad:16;
    } bits0;

    struct {
        unsigned int repeat_count:9;
        unsigned int pad:7;
        unsigned int inverse_repeat_count:16;
    } bits1;
};
 
 
/*
 * Generic command header followed by one word per unit (vs, gs, clp,
 * sf, wm, cc).  Every word carries a 27-bit offset in its upper bits;
 * the gs and clp words additionally have an enable bit in bit-field
 * position 0.
 */
struct gen4_pipelined_state_pointers {
    struct header header;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } vs;

    struct {
        unsigned int enable:1;
        unsigned int pad:4;
        unsigned int offset:27;
    } gs;

    struct {
        unsigned int enable:1;
        unsigned int pad:4;
        unsigned int offset:27;
    } clp;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } sf;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;
    } wm;

    struct {
        unsigned int pad:5;
        unsigned int offset:27;     /* KW: check me! */
    } cc;
};
 
 
/*
 * Generic command header followed by one word holding 5-bit y and x
 * offsets.
 */
struct gen4_polygon_stipple_offset {
    struct header header;

    struct {
        unsigned int y_offset:5;
        unsigned int pad:3;
        unsigned int x_offset:5;
        unsigned int pad0:19;
    } bits0;
};

/* Generic command header followed by 32 unsigned words of stipple data. */
struct gen4_polygon_stipple {
    struct header header;

    unsigned int stipple[32];
};
 
 
 
/*
 * One-word packet: a 1-bit pipeline selector, padding, and a 16-bit
 * opcode, all wrapped in a "header" member.
 */
struct gen4_pipeline_select {
    struct {
        unsigned int pipeline_select:1;
        unsigned int pad:15;
        unsigned int opcode:16;
    } header;
};
 
 
/*
 * Four-word packet: a bit-field header with flush/stall controls, a
 * destination-address word, and two plain data words.
 */
struct gen4_pipe_control {
    struct {
        unsigned int length:8;
        unsigned int notify_enable:1;
        unsigned int pad:2;
        unsigned int instruction_state_cache_flush_enable:1;
        unsigned int write_cache_flush_enable:1;
        unsigned int depth_stall_enable:1;
        unsigned int post_sync_operation:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int pad:2;
        unsigned int dest_addr_type:1;
        unsigned int dest_addr:29;
    } bits1;

    unsigned int data0;
    unsigned int data1;
};
 
 
/*
 * Three-word packet: a header with one realloc bit per unit, followed
 * by two words each packing three 10-bit fence values.
 */
struct gen4_urb_fence {
    struct {
        unsigned int length:8;
        unsigned int vs_realloc:1;
        unsigned int gs_realloc:1;
        unsigned int clp_realloc:1;
        unsigned int sf_realloc:1;
        unsigned int vfe_realloc:1;
        unsigned int cs_realloc:1;
        unsigned int pad:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int vs_fence:10;
        unsigned int gs_fence:10;
        unsigned int clp_fence:10;
        unsigned int pad:2;
    } bits0;

    struct {
        unsigned int sf_fence:10;
        unsigned int vf_fence:10;
        unsigned int cs_fence:10;
        unsigned int pad:2;
    } bits1;
};
 
/*
 * Generic command header followed by one word giving the URB entry
 * count and entry size.  (previously gen4_command_streamer)
 */
struct gen4_constant_buffer_state {
    struct header header;

    struct {
        unsigned int nr_urb_entries:3;
        unsigned int pad:1;
        unsigned int urb_entry_size:5;
        unsigned int pad0:23;
    } bits0;
};
 
/*
 * Two-word packet: a header carrying a "valid" bit, then a word
 * packing a 6-bit buffer length with a 26-bit buffer address.
 */
struct gen4_constant_buffer {
    struct {
        unsigned int length:8;
        unsigned int valid:1;
        unsigned int pad:7;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int buffer_length:6;
        unsigned int buffer_address:26;
    } bits0;
};
 
/*
 * Generic command header followed by five words.  Each word starts
 * with a modify_enable bit; words 0-2 hold 27-bit base addresses and
 * words 3-4 hold 20-bit upper bounds.
 */
struct gen4_state_base_address {
    struct header header;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int general_state_address:27;
    } bits0;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int surface_state_address:27;
    } bits1;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int indirect_object_state_address:27;
    } bits2;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int general_state_upper_bound:20;
    } bits3;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int indirect_object_state_upper_bound:20;
    } bits4;
};
 
/*
 * Generic command header followed by one word packing a 3-bit prefetch
 * count with a 26-bit prefetch pointer.
 */
struct gen4_state_prefetch {
    struct header header;

    struct {
        unsigned int prefetch_count:3;
        unsigned int pad:3;
        unsigned int prefetch_pointer:26;
    } bits0;
};

/*
 * Generic command header followed by one word holding a 28-bit system
 * instruction pointer.
 */
struct gen4_system_instruction_pointer {
    struct header header;

    struct {
        unsigned int pad:4;
        unsigned int system_instruction_pointer:28;
    } bits0;
};
 
 
 
 
/* State structs for the various fixed function units:
*/
 
 
/*
 * thread0..thread3: four one-word bit-field layouts that the unit
 * state structs below embed as their leading members.
 */

/* GRF register count and 26-bit kernel start pointer. */
struct thread0 {
    unsigned int pad0:1;
    unsigned int grf_reg_count:3;
    unsigned int pad1:2;
    unsigned int kernel_start_pointer:26;
};

/* Exception enables, floating point mode, priority and program flow. */
struct thread1 {
    unsigned int ext_halt_exception_enable:1;
    unsigned int sw_exception_enable:1;
    unsigned int mask_stack_exception_enable:1;
    unsigned int timeout_exception_enable:1;
    unsigned int illegal_op_exception_enable:1;
    unsigned int pad0:3;
    unsigned int depth_coef_urb_read_offset:6;     /* WM only */
    unsigned int pad1:2;
    unsigned int floating_point_mode:1;
    unsigned int thread_priority:1;
    unsigned int binding_table_entry_count:8;
    unsigned int pad3:5;
    unsigned int single_program_flow:1;
};

/* Per-thread scratch space size and 22-bit scratch base pointer. */
struct thread2 {
    unsigned int per_thread_scratch_space:4;
    unsigned int pad0:6;
    unsigned int scratch_space_base_pointer:22;
};

/* Dispatch GRF start register and URB / constant-URB read windows. */
struct thread3 {
    unsigned int dispatch_grf_start_reg:4;
    unsigned int urb_entry_read_offset:6;
    unsigned int pad0:1;
    unsigned int urb_entry_read_length:6;
    unsigned int pad1:1;
    unsigned int const_urb_entry_read_offset:6;
    unsigned int pad2:1;
    unsigned int const_urb_entry_read_length:6;
    unsigned int pad3:1;
};
 
 
 
/*
 * Clip unit state: the four common thread words, a URB/thread-count
 * word, two clip control words, and four float viewport bounds.
 */
struct gen4_clip_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:9;
        unsigned int gs_output_stats:1;     /* not always */
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6;         /* may be less */
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int pad0:13;
        unsigned int clip_mode:3;
        unsigned int userclip_enable_flags:8;
        unsigned int userclip_must_clip:1;
        unsigned int pad1:1;
        unsigned int guard_band_enable:1;
        unsigned int viewport_z_clip_enable:1;
        unsigned int viewport_xy_clip_enable:1;
        unsigned int vertex_position_space:1;
        unsigned int api_mode:1;
        unsigned int pad2:1;
    } clip5;

    struct {
        unsigned int pad0:5;
        unsigned int clipper_viewport_state_ptr:27;
    } clip6;

    float viewport_xmin;
    float viewport_xmax;
    float viewport_ymin;
    float viewport_ymax;
};
 
 
 
/*
 * Colour calculator unit state: eight words, cc0..cc7.  cc0-cc2 carry
 * stencil and depth controls ("bf_" marks the back-face variants),
 * cc3-cc6 carry alpha test, blend, logic-op and dither controls, and
 * cc7 holds the alpha reference as either a float or four bytes.
 */
struct gen4_cc_unit_state {
    struct {
        unsigned int pad0:3;
        unsigned int bf_stencil_pass_depth_pass_op:3;
        unsigned int bf_stencil_pass_depth_fail_op:3;
        unsigned int bf_stencil_fail_op:3;
        unsigned int bf_stencil_func:3;
        unsigned int bf_stencil_enable:1;
        unsigned int pad1:2;
        unsigned int stencil_write_enable:1;
        unsigned int stencil_pass_depth_pass_op:3;
        unsigned int stencil_pass_depth_fail_op:3;
        unsigned int stencil_fail_op:3;
        unsigned int stencil_func:3;
        unsigned int stencil_enable:1;
    } cc0;

    struct {
        unsigned int bf_stencil_ref:8;
        unsigned int stencil_write_mask:8;
        unsigned int stencil_test_mask:8;
        unsigned int stencil_ref:8;
    } cc1;

    struct {
        unsigned int logicop_enable:1;
        unsigned int pad0:10;
        unsigned int depth_write_enable:1;
        unsigned int depth_test_function:3;
        unsigned int depth_test:1;
        unsigned int bf_stencil_write_mask:8;
        unsigned int bf_stencil_test_mask:8;
    } cc2;

    struct {
        unsigned int pad0:8;
        unsigned int alpha_test_func:3;
        unsigned int alpha_test:1;
        unsigned int blend_enable:1;
        unsigned int ia_blend_enable:1;
        unsigned int pad1:1;
        unsigned int alpha_test_format:1;
        unsigned int pad2:16;
    } cc3;

    struct {
        unsigned int pad0:5;
        unsigned int cc_viewport_state_offset:27;
    } cc4;

    struct {
        unsigned int pad0:2;
        unsigned int ia_dest_blend_factor:5;
        unsigned int ia_src_blend_factor:5;
        unsigned int ia_blend_function:3;
        unsigned int statistics_enable:1;
        unsigned int logicop_func:4;
        unsigned int pad1:11;
        unsigned int dither_enable:1;
    } cc5;

    struct {
        unsigned int clamp_post_alpha_blend:1;
        unsigned int clamp_pre_alpha_blend:1;
        unsigned int clamp_range:2;
        unsigned int pad0:11;
        unsigned int y_dither_offset:2;
        unsigned int x_dither_offset:2;
        unsigned int dest_blend_factor:5;
        unsigned int src_blend_factor:5;
        unsigned int blend_function:3;
    } cc6;

    struct {
        union {
            float f;
            unsigned char ub[4];
        } alpha_ref;
    } cc7;
};
 
 
 
/*
 * SF unit state.  Uses the common thread0/thread2/thread3 words but
 * declares its own second word (sf1) in place of struct thread1, then
 * adds a URB/thread-count word and three SF-specific words (sf5..sf7).
 */
struct gen4_sf_unit_state {
    struct thread0 thread0;

    struct {
        unsigned int pad0:7;
        unsigned int sw_exception_enable:1;
        unsigned int pad1:3;
        unsigned int mask_stack_exception_enable:1;
        unsigned int pad2:1;
        unsigned int illegal_op_exception_enable:1;
        unsigned int pad3:2;
        unsigned int floating_point_mode:1;
        unsigned int thread_priority:1;
        unsigned int binding_table_entry_count:8;
        unsigned int pad4:5;
        unsigned int single_program_flow:1;
    } sf1;

    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6;
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int front_winding:1;
        unsigned int viewport_transform:1;
        unsigned int pad0:3;
        unsigned int sf_viewport_state_offset:27;
    } sf5;

    struct {
        unsigned int pad0:9;
        unsigned int dest_org_vbias:4;
        unsigned int dest_org_hbias:4;
        unsigned int scissor:1;
        unsigned int disable_2x2_trifilter:1;
        unsigned int disable_zero_pix_trifilter:1;
        unsigned int point_rast_rule:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int line_width:4;
        unsigned int fast_scissor_disable:1;
        unsigned int cull_mode:2;
        unsigned int aa_enable:1;
    } sf6;

    struct {
        unsigned int point_size:11;
        unsigned int use_point_size_state:1;
        unsigned int subpixel_precision:1;
        unsigned int sprite_point:1;
        unsigned int pad0:11;
        unsigned int trifan_pv:2;
        unsigned int linestrip_pv:2;
        unsigned int tristrip_pv:2;
        unsigned int line_last_pixel_enable:1;
    } sf7;
};
 
 
/*
 * GS unit state: the four common thread words, a URB/thread-count word
 * (max_threads is a single bit here), a sampler word and a final
 * control word.
 */
struct gen4_gs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:1;
        unsigned int pad3:6;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } gs5;

    struct {
        unsigned int max_vp_index:4;
        unsigned int pad0:26;
        unsigned int reorder_enable:1;
        unsigned int pad1:1;
    } gs6;
};
 
 
/*
 * VS unit state: the four common thread words, a URB/thread-count word
 * (max_threads is 4 bits here), a sampler word and a final enable
 * word.
 */
struct gen4_vs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:4;
        unsigned int pad3:3;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } vs5;

    struct {
        unsigned int vs_enable:1;
        unsigned int vert_cache_disable:1;
        unsigned int pad0:30;
    } vs6;
};
 
 
/*
 * WM unit state: the four common thread words, a sampler word (wm4), a
 * dispatch/feature word (wm5), two float depth-offset parameters, and
 * three further words (wm8..wm10) that repeat the thread0 layout for
 * kernels 1..3.
 */
struct gen4_wm_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int stats_enable:1;
        unsigned int pad0:1;
        unsigned int sampler_count:3;
        unsigned int sampler_state_pointer:27;
    } wm4;

    struct {
        unsigned int enable_8_pix:1;
        unsigned int enable_16_pix:1;
        unsigned int enable_32_pix:1;
        unsigned int pad0:7;
        unsigned int legacy_global_depth_bias:1;
        unsigned int line_stipple:1;
        unsigned int depth_offset:1;
        unsigned int polygon_stipple:1;
        unsigned int line_aa_region_width:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int early_depth_test:1;
        unsigned int thread_dispatch_enable:1;
        unsigned int program_uses_depth:1;
        unsigned int program_computes_depth:1;
        unsigned int program_uses_killpixel:1;
        unsigned int legacy_line_rast:1;
        unsigned int transposed_urb_read:1;
        unsigned int max_threads:7;
    } wm5;

    float global_depth_offset_constant;
    float global_depth_offset_scale;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_1:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_1:26;
    } wm8;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_2:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_2:26;
    } wm9;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_3:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_3:26;
    } wm10;
};
 
/*
 * struct gen4_wm_unit_state followed by filler bytes so that the two
 * members together occupy 64 bytes.
 */
struct gen4_wm_unit_state_padded {
    struct gen4_wm_unit_state state;
    char pad[64 - sizeof(struct gen4_wm_unit_state)];
};
 
/*
 * The hardware supports two different modes for border color.  The
 * default (OpenGL) mode uses floating-point color channels, while the
 * legacy mode uses 4 bytes.
 *
 * More significantly, the legacy mode respects the components of the
 * border color for channels not present in the source (whereas the
 * default mode will ignore the border color's alpha channel and use
 * alpha==1 for an RGB source, for example).
 *
 * The legacy mode matches the semantics specified by the Render
 * extension.
 */

/* Default mode: four float channels. */
struct gen4_sampler_default_border_color {
    float color[4];
};

/* Legacy mode: four byte channels. */
struct gen4_sampler_legacy_border_color {
    uint8_t color[4];
};
 
/*
 * Sampler state: four words.  ss0 holds filter and LOD-bias controls,
 * ss1 the wrap modes and LOD range, ss2 a 27-bit border colour
 * pointer, and ss3 anisotropy, chroma-key and monochrome filter
 * settings.
 */
struct gen4_sampler_state {
    struct {
        unsigned int shadow_function:3;
        unsigned int lod_bias:11;
        unsigned int min_filter:3;
        unsigned int mag_filter:3;
        unsigned int mip_filter:2;
        unsigned int base_level:5;
        unsigned int pad:1;
        unsigned int lod_preclamp:1;
        unsigned int border_color_mode:1;
        unsigned int pad0:1;
        unsigned int disable:1;
    } ss0;

    struct {
        unsigned int r_wrap_mode:3;
        unsigned int t_wrap_mode:3;
        unsigned int s_wrap_mode:3;
        unsigned int pad:3;
        unsigned int max_lod:10;
        unsigned int min_lod:10;
    } ss1;

    struct {
        unsigned int pad:5;
        unsigned int border_color_pointer:27;
    } ss2;

    struct {
        unsigned int pad:19;
        unsigned int max_aniso:3;
        unsigned int chroma_key_mode:1;
        unsigned int chroma_key_index:2;
        unsigned int chroma_key_enable:1;
        unsigned int monochrome_filter_width:3;
        unsigned int monochrome_filter_height:3;
    } ss3;
};
 
 
/* Clipper viewport: four float bounds (x min/max, y min/max). */
struct gen4_clipper_viewport {
    float xmin;
    float xmax;
    float ymin;
    float ymax;
};

/* CC viewport: float minimum and maximum depth. */
struct gen4_cc_viewport {
    float min_depth;
    float max_depth;
};
 
/*
 * SF viewport: six float matrix entries (m00, m11, m22, m30, m31, m32)
 * followed by a scissor rectangle stored as four shorts.
 */
struct gen4_sf_viewport {
    struct {
        float m00;
        float m11;
        float m22;
        float m30;
        float m31;
        float m32;
    } viewport;

    struct {
        short xmin;
        short ymin;
        short xmax;
        short ymax;
    } scissor;
};
 
/*
 * Documented in the subsystem/shared-functions/sampler chapter...
 *
 * Six-word surface description (ss0..ss5).  The ss0 bit-field positions
 * are kept in step with the GEN4_SURFACE_* DW0 macros in this file:
 * GEN4_SURFACE_RC_READ_WRITE is (1 << 8), so render_cache_read_mode
 * must be the ninth bit declared.  It used to follow a 3-bit pad, which
 * placed it one bit too high; the pad is now 2 bits and a 1-bit pad1
 * keeps mipmap_layout_mode and every later field where they were
 * (GEN4_SURFACE_MIPLAYOUT_SHIFT 10, GEN4_SURFACE_BLEND_ENABLED 1 << 13,
 * GEN4_SURFACE_FORMAT_SHIFT 18, GEN4_SURFACE_TYPE_SHIFT 29).
 *
 * Bit numbers quoted here assume the compiler allocates bit-fields
 * starting from the least significant bit.
 */
struct gen4_surface_state {
    struct {
        unsigned int cube_pos_z:1;
        unsigned int cube_neg_z:1;
        unsigned int cube_pos_y:1;
        unsigned int cube_neg_y:1;
        unsigned int cube_pos_x:1;
        unsigned int cube_neg_x:1;
        unsigned int pad:2;
        unsigned int render_cache_read_mode:1;  /* bit 8 */
        unsigned int pad1:1;                    /* bit 9, not described here */
        unsigned int mipmap_layout_mode:1;      /* bit 10 */
        unsigned int vert_line_stride_ofs:1;
        unsigned int vert_line_stride:1;
        unsigned int color_blend:1;             /* bit 13 */
        unsigned int writedisable_blue:1;
        unsigned int writedisable_green:1;
        unsigned int writedisable_red:1;
        unsigned int writedisable_alpha:1;
        unsigned int surface_format:9;          /* bits 18..26 */
        unsigned int data_return_format:1;
        unsigned int pad0:1;
        unsigned int surface_type:3;            /* bits 29..31 */
    } ss0;

    struct {
        unsigned int base_addr;
    } ss1;

    struct {
        unsigned int render_target_rotation:2;
        unsigned int mip_count:4;
        unsigned int width:13;
        unsigned int height:13;
    } ss2;

    struct {
        unsigned int tile_walk:1;
        unsigned int tiled_surface:1;
        unsigned int pad:1;
        unsigned int pitch:18;
        unsigned int depth:11;
    } ss3;

    struct {
        unsigned int pad:19;
        unsigned int min_array_elt:9;
        unsigned int min_lod:4;
    } ss4;

    struct {
        unsigned int pad:20;
        unsigned int y_offset:4;
        unsigned int pad2:1;
        unsigned int x_offset:7;
    } ss5;
};
 
/* Surface state DW0 */
#define GEN4_SURFACE_RC_READ_WRITE          (1 << 8)
#define GEN4_SURFACE_MIPLAYOUT_SHIFT        10
#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW     0
#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT     1
#define GEN4_SURFACE_CUBEFACE_ENABLES       0x3f
#define GEN4_SURFACE_BLEND_ENABLED          (1 << 13)
#define GEN4_SURFACE_WRITEDISABLE_B_SHIFT   14
#define GEN4_SURFACE_WRITEDISABLE_G_SHIFT   15
#define GEN4_SURFACE_WRITEDISABLE_R_SHIFT   16
#define GEN4_SURFACE_WRITEDISABLE_A_SHIFT   17
#define GEN4_SURFACE_FORMAT_SHIFT           18
#define GEN4_SURFACE_FORMAT_MASK            _MASK(26, 18)

#define GEN4_SURFACE_TYPE_SHIFT             29
#define GEN4_SURFACE_TYPE_MASK              _MASK(31, 29)
#define GEN4_SURFACE_1D                     0
#define GEN4_SURFACE_2D                     1
#define GEN4_SURFACE_3D                     2
#define GEN4_SURFACE_CUBE                   3
#define GEN4_SURFACE_BUFFER                 4
#define GEN4_SURFACE_NULL                   7

/* Surface state DW2 */
#define GEN4_SURFACE_HEIGHT_SHIFT           19
#define GEN4_SURFACE_HEIGHT_MASK            _MASK(31, 19)
#define GEN4_SURFACE_WIDTH_SHIFT            6
#define GEN4_SURFACE_WIDTH_MASK             _MASK(18, 6)
#define GEN4_SURFACE_LOD_SHIFT              2
#define GEN4_SURFACE_LOD_MASK               _MASK(5, 2)

/* Surface state DW3 */
#define GEN4_SURFACE_DEPTH_SHIFT            21
#define GEN4_SURFACE_DEPTH_MASK             _MASK(31, 21)
#define GEN4_SURFACE_PITCH_SHIFT            3
/*
 * NOTE(review): this mask spans 17 bits (19..3) while the pitch field
 * of struct gen4_surface_state.ss3 is declared 18 bits wide -- confirm
 * which width is intended.
 */
#define GEN4_SURFACE_PITCH_MASK             _MASK(19, 3)
#define GEN4_SURFACE_TILED                  (1 << 1)
#define GEN4_SURFACE_TILED_Y                (1 << 0)

/* Surface state DW4 */
#define GEN4_SURFACE_MIN_LOD_SHIFT          28
#define GEN4_SURFACE_MIN_LOD_MASK           _MASK(31, 28)

/* Surface state DW5 */
#define GEN4_SURFACE_X_OFFSET_SHIFT         25
#define GEN4_SURFACE_X_OFFSET_MASK          _MASK(31, 25)
#define GEN4_SURFACE_Y_OFFSET_SHIFT         20
#define GEN4_SURFACE_Y_OFFSET_MASK          _MASK(23, 20)
 
 
/*
 * Vertex buffer description: a bit-field word (pitch, access type,
 * buffer index) followed by start address, max index and an instance
 * step-rate word.
 */
struct gen4_vertex_buffer_state {
    struct {
        unsigned int pitch:11;
        unsigned int pad:15;
        unsigned int access_type:1;
        unsigned int vb_index:5;
    } vb0;

    unsigned int start_addr;
    unsigned int max_index;
#if 1
    unsigned int instance_data_step_rate;   /* not included for sequential/random vertices? */
#endif
};
 
/* Number of vertex buffer entries carried by gen4_vb_array_state. */
#define GEN4_VBP_MAX    17

/* Generic command header followed by GEN4_VBP_MAX vertex buffer entries. */
struct gen4_vb_array_state {
    struct header header;
    struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
};
 
 
/*
 * Vertex element description: two bit-field words.  ve0 locates the
 * source data (offset, format, buffer index); ve1 gives the
 * destination offset and four 4-bit component controls, declared from
 * component 3 down to component 0.
 */
struct gen4_vertex_element_state {
    struct {
        unsigned int src_offset:11;
        unsigned int pad:5;
        unsigned int src_format:9;
        unsigned int pad0:1;
        unsigned int valid:1;
        unsigned int vertex_buffer_index:5;
    } ve0;

    struct {
        unsigned int dst_offset:8;
        unsigned int pad:8;
        unsigned int vfcomponent3:4;
        unsigned int vfcomponent2:4;
        unsigned int vfcomponent1:4;
        unsigned int vfcomponent0:4;
    } ve1;
};
 
/* Number of vertex element entries carried by gen4_vertex_element_packet. */
#define GEN4_VEP_MAX    18

/* Generic command header followed by GEN4_VEP_MAX vertex element entries. */
struct gen4_vertex_element_packet {
    struct header header;
    struct gen4_vertex_element_state ve[GEN4_VEP_MAX];  /* note: less than _TNL_ATTRIB_MAX */
};
 
 
/*
 * One-word URB message descriptor.  It is also embedded as the "urb"
 * alternative of gen4_instruction.bits3 below; its last five fields
 * (response_length .. end_of_thread) are declared identically in the
 * other message alternatives of that union.
 */
struct gen4_urb_immediate {
    unsigned int opcode:4;
    unsigned int offset:6;
    unsigned int swizzle_control:2;
    unsigned int pad:1;
    unsigned int allocate:1;
    unsigned int used:1;
    unsigned int complete:1;
    unsigned int response_length:4;
    unsigned int msg_length:4;
    unsigned int msg_target:4;
    unsigned int pad1:3;
    unsigned int end_of_thread:1;
};

/*
 * Instruction format for the execution units:
 *
 * Four words.  "header" is common to every instruction; bits1, bits2
 * and bits3 are unions whose alternatives describe the same word for
 * different addressing modes: da1/da16 carry register and sub-register
 * numbers, ia1/ia16 carry an offset relative to an address register,
 * and the "16" variants carry swizzles / write masks.  bits3
 * additionally offers alternatives for if/else jump data and for the
 * message descriptors of the individual message targets.
 *
 * Fields that hold negative values (the *_indirect_offset fields and
 * jump_count) are declared "signed int": whether a plain "int"
 * bit-field is signed is implementation-defined in C, so the
 * signedness is spelled out rather than left to the compiler.
 */
struct gen4_instruction {
    struct {
        unsigned int opcode:7;
        unsigned int pad:1;
        unsigned int access_mode:1;
        unsigned int mask_control:1;
        unsigned int dependency_control:2;
        unsigned int compression_control:2;
        unsigned int thread_control:2;
        unsigned int predicate_control:4;
        unsigned int predicate_inverse:1;
        unsigned int execution_size:3;
        unsigned int destreg__conditonalmod:4;  /* destreg - send, conditionalmod - others */
        unsigned int pad0:2;
        unsigned int debug_control:1;
        unsigned int saturate:1;
    } header;

    /* Word 1: register files/types and the destination operand. */
    union {
        struct {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int src1_reg_file:2;
            unsigned int src1_reg_type:3;
            unsigned int pad:1;
            unsigned int dest_subreg_nr:5;
            unsigned int dest_reg_nr:8;
            unsigned int dest_horiz_stride:2;
            unsigned int dest_address_mode:1;
        } da1;

        struct {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int pad:6;
            signed int dest_indirect_offset:10;    /* offset against the deref'd address reg */
            unsigned int dest_subreg_nr:3;          /* subnr for the address reg a0.x */
            unsigned int dest_horiz_stride:2;
            unsigned int dest_address_mode:1;
        } ia1;

        struct {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int src1_reg_file:2;
            unsigned int src1_reg_type:3;
            unsigned int pad0:1;
            unsigned int dest_writemask:4;
            unsigned int dest_subreg_nr:1;
            unsigned int dest_reg_nr:8;
            unsigned int pad1:2;
            unsigned int dest_address_mode:1;
        } da16;

        struct {
            unsigned int dest_reg_file:2;
            unsigned int dest_reg_type:3;
            unsigned int src0_reg_file:2;
            unsigned int src0_reg_type:3;
            unsigned int pad0:6;
            unsigned int dest_writemask:4;
            signed int dest_indirect_offset:6;
            unsigned int dest_subreg_nr:3;
            unsigned int pad1:2;
            unsigned int dest_address_mode:1;
        } ia16;
    } bits1;

    /* Word 2: source operand 0 and the flag register number. */
    union {
        struct {
            unsigned int src0_subreg_nr:5;
            unsigned int src0_reg_nr:8;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_horiz_stride:2;
            unsigned int src0_width:3;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad:6;
        } da1;

        struct {
            signed int src0_indirect_offset:10;
            unsigned int src0_subreg_nr:3;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_horiz_stride:2;
            unsigned int src0_width:3;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad:6;
        } ia1;

        struct {
            unsigned int src0_swz_x:2;
            unsigned int src0_swz_y:2;
            unsigned int src0_subreg_nr:1;
            unsigned int src0_reg_nr:8;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_swz_z:2;
            unsigned int src0_swz_w:2;
            unsigned int pad0:1;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } da16;

        struct {
            unsigned int src0_swz_x:2;
            unsigned int src0_swz_y:2;
            signed int src0_indirect_offset:6;
            unsigned int src0_subreg_nr:3;
            unsigned int src0_abs:1;
            unsigned int src0_negate:1;
            unsigned int src0_address_mode:1;
            unsigned int src0_swz_z:2;
            unsigned int src0_swz_w:2;
            unsigned int pad0:1;
            unsigned int src0_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } ia16;
    } bits2;

    /*
     * Word 3: source operand 1, if/else jump data, a message
     * descriptor, or the raw word ("ud").
     */
    union {
        struct {
            unsigned int src1_subreg_nr:5;
            unsigned int src1_reg_nr:8;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad:1;
            unsigned int src1_horiz_stride:2;
            unsigned int src1_width:3;
            unsigned int src1_vert_stride:4;
            unsigned int pad0:7;
        } da1;

        struct {
            unsigned int src1_swz_x:2;
            unsigned int src1_swz_y:2;
            unsigned int src1_subreg_nr:1;
            unsigned int src1_reg_nr:8;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_swz_z:2;
            unsigned int src1_swz_w:2;
            unsigned int pad1:1;
            unsigned int src1_vert_stride:4;
            unsigned int pad2:7;
        } da16;

        struct {
            signed int src1_indirect_offset:10;
            unsigned int src1_subreg_nr:3;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_horiz_stride:2;
            unsigned int src1_width:3;
            unsigned int src1_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad1:6;
        } ia1;

        struct {
            unsigned int src1_swz_x:2;
            unsigned int src1_swz_y:2;
            signed int src1_indirect_offset:6;
            unsigned int src1_subreg_nr:3;
            unsigned int src1_abs:1;
            unsigned int src1_negate:1;
            unsigned int pad0:1;
            unsigned int src1_swz_z:2;
            unsigned int src1_swz_w:2;
            unsigned int pad1:1;
            unsigned int src1_vert_stride:4;
            unsigned int flag_reg_nr:1;
            unsigned int pad2:6;
        } ia16;

        struct {
            signed int jump_count:16;   /* note: signed */
            unsigned int pop_count:4;
            unsigned int pad0:12;
        } if_else;

        struct {
            unsigned int function:4;
            unsigned int int_type:1;
            unsigned int precision:1;
            unsigned int saturate:1;
            unsigned int data_type:1;
            unsigned int pad0:8;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } math;

        struct {
            unsigned int binding_table_index:8;
            unsigned int sampler:4;
            unsigned int return_format:2;
            unsigned int msg_type:2;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } sampler;

        struct gen4_urb_immediate urb;

        struct {
            unsigned int binding_table_index:8;
            unsigned int msg_control:4;
            unsigned int msg_type:2;
            unsigned int target_cache:2;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } dp_read;

        struct {
            unsigned int binding_table_index:8;
            unsigned int msg_control:3;
            unsigned int pixel_scoreboard_clear:1;
            unsigned int msg_type:3;
            unsigned int send_commit_msg:1;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } dp_write;

        struct {
            unsigned int pad:16;
            unsigned int response_length:4;
            unsigned int msg_length:4;
            unsigned int msg_target:4;
            unsigned int pad1:3;
            unsigned int end_of_thread:1;
        } generic;

        unsigned int ud;
    } bits3;
};
 
/* media pipeline */

/*
 * VFE state: three words.  vfe0 holds scratch-space settings, vfe1 the
 * mode, URB entry and thread-count settings, and vfe2 a 28-bit
 * interface descriptor base.
 */
struct gen4_vfe_state {
    struct {
        unsigned int per_thread_scratch_space:4;
        unsigned int pad3:3;
        unsigned int extend_vfe_state_present:1;
        unsigned int pad2:2;
        unsigned int scratch_base:22;
    } vfe0;

    struct {
        unsigned int debug_counter_control:2;
        unsigned int children_present:1;
        unsigned int vfe_mode:4;
        unsigned int pad2:2;
        unsigned int num_urb_entries:7;
        unsigned int urb_entry_alloc_size:9;
        unsigned int max_threads:7;
    } vfe1;

    struct {
        unsigned int pad4:4;
        unsigned int interface_descriptor_base:28;
    } vfe2;
};
 
/*
 * gen4_vld_state: four consecutive words of state. vld0 and vld1 hold
 * picture-level parameters; desc_remap_table0/1 together hold sixteen
 * 4-bit entries, index_0 .. index_15. The bitfields of every word add up
 * to 32; pad members are filler only.
 */
struct gen4_vld_state {
/* 6 pad + six 1-bit flags + 2 + 2 + four 4-bit f_codes = 32 bits */
struct {
unsigned int pad6:6;
unsigned int scan_order:1;
unsigned int intra_vlc_format:1;
unsigned int quantizer_scale_type:1;
unsigned int concealment_motion_vector:1;
unsigned int frame_predict_frame_dct:1;
unsigned int top_field_first:1;
unsigned int picture_structure:2;
unsigned int intra_dc_precision:2;
unsigned int f_code_0_0:4;
unsigned int f_code_0_1:4;
unsigned int f_code_1_0:4;
unsigned int f_code_1_1:4;
} vld0;

/* 9 + 2 + 21 = 32 bits; only picture_coding_type is meaningful */
struct {
unsigned int pad2:9;
unsigned int picture_coding_type:2;
unsigned int pad:21;
} vld1;

/* remap entries 0..7, eight 4-bit fields = 32 bits */
struct {
unsigned int index_0:4;
unsigned int index_1:4;
unsigned int index_2:4;
unsigned int index_3:4;
unsigned int index_4:4;
unsigned int index_5:4;
unsigned int index_6:4;
unsigned int index_7:4;
} desc_remap_table0;

/* remap entries 8..15, eight 4-bit fields = 32 bits */
struct {
unsigned int index_8:4;
unsigned int index_9:4;
unsigned int index_10:4;
unsigned int index_11:4;
unsigned int index_12:4;
unsigned int index_13:4;
unsigned int index_14:4;
unsigned int index_15:4;
} desc_remap_table1;
};
 
/*
 * gen4_interface_descriptor: four consecutive words (desc0 .. desc3).
 * The bitfields of every word add up to 32; members named pad/padN are
 * filler that keep the named fields at their bit positions.
 */
struct gen4_interface_descriptor {
/* 4 + 2 + 26 = 32 bits */
struct {
unsigned int grf_reg_blocks:4;
unsigned int pad:2;
unsigned int kernel_start_pointer:26;
} desc0;

/* exception enables, thread flags and the constant URB read window; 32 bits in total */
struct {
unsigned int pad:7;
unsigned int software_exception:1;
unsigned int pad2:3;
unsigned int maskstack_exception:1;
unsigned int pad3:1;
unsigned int illegal_opcode_exception:1;
unsigned int pad4:2;
unsigned int floating_point_mode:1;
unsigned int thread_priority:1;
unsigned int single_program_flow:1;
unsigned int pad5:1;
unsigned int const_urb_entry_read_offset:6;
unsigned int const_urb_entry_read_len:6;
} desc1;

/* 2 + 3 + 27 = 32 bits */
struct {
unsigned int pad:2;
unsigned int sampler_count:3;
unsigned int sampler_state_pointer:27;
} desc2;

/* 5 + 27 = 32 bits */
struct {
unsigned int binding_table_entry_count:5;
unsigned int binding_table_pointer:27;
} desc3;
};
 
/*
 * gen6_blend_state: two consecutive words. blend0 carries the blend
 * factors/functions (the ia_-prefixed fields are a second, parallel set)
 * and the enable bits; blend1 carries clamp, dither, alpha-test, logic-op,
 * per-channel write-disable and alpha-to-coverage controls. The bitfields
 * of each word add up to 32; padN members are filler only.
 */
struct gen6_blend_state
{
/* 5+5+1+3+1 + 5+5+1+3+1 + 1+1 = 32 bits */
struct {
unsigned int dest_blend_factor:5;
unsigned int source_blend_factor:5;
unsigned int pad3:1;
unsigned int blend_func:3;
unsigned int pad2:1;
unsigned int ia_dest_blend_factor:5;
unsigned int ia_source_blend_factor:5;
unsigned int pad1:1;
unsigned int ia_blend_func:3;
unsigned int pad0:1;
unsigned int ia_blend_enable:1;
unsigned int blend_enable:1;
} blend0;

/* 32 bits in total */
struct {
unsigned int post_blend_clamp_enable:1;
unsigned int pre_blend_clamp_enable:1;
unsigned int clamp_range:2;
unsigned int pad0:4;
unsigned int x_dither_offset:2;
unsigned int y_dither_offset:2;
unsigned int dither_enable:1;
unsigned int alpha_test_func:3;
unsigned int alpha_test_enable:1;
unsigned int pad1:1;
unsigned int logic_op_func:4;
unsigned int logic_op_enable:1;
unsigned int pad2:1;
unsigned int write_disable_b:1;
unsigned int write_disable_g:1;
unsigned int write_disable_r:1;
unsigned int write_disable_a:1;
unsigned int pad3:1;
unsigned int alpha_to_coverage_dither:1;
unsigned int alpha_to_one:1;
unsigned int alpha_to_coverage:1;
} blend1;
};
 
/*
 * gen6_color_calc_state: one 32-bit bitfield word (cc0), one word that is
 * either a float or an 8-bit integer alpha reference (cc1), followed by
 * four float colour constants in r, g, b, a order.
 */
struct gen6_color_calc_state
{
/* 1 + 14 + 1 + 8 + 8 = 32 bits */
struct {
unsigned int alpha_test_format:1;
unsigned int pad0:14;
unsigned int round_disable:1;
unsigned int bf_stencil_ref:8;
unsigned int stencil_ref:8;
} cc0;

/*
 * Alpha reference value: the same storage viewed as a float
 * (alpha_ref_f) or as an 8-bit unsigned value in the low bits
 * (alpha_ref_fi.ui). NOTE(review): which view applies is presumably
 * selected by cc0.alpha_test_format -- confirm against the hardware docs.
 */
union {
float alpha_ref_f;
struct {
unsigned int ui:8;
unsigned int pad0:24;
} alpha_ref_fi;
} cc1;

float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
/*
 * gen6_depth_stencil_state: three consecutive words. ds0 holds the stencil
 * ops/functions/enables, ds1 the four 8-bit stencil masks, ds2 the depth
 * test controls. Every stencil field exists twice, once plain and once
 * with a bf_ prefix (presumably "back face" -- confirm). The bitfields of
 * each word add up to 32; padN members are filler only.
 */
struct gen6_depth_stencil_state
{
/* 3 pad + 13 bits of bf_ stencil + 2 pad + 14 bits of stencil = 32 bits */
struct {
unsigned int pad0:3;
unsigned int bf_stencil_pass_depth_pass_op:3;
unsigned int bf_stencil_pass_depth_fail_op:3;
unsigned int bf_stencil_fail_op:3;
unsigned int bf_stencil_func:3;
unsigned int bf_stencil_enable:1;
unsigned int pad1:2;
unsigned int stencil_write_enable:1;
unsigned int stencil_pass_depth_pass_op:3;
unsigned int stencil_pass_depth_fail_op:3;
unsigned int stencil_fail_op:3;
unsigned int stencil_func:3;
unsigned int stencil_enable:1;
} ds0;

/* four 8-bit masks = 32 bits */
struct {
unsigned int bf_stencil_write_mask:8;
unsigned int bf_stencil_test_mask:8;
unsigned int stencil_write_mask:8;
unsigned int stencil_test_mask:8;
} ds1;

/* 26 + 1 + 3 + 1 + 1 = 32 bits */
struct {
unsigned int pad0:26;
unsigned int depth_write_enable:1;
unsigned int depth_test_func:3;
unsigned int pad1:1;
unsigned int depth_test_enable:1;
} ds2;
};
 
/*
 * Sampler filter selector. FILTER_COUNT is not a filter: it equals the
 * number of filter values listed before it.
 */
typedef enum {
	SAMPLER_FILTER_NEAREST  = 0,
	SAMPLER_FILTER_BILINEAR = 1,
	FILTER_COUNT            = 2
} sampler_filter_t;
 
/*
 * Sampler extend (out-of-bounds addressing) selector. EXTEND_COUNT is not
 * a mode: it equals the number of modes listed before it.
 */
typedef enum {
	SAMPLER_EXTEND_NONE    = 0,
	SAMPLER_EXTEND_REPEAT  = 1,
	SAMPLER_EXTEND_PAD     = 2,
	SAMPLER_EXTEND_REFLECT = 3,
	EXTEND_COUNT           = 4
} sampler_extend_t;
 
/*
 * Identifiers of the WM kernels. Most kernels come in pairs: the plain
 * name at an even value and a _P variant at the following odd value.
 * KERNEL_COUNT is not a kernel: it equals the number of kernels listed
 * before it.
 */
typedef enum {
	WM_KERNEL              = 0,
	WM_KERNEL_P            = 1,

	WM_KERNEL_MASK         = 2,
	WM_KERNEL_MASK_P       = 3,

	WM_KERNEL_MASKCA       = 4,
	WM_KERNEL_MASKCA_P     = 5,

	WM_KERNEL_MASKSA       = 6,
	WM_KERNEL_MASKSA_P     = 7,

	WM_KERNEL_OPACITY      = 8,
	WM_KERNEL_OPACITY_P    = 9,

	WM_KERNEL_VIDEO_PLANAR = 10,
	WM_KERNEL_VIDEO_PACKED = 11,
	KERNEL_COUNT           = 12
} wm_kernel_t;
 
#endif
/drivers/video/Intel-2D/sna/gen4_source.h
0,0 → 1,22
/* Prototypes for the gen4 source-channel initialisers. */
#ifndef GEN4_SOURCE_H
#define GEN4_SOURCE_H

#include "compiler.h"

#include "sna.h"
#include "sna_render.h"

/*
 * Initialise `channel` from a single colour value. The definition is not in
 * this header; the bool result is presumably success/failure -- confirm
 * against the implementation.
 */
bool
gen4_channel_init_solid(struct sna *sna,
struct sna_composite_channel *channel,
uint32_t color);

/*
 * Initialise `channel` from `picture` for the w x h area at (x, y), with
 * (dst_x, dst_y) naming the corresponding destination origin. The
 * definition is not in this header; the exact meaning of the coordinates
 * and of the bool result should be confirmed against the implementation.
 */
bool
gen4_channel_init_linear(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y);

#endif /* GEN4_SOURCE_H */
/drivers/video/Intel-2D/sna/gen4_vertex.c
0,0 → 1,1868
/*
* Copyright © 2012 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "gen4_vertex.h"
 
/*
 * `sse2` prefixes the emitter definitions below. If nothing has defined it
 * yet, define it to nothing so those definitions compile unchanged.
 */
#ifndef sse2
#define sse2
#endif
 
/*
 * gen4_vertex_flush - close the currently open vertex run.
 *
 * Writes the number of vertices emitted since render.vertex_start
 * (vertex_index - vertex_start) into the batch entry at index
 * render.vertex_offset, then resets vertex_offset to 0. The asserts state
 * the preconditions: vertex_offset must be non-zero and at least one
 * vertex must have been emitted.
 */
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));

assert(sna->render.vertex_offset);
assert(sna->render.vertex_index > sna->render.vertex_start);

/* patch the reserved batch entry with the final vertex count */
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render.vertex_offset = 0;
}
 
/*
 * gen4_vertex_finish - retire the current vertex storage and allocate a new
 * vertex buffer object (vbo).
 *
 * If a vbo is attached on entry, every pending entry of
 * render.vertex_reloc[] is resolved against it with kgem_add_reloc(), the
 * vertex bookkeeping (nvertex_reloc, vertex_used, vertex_index, vbo, vb_id)
 * is reset and our reference to the old vbo is dropped.
 *
 * A new linear bo is then requested: first 256KiB, halving the size down to
 * 16KiB while creation fails, and finally one more 256KiB attempt with only
 * CREATE_GTT_MAP. If no bo can be created and mapped, render.vertex_data
 * becomes the vertex store again and 0 is returned.
 *
 * Vertices still counted in vertex_used at that point (only possible when
 * no vbo was attached on entry) are copied from vertex_data into the new
 * mapping.
 *
 * Returns the number of free float slots in the new vbo
 * (vertex_size - vertex_used), or 0 when falling back to vertex_data.
 */
int gen4_vertex_finish(struct sna *sna)
{
	struct kgem_bo *bo;
	unsigned int i;
	unsigned hint, size;

	DBG(("%s: used=%d / %d\n", __FUNCTION__,
	     sna->render.vertex_used, sna->render.vertex_size));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);

	sna_vertex_wait__locked(&sna->render);

	/* Note: we only need dword alignment (currently) */

	hint = CREATE_GTT_MAP;

	bo = sna->render.vbo;
	if (bo) {
		for (i = 0; i < sna->render.nvertex_reloc; i++) {
			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
			     i, sna->render.vertex_reloc[i]));

			sna->kgem.batch[sna->render.vertex_reloc[i]] =
				kgem_add_reloc(&sna->kgem,
					       sna->render.vertex_reloc[i], bo,
					       I915_GEM_DOMAIN_VERTEX << 16,
					       0);
		}

		assert(!sna->render.active);
		sna->render.nvertex_reloc = 0;
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;
		sna->render.vb_id = 0;

		/*
		 * We are replacing an existing vbo, so ask for a cached,
		 * unthrottled replacement. Decide this while `bo` is still
		 * a live reference: once kgem_bo_destroy() has run the
		 * pointer must not be inspected any more.
		 */
		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;

		kgem_bo_destroy(&sna->kgem, bo);
	}

	size = 256*1024;
	assert(!sna->render.active);
	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	while (sna->render.vbo == NULL && size > 16*1024) {
		size /= 2;
		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	}
	/* last resort: full size again, but with the plain GTT-map hint only */
	if (sna->render.vbo == NULL)
		sna->render.vbo = kgem_create_linear(&sna->kgem,
						     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo)
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		/* no bo, or it could not be mapped: use the embedded array */
		if (sna->render.vbo) {
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
			sna->render.vbo = NULL;
		}
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		return 0;
	}

	if (sna->render.vertex_used) {
		DBG(("%s: copying initial buffer x %d to handle=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_used,
		     sna->render.vbo->handle));
		assert(sizeof(float)*sna->render.vertex_used <=
		       __kgem_bo_size(sna->render.vbo));
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}

	/*
	 * Capacity in 4-byte units, kept strictly below UINT16_MAX.
	 * NOTE(review): presumably because vertex indices are 16 bit --
	 * confirm.
	 */
	size = __kgem_bo_size(sna->render.vbo)/4;
	if (size >= UINT16_MAX)
		size = UINT16_MAX - 1;

	DBG(("%s: create vbo handle=%d, size=%d\n",
	     __FUNCTION__, sna->render.vbo->handle, size));

	sna->render.vertex_size = size;
	return sna->render.vertex_size - sna->render.vertex_used;
}
 
/*
 * gen4_vertex_close - resolve the pending vertex-buffer relocations.
 *
 * Returns immediately when render.vb_id is 0. Otherwise it chooses the
 * storage the recorded relocations should point at, patches every batch
 * entry listed in render.vertex_reloc[] through kgem_add_reloc(), and
 * clears nvertex_reloc and vb_id. A buffer object that is no longer wanted
 * is destroyed at the very end, after the relocations have been emitted.
 */
void gen4_vertex_close(struct sna *sna)
{
struct kgem_bo *bo, *free_bo = NULL;
unsigned int i, delta = 0;

assert(sna->render.vertex_offset == 0);
if (!sna->render.vb_id)
return;

DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n",
__FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
sna->render.vb_id, sna->render.nvertex_reloc));

assert(!sna->render.active);

bo = sna->render.vbo;
if (bo) {
/*
 * A vbo is attached. With fewer than 64 slots left it is detached
 * (later vertices go to vertex_data) and released after the relocs.
 */
if (sna->render.vertex_size - sna->render.vertex_used < 64) {
DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
} else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) {
/*
 * Still roomy, but mapped through the CPU on a device without LLC:
 * switch to a GTT mapping, or give the vbo up if that fails.
 */
DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
sna->render.vertices =
kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
sna->render.vbo = NULL;
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
free_bo = bo;
}

}
} else {
/* No vbo: the vertices live in vertex_data and must be placed somewhere. */
if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
/*
 * They fit between the end of the batch and kgem.surface: append
 * them to the batch. delta is their byte offset; bo stays NULL.
 * NOTE(review): a NULL bo presumably makes kgem_add_reloc() target
 * the batch itself -- confirm.
 */
DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
sna->render.vertex_used, sna->kgem.nbatch));
memcpy(sna->kgem.batch + sna->kgem.nbatch,
sna->render.vertex_data,
sna->render.vertex_used * 4);
delta = sna->kgem.nbatch * 4;
bo = NULL;
sna->kgem.nbatch += sna->render.vertex_used;
} else {
/*
 * Otherwise upload them into a freshly created linear bo. If the
 * write fails the bo is destroyed and bo becomes NULL (delta stays 0).
 */
bo = kgem_create_linear(&sna->kgem,
4*sna->render.vertex_used,
CREATE_NO_THROTTLE);
if (bo && !kgem_bo_write(&sna->kgem, bo,
sna->render.vertex_data,
4*sna->render.vertex_used)) {
kgem_bo_destroy(&sna->kgem, bo);
bo = NULL;
}
DBG(("%s: new vbo: %d\n", __FUNCTION__,
sna->render.vertex_used));
free_bo = bo;
}
}

/* point every recorded reloc slot at the chosen storage */
assert(sna->render.nvertex_reloc);
for (i = 0; i < sna->render.nvertex_reloc; i++) {
DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
i, sna->render.vertex_reloc[i]));

sna->kgem.batch[sna->render.vertex_reloc[i]] =
kgem_add_reloc(&sna->kgem,
sna->render.vertex_reloc[i], bo,
I915_GEM_DOMAIN_VERTEX << 16,
delta);
}
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;

/* without a vbo the embedded array is reused from its start */
if (sna->render.vbo == NULL) {
assert(!sna->render.active);
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
assert(sna->render.vertices == sna->render.vertex_data);
assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
}

/* drop the temporary/discarded bo only after the relocs reference it */
if (free_bo)
kgem_bo_destroy(&sna->kgem, free_bo);
}
 
/* specialised vertex emission routines */
 
/*
 * Shorthands for the generic emitters below. Both expand to a call that
 * names `sna`, so a variable called `sna` must be in scope at the use site.
 * OUT_VERTEX emits an (x, y) pair through vertex_emit_2s(); OUT_VERTEX_F
 * emits a single value through vertex_emit().
 */
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
/*
 * Evaluate the channel's linear function at (x, y): the channel offset is
 * added to each coordinate, the sums are weighted by u.linear.dx and
 * u.linear.dy respectively, and u.linear.offset is added last.
 */
force_inline static float
compute_linear(const struct sna_composite_channel *channel,
	       int16_t x, int16_t y)
{
	return (channel->u.linear.dx * (channel->offset[0] + x) +
		channel->u.linear.dy * (channel->offset[1] + y) +
		channel->u.linear.offset);
}
 
/*
 * Emit the texture coordinate(s) of one channel for the point (x, y).
 *
 *  - solid channel:      one float (the untranslated x) is emitted;
 *  - affine channel:     two floats, the transformed s and t, each
 *                        multiplied by the channel scale;
 *  - any other channel:  three floats, scaled s and t plus w.
 *
 * For the non-solid cases the channel offset is added to x and y before
 * the transform is applied.
 */
sse2 inline static void
emit_texcoord(struct sna *sna,
	      const struct sna_composite_channel *channel,
	      int16_t x, int16_t y)
{
	float s, t, w;

	if (channel->is_solid) {
		OUT_VERTEX_F(x);
		return;
	}

	x += channel->offset[0];
	y += channel->offset[1];

	if (!channel->is_affine) {
		/* projective: keep the homogeneous coordinate */
		sna_get_transformed_coordinates_3d(x, y,
						   channel->transform,
						   &s, &t, &w);
		OUT_VERTEX_F(s * channel->scale[0]);
		OUT_VERTEX_F(t * channel->scale[1]);
		OUT_VERTEX_F(w);
		return;
	}

	sna_get_transformed_coordinates(x, y,
					channel->transform,
					&s, &t);
	OUT_VERTEX_F(s * channel->scale[0]);
	OUT_VERTEX_F(t * channel->scale[1]);
}
 
/*
 * Emit one vertex of a source-only composite: the destination position
 * through OUT_VERTEX, then the source channel's texture coordinate(s).
 * mskX/mskY are part of the signature but are not used here.
 */
sse2 force_inline static void
emit_vertex(struct sna *sna,
	    const struct sna_composite_op *op,
	    int16_t srcX, int16_t srcY,
	    int16_t mskX, int16_t mskY,
	    int16_t dstX, int16_t dstY)
{
	const struct sna_composite_channel *src = &op->src;

	(void)mskX;
	(void)mskY;

	OUT_VERTEX(dstX, dstY);
	emit_texcoord(sna, src, srcX, srcY);
}
 
/*
 * Generic source-only rectangle emitter. Three vertices are emitted with
 * emit_vertex(), in this order: (x+width, y+height), (x, y+height), (x, y),
 * each taken from the src, mask and dst origins of `r`.
 */
sse2 fastcall static void
emit_primitive(struct sna *sna,
	       const struct sna_composite_op *op,
	       const struct sna_composite_rectangles *r)
{
	/* near (origin) and far (origin + extent) edges of each rectangle */
	const int16_t src_x0 = r->src.x,  src_x1 = r->src.x  + r->width;
	const int16_t src_y0 = r->src.y,  src_y1 = r->src.y  + r->height;
	const int16_t msk_x0 = r->mask.x, msk_x1 = r->mask.x + r->width;
	const int16_t msk_y0 = r->mask.y, msk_y1 = r->mask.y + r->height;
	const int16_t dst_x0 = r->dst.x,  dst_x1 = r->dst.x  + r->width;
	const int16_t dst_y0 = r->dst.y,  dst_y1 = r->dst.y  + r->height;

	emit_vertex(sna, op, src_x1, src_y1, msk_x1, msk_y1, dst_x1, dst_y1);
	emit_vertex(sna, op, src_x0, src_y1, msk_x0, msk_y1, dst_x0, dst_y1);
	emit_vertex(sna, op, src_x0, src_y0, msk_x0, msk_y0, dst_x0, dst_y0);
}
 
/*
 * Emit one vertex of a masked composite: the destination position through
 * OUT_VERTEX, then the texture coordinate(s) of the source channel and
 * finally those of the mask channel.
 */
sse2 force_inline static void
emit_vertex_mask(struct sna *sna,
		 const struct sna_composite_op *op,
		 int16_t srcX, int16_t srcY,
		 int16_t mskX, int16_t mskY,
		 int16_t dstX, int16_t dstY)
{
	const struct sna_composite_channel *src = &op->src;
	const struct sna_composite_channel *mask = &op->mask;

	OUT_VERTEX(dstX, dstY);
	emit_texcoord(sna, src, srcX, srcY);
	emit_texcoord(sna, mask, mskX, mskY);
}
 
/*
 * Generic masked rectangle emitter. Three vertices are emitted with
 * emit_vertex_mask(), in this order: (x+width, y+height), (x, y+height),
 * (x, y), each taken from the src, mask and dst origins of `r`.
 */
sse2 fastcall static void
emit_primitive_mask(struct sna *sna,
		    const struct sna_composite_op *op,
		    const struct sna_composite_rectangles *r)
{
	/* near (origin) and far (origin + extent) edges of each rectangle */
	const int16_t src_x0 = r->src.x,  src_x1 = r->src.x  + r->width;
	const int16_t src_y0 = r->src.y,  src_y1 = r->src.y  + r->height;
	const int16_t msk_x0 = r->mask.x, msk_x1 = r->mask.x + r->width;
	const int16_t msk_y0 = r->mask.y, msk_y1 = r->mask.y + r->height;
	const int16_t dst_x0 = r->dst.x,  dst_x1 = r->dst.x  + r->width;
	const int16_t dst_y0 = r->dst.y,  dst_y1 = r->dst.y  + r->height;

	emit_vertex_mask(sna, op, src_x1, src_y1, msk_x1, msk_y1, dst_x1, dst_y1);
	emit_vertex_mask(sna, op, src_x0, src_y1, msk_x0, msk_y1, dst_x0, dst_y1);
	emit_vertex_mask(sna, op, src_x0, src_y0, msk_x0, msk_y0, dst_x0, dst_y0);
}
 
/*
 * Rectangle emitter for a solid source: 3 vertices x 2 floats. Even slots
 * hold the destination corner (two coordinates packed into one float-sized
 * word through the union), odd slots hold the constant .5.
 */
sse2 fastcall static void
emit_primitive_solid(struct sna *sna,
		     const struct sna_composite_op *op,
		     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);

	/* claim six floats at the current write position */
	out = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* constant second component of every vertex */
	out[1] = .5;
	out[3] = .5;
	out[5] = .5;

	/* (x+width, y+height) */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;

	/* (x, y+height) */
	xy.p.x = r->dst.x;
	out[2] = xy.f;

	/* (x, y) */
	xy.p.y = r->dst.y;
	out[4] = xy.f;
}
 
/*
 * Box-array variant of the solid emitter: writes 6 floats per box into `v`
 * (packed corner in the even slots, constant .5 in the odd slots).
 * The loop body always runs once before nbox is tested, so nbox must be
 * at least 1.
 */
sse2 fastcall static void
emit_boxes_solid(const struct sna_composite_op *op,
		 const BoxRec *box, int nbox,
		 float *v)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	do {
		/* constant second component of every vertex */
		v[1] = .5;
		v[3] = .5;
		v[5] = .5;

		/* (x2, y2), (x1, y2), (x1, y1) */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;

		xy.p.x = box->x1;
		v[2] = xy.f;

		xy.p.y = box->y1;
		v[4] = xy.f;

		v += 6;
		box++;
	} while (--nbox != 0);
}
 
/*
 * Rectangle emitter for a linear source: 3 vertices x 2 floats. Even slots
 * hold the packed destination corner, odd slots the value of
 * compute_linear() for the matching source corner.
 */
sse2 fastcall static void
emit_primitive_linear(struct sna *sna,
		      const struct sna_composite_op *op,
		      const struct sna_composite_rectangles *r)
{
	const struct sna_composite_channel *src = &op->src;
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);

	/* claim six floats at the current write position */
	out = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* (x+width, y+height) */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;

	/* (x, y+height) */
	xy.p.x = r->dst.x;
	out[2] = xy.f;

	/* (x, y) */
	xy.p.y = r->dst.y;
	out[4] = xy.f;

	out[1] = compute_linear(src, r->src.x+r->width, r->src.y+r->height);
	out[3] = compute_linear(src, r->src.x, r->src.y+r->height);
	out[5] = compute_linear(src, r->src.x, r->src.y);
}
 
/*
 * Box-array variant of the linear emitter: writes 6 floats per box into
 * `v`. The box coordinates are used both for the packed position and as
 * the input of compute_linear(). nbox must be at least 1.
 */
sse2 fastcall static void
emit_boxes_linear(const struct sna_composite_op *op,
		  const BoxRec *box, int nbox,
		  float *v)
{
	const struct sna_composite_channel *src = &op->src;

	do {
		union {
			struct sna_coordinate p;
			float f;
		} xy;

		/* (x2, y2), (x1, y2), (x1, y1) */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;

		xy.p.x = box->x1;
		v[2] = xy.f;

		xy.p.y = box->y1;
		v[4] = xy.f;

		v[1] = compute_linear(src, box->x2, box->y2);
		v[3] = compute_linear(src, box->x1, box->y2);
		v[5] = compute_linear(src, box->x1, box->y1);

		box++;
		v += 6;
	} while (--nbox != 0);
}
 
sse2 fastcall static void
emit_primitive_identity_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse2 fastcall static void
emit_boxes_identity_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_simple_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*3;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
dst.p.x = r->dst.x;
v[3] = dst.f;
v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
dst.p.y = r->dst.y;
v[6] = dst.f;
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
 
sse2 fastcall static void
emit_boxes_simple_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[1] = ((box->x2 + tx) * xx + x0) * sx;
v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
dst.p.x = box->x1;
v[3] = dst.f;
v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
dst.p.y = box->y1;
v[6] = dst.f;
v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
v += 9;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_affine_source(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = r->dst.x;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = r->dst.y;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[7], &v[8]);
}
 
sse2 fastcall static void
emit_boxes_affine_source(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[1], &v[2]);
 
dst.p.x = box->x1;
v[3] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y2,
op->src.transform, op->src.scale,
&v[4], &v[5]);
 
dst.p.y = box->y1;
v[6] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
op->src.offset[1] + box->y1,
op->src.transform, op->src.scale,
&v[7], &v[8]);
box++;
v += 9;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
}
 
sse2 fastcall static void
emit_boxes_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_linear_identity_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x, msk_y;
float w, h;
float *v;
 
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
__FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
assert(op->floats_per_rect == 12);
assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 12;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[2] = (msk_x + w) * op->mask.scale[0];
v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[4] = dst.f;
v[10] = v[6] = msk_x * op->mask.scale[0];
 
dst.p.y = r->dst.y;
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
sse2 fastcall static void
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
 
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
v[2] = (msk_x + box->x2) * op->mask.scale[0];
v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
dst.p.x = box->x1;
v[4] = dst.f;
v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
dst.p.y = box->y1;
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
v[1] = compute_linear(&op->src, box->x2, box->y2);
v[5] = compute_linear(&op->src, box->x1, box->y2);
v[9] = compute_linear(&op->src, box->x1, box->y1);
 
v += 12;
box++;
} while (--nbox);
}
 
sse2 fastcall static void
emit_primitive_identity_source_mask(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float src_x, src_y;
float msk_x, msk_y;
float w, h;
float *v;
 
src_x = r->src.x + op->src.offset[0];
src_y = r->src.y + op->src.offset[1];
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 15;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = (src_x + w) * op->src.scale[0];
v[2] = (src_y + h) * op->src.scale[1];
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = src_x * op->src.scale[0];
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = src_y * op->src.scale[1];
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_simple_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
 
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
float yy = op->src.transform->matrix[1][1];
float y0 = op->src.transform->matrix[1][2];
float sx = op->src.scale[0];
float sy = op->src.scale[1];
int16_t tx = op->src.offset[0];
int16_t ty = op->src.offset[1];
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
v[6] = ((r->src.x + tx) * xx + x0) * sx;
v[7] = v[2];
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
v[11] = v[6];
v[12] = ((r->src.y + ty) * yy + y0) * sy;
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
sse2 fastcall static void
emit_primitive_affine_source_identity(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
float *v;
union {
struct sna_coordinate p;
float f;
} dst;
float msk_x = r->mask.x + op->mask.offset[0];
float msk_y = r->mask.y + op->mask.offset[1];
float w = r->width, h = r->height;
 
assert(op->floats_per_rect == 15);
assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 3*5;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[1], &v[2]);
v[3] = (msk_x + w) * op->mask.scale[0];
v[4] = (msk_y + h) * op->mask.scale[1];
 
dst.p.x = r->dst.x;
v[5] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y + r->height,
op->src.transform, op->src.scale,
&v[6], &v[7]);
v[8] = msk_x * op->mask.scale[0];
v[9] = v[4];
 
dst.p.y = r->dst.y;
v[10] = dst.f;
_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
op->src.offset[1] + r->src.y,
op->src.transform, op->src.scale,
&v[11], &v[12]);
v[13] = v[8];
v[14] = msk_y * op->mask.scale[1];
}
 
/* SSE4_2 */
#if defined(sse4_2)
 
/*
 * sse4_2-tagged copy of the linear rectangle emitter: 3 vertices x 2
 * floats. Even slots hold the packed destination corner, odd slots the
 * value of compute_linear() for the matching source corner.
 */
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	const struct sna_composite_channel *src = &op->src;
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);

	/* claim six floats at the current write position */
	out = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* (x+width, y+height) */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;

	/* (x, y+height) */
	xy.p.x = r->dst.x;
	out[2] = xy.f;

	/* (x, y) */
	xy.p.y = r->dst.y;
	out[4] = xy.f;

	out[1] = compute_linear(src, r->src.x+r->width, r->src.y+r->height);
	out[3] = compute_linear(src, r->src.x, r->src.y+r->height);
	out[5] = compute_linear(src, r->src.x, r->src.y);
}
 
/*
 * sse4_2-tagged copy of the linear box emitter: writes 6 floats per box
 * into `v`, using the box coordinates for both the packed position and the
 * compute_linear() input. nbox must be at least 1.
 */
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
			  const BoxRec *box, int nbox,
			  float *v)
{
	const struct sna_composite_channel *src = &op->src;

	do {
		union {
			struct sna_coordinate p;
			float f;
		} xy;

		/* (x2, y2), (x1, y2), (x1, y1) */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;

		xy.p.x = box->x1;
		v[2] = xy.f;

		xy.p.y = box->y1;
		v[4] = xy.f;

		v[1] = compute_linear(src, box->x2, box->y2);
		v[3] = compute_linear(src, box->x1, box->y2);
		v[5] = compute_linear(src, box->x1, box->y1);

		box++;
		v += 6;
	} while (--nbox != 0);
}
 
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
float *v;
 
assert(op->floats_per_rect == 9);
assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += 9;
 
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
dst.p.x = r->dst.x;
v[3] = dst.f;
dst.p.y = r->dst.y;
v[6] = dst.f;
 
v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
v[1] = v[4] + r->width * op->src.scale[0];
 
v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
 
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
const BoxRec *box, int nbox,
float *v)
{
do {
union {
struct sna_coordinate p;
float f;
} dst;
 
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
dst.p.x = box->x1;
v[3] = dst.f;
dst.p.y = box->y1;
v[6] = dst.f;
 
v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
 
v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
 
v += 9;
box++;
} while (--nbox);
}
 
/*
 * SSE4.2 build of the single-rectangle emitter for a source whose
 * transform only uses matrix[0][0], matrix[0][2], matrix[1][1] and
 * matrix[1][2] (per-axis scale plus translation).
 *
 * Appends nine floats to sna->render.vertices: per vertex the packed
 * destination corner followed by the transformed, scaled source x and y.
 */
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	/* Per-axis transform terms, texture scale and source offset. */
	float mat_xx = op->src.transform->matrix[0][0];
	float mat_x0 = op->src.transform->matrix[0][2];
	float mat_yy = op->src.transform->matrix[1][1];
	float mat_y0 = op->src.transform->matrix[1][2];
	float scale_x = op->src.scale[0];
	float scale_y = op->src.scale[1];
	int16_t off_x = op->src.offset[0];
	int16_t off_y = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 9;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[3] = xy.f;
	xy.p.y = r->dst.y;
	out[6] = xy.f;

	/* Source x: far edge, then the near edge shared by two vertices. */
	out[1] = ((r->src.x + r->width + off_x) * mat_xx + mat_x0) * scale_x;
	out[4] = ((r->src.x + off_x) * mat_xx + mat_x0) * scale_x;
	out[7] = out[4];

	/* Source y: far edge shared by two vertices, then the near edge. */
	out[2] = ((r->src.y + r->height + off_y) * mat_yy + mat_y0) * scale_y;
	out[5] = out[2];
	out[8] = ((r->src.y + off_y) * mat_yy + mat_y0) * scale_y;
}
 
/*
 * SSE4.2 build of the box-list emitter for a source with a per-axis
 * scale-and-translate transform.
 *
 * Writes nine floats per box to v: three vertices of packed destination
 * corner plus transformed, scaled source x and y.  At least one box is
 * always emitted; nbox is only tested after a box has been written.
 */
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	float mat_xx = op->src.transform->matrix[0][0];
	float mat_x0 = op->src.transform->matrix[0][2];
	float mat_yy = op->src.transform->matrix[1][1];
	float mat_y0 = op->src.transform->matrix[1][2];
	float scale_x = op->src.scale[0];
	float scale_y = op->src.scale[1];
	int16_t off_x = op->src.offset[0];
	int16_t off_y = op->src.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[3] = xy.f;
		xy.p.y = box->y1;
		v[6] = xy.f;

		/* Source x. */
		v[1] = ((box->x2 + off_x) * mat_xx + mat_x0) * scale_x;
		v[4] = ((box->x1 + off_x) * mat_xx + mat_x0) * scale_x;
		v[7] = v[4];

		/* Source y. */
		v[2] = ((box->y2 + off_y) * mat_yy + mat_y0) * scale_y;
		v[5] = v[2];
		v[8] = ((box->y1 + off_y) * mat_yy + mat_y0) * scale_y;

		v += 9;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * SSE4.2 build of the single-rectangle emitter selected for a solid
 * source combined with an untransformed mask.
 *
 * Appends twelve floats (three vertices of four floats) to
 * sna->render.vertices.  Per vertex: packed destination corner, a fixed
 * source value of .5, then the scaled mask x and y.
 */
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float mask_x0, mask_y0;
	float width, height;
	float *out;

	/* Mask origin and extent, converted to float once. */
	mask_x0 = r->mask.x + op->mask.offset[0];
	mask_y0 = r->mask.y + op->mask.offset[1];
	width = r->width;
	height = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, mask_x0, mask_y0, width, height));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 12;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[4] = xy.f;
	xy.p.y = r->dst.y;
	out[8] = xy.f;

	/* Mask x. */
	out[2] = (mask_x0 + width) * op->mask.scale[0];
	out[6] = mask_x0 * op->mask.scale[0];
	out[10] = out[6];

	/* Mask y. */
	out[3] = (mask_y0 + height) * op->mask.scale[1];
	out[7] = out[3];
	out[11] = mask_y0 * op->mask.scale[1];

	/* Every vertex carries the same constant source value. */
	out[1] = .5;
	out[5] = out[1];
	out[9] = out[5];
}
 
/*
 * SSE4.2 build of the box-list emitter for a solid source with an
 * untransformed mask.
 *
 * Writes twelve floats per box to v: per vertex the packed destination
 * corner, a fixed source value of .5, and the scaled mask x and y derived
 * from the box coordinates plus op->mask.offset.  At least one box is
 * always emitted; nbox is only tested after a box has been written.
 */
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	float off_x = op->mask.offset[0];
	float off_y = op->mask.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[4] = xy.f;
		xy.p.y = box->y1;
		v[8] = xy.f;

		/* Mask x. */
		v[2] = (off_x + box->x2) * op->mask.scale[0];
		v[6] = (off_x + box->x1) * op->mask.scale[0];
		v[10] = v[6];

		/* Mask y. */
		v[3] = (off_y + box->y2) * op->mask.scale[1];
		v[7] = v[3];
		v[11] = (off_y + box->y1) * op->mask.scale[1];

		/* Constant source value for all three vertices. */
		v[1] = .5;
		v[5] = v[1];
		v[9] = v[5];

		v += 12;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * SSE4.2 build of the single-rectangle emitter selected for a linear
 * source combined with an untransformed mask.
 *
 * Appends twelve floats (three vertices of four floats) to
 * sna->render.vertices.  Per vertex: packed destination corner, the value
 * returned by compute_linear() for the matching source corner, then the
 * scaled mask x and y.
 */
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
					    const struct sna_composite_op *op,
					    const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float mask_x0, mask_y0;
	float width, height;
	float *out;

	mask_x0 = r->mask.x + op->mask.offset[0];
	mask_y0 = r->mask.y + op->mask.offset[1];
	width = r->width;
	height = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, mask_x0, mask_y0, width, height));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 12;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[4] = xy.f;
	xy.p.y = r->dst.y;
	out[8] = xy.f;

	/* Mask x. */
	out[2] = (mask_x0 + width) * op->mask.scale[0];
	out[6] = mask_x0 * op->mask.scale[0];
	out[10] = out[6];

	/* Mask y. */
	out[3] = (mask_y0 + height) * op->mask.scale[1];
	out[7] = out[3];
	out[11] = mask_y0 * op->mask.scale[1];

	/* Linear source value at each of the three source corners. */
	out[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	out[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	out[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
/*
 * SSE4.2 build of the box-list emitter for a linear source with an
 * untransformed mask.
 *
 * Writes twelve floats per box to v: per vertex the packed destination
 * corner, compute_linear() evaluated at that corner, and the scaled mask
 * x and y.  At least one box is always emitted; nbox is only tested after
 * a box has been written.
 */
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
					const BoxRec *box, int nbox,
					float *v)
{
	float off_x = op->mask.offset[0];
	float off_y = op->mask.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[4] = xy.f;
		xy.p.y = box->y1;
		v[8] = xy.f;

		/* Mask x. */
		v[2] = (off_x + box->x2) * op->mask.scale[0];
		v[6] = (off_x + box->x1) * op->mask.scale[0];
		v[10] = v[6];

		/* Mask y. */
		v[3] = (off_y + box->y2) * op->mask.scale[1];
		v[7] = v[3];
		v[11] = (off_y + box->y1) * op->mask.scale[1];

		/* Linear source value at each corner. */
		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
#endif
 
/* AVX2 */
#if defined(avx2)
 
/*
 * AVX2 build of the single-rectangle emitter for a linear source without
 * a mask.
 *
 * Appends six floats (three vertices of two floats) to
 * sna->render.vertices: per vertex the packed destination corner followed
 * by compute_linear() evaluated at the matching source corner.
 */
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
			    const struct sna_composite_op *op,
			    const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[2] = xy.f;
	xy.p.y = r->dst.y;
	out[4] = xy.f;

	/* Linear source value at each of the three source corners. */
	out[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	out[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	out[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
/*
 * AVX2 build of the box-list emitter for a linear source without a mask.
 *
 * Writes six floats per box to v: per vertex the packed destination
 * corner followed by compute_linear() evaluated at that corner.  At least
 * one box is always emitted; nbox is only tested after a box has been
 * written.
 */
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
			const BoxRec *box, int nbox,
			float *v)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[2] = xy.f;
		xy.p.y = box->y1;
		v[4] = xy.f;

		/* Linear source value at each corner. */
		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[3] = compute_linear(&op->src, box->x1, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y1);

		v += 6;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * AVX2 build of the single-rectangle emitter for an identity source.
 *
 * Appends nine floats (three vertices of three floats) to
 * sna->render.vertices.  Each vertex is the destination coordinate packed
 * into one float slot followed by the scaled source x and y.  The corners
 * are written in the order (x+w, y+h), (x, y+h), (x, y).
 */
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	/* Reinterprets the coordinate pair as the bits of a single float. */
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);

	/* Claim the next nine floats of the vertex array. */
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 9;

	/* Packed destination corners. */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[3] = xy.f;
	xy.p.y = r->dst.y;
	out[6] = xy.f;

	/* Source x: the left edge is shared by the last two vertices. */
	out[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	out[7] = out[4];
	out[1] = out[4] + r->width * op->src.scale[0];

	/* Source y: the far edge is shared by the first two vertices. */
	out[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	out[2] = out[8] + r->height * op->src.scale[1];
	out[5] = out[2];
}
 
/*
 * AVX2 build of the box-list emitter for an identity source.
 *
 * For every box writes nine floats to v: three vertices, each holding the
 * packed destination corner followed by the scaled source x and y.  The
 * box coordinates are used for both destination and source.
 *
 * At least one box is always emitted; nbox is only tested after a box has
 * been written.
 */
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[3] = xy.f;
		xy.p.y = box->y1;
		v[6] = xy.f;

		/* Source x per vertex. */
		v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[7] = v[4];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		/* Source y per vertex. */
		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5];

		v += 9;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * AVX2 build of the single-rectangle emitter for a source whose transform
 * only uses matrix[0][0], matrix[0][2], matrix[1][1] and matrix[1][2]
 * (per-axis scale plus translation).
 *
 * Appends nine floats to sna->render.vertices: per vertex the packed
 * destination corner followed by the transformed, scaled source x and y.
 */
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float *out;

	/* Per-axis transform terms, texture scale and source offset. */
	float mat_xx = op->src.transform->matrix[0][0];
	float mat_x0 = op->src.transform->matrix[0][2];
	float mat_yy = op->src.transform->matrix[1][1];
	float mat_y0 = op->src.transform->matrix[1][2];
	float scale_x = op->src.scale[0];
	float scale_y = op->src.scale[1];
	int16_t off_x = op->src.offset[0];
	int16_t off_y = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 9;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[3] = xy.f;
	xy.p.y = r->dst.y;
	out[6] = xy.f;

	/* Source x: far edge, then the near edge shared by two vertices. */
	out[1] = ((r->src.x + r->width + off_x) * mat_xx + mat_x0) * scale_x;
	out[4] = ((r->src.x + off_x) * mat_xx + mat_x0) * scale_x;
	out[7] = out[4];

	/* Source y: far edge shared by two vertices, then the near edge. */
	out[2] = ((r->src.y + r->height + off_y) * mat_yy + mat_y0) * scale_y;
	out[5] = out[2];
	out[8] = ((r->src.y + off_y) * mat_yy + mat_y0) * scale_y;
}
 
/*
 * AVX2 build of the box-list emitter for a source with a per-axis
 * scale-and-translate transform.
 *
 * Writes nine floats per box to v: three vertices of packed destination
 * corner plus transformed, scaled source x and y.  At least one box is
 * always emitted; nbox is only tested after a box has been written.
 */
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
			       const BoxRec *box, int nbox,
			       float *v)
{
	float mat_xx = op->src.transform->matrix[0][0];
	float mat_x0 = op->src.transform->matrix[0][2];
	float mat_yy = op->src.transform->matrix[1][1];
	float mat_y0 = op->src.transform->matrix[1][2];
	float scale_x = op->src.scale[0];
	float scale_y = op->src.scale[1];
	int16_t off_x = op->src.offset[0];
	int16_t off_y = op->src.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[3] = xy.f;
		xy.p.y = box->y1;
		v[6] = xy.f;

		/* Source x. */
		v[1] = ((box->x2 + off_x) * mat_xx + mat_x0) * scale_x;
		v[4] = ((box->x1 + off_x) * mat_xx + mat_x0) * scale_x;
		v[7] = v[4];

		/* Source y. */
		v[2] = ((box->y2 + off_y) * mat_yy + mat_y0) * scale_y;
		v[5] = v[2];
		v[8] = ((box->y1 + off_y) * mat_yy + mat_y0) * scale_y;

		v += 9;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * AVX2 build of the single-rectangle emitter selected for a solid source
 * combined with an untransformed mask.
 *
 * Appends twelve floats (three vertices of four floats) to
 * sna->render.vertices.  Per vertex: packed destination corner, a fixed
 * source value of .5, then the scaled mask x and y.
 */
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float mask_x0, mask_y0;
	float width, height;
	float *out;

	/* Mask origin and extent, converted to float once. */
	mask_x0 = r->mask.x + op->mask.offset[0];
	mask_y0 = r->mask.y + op->mask.offset[1];
	width = r->width;
	height = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, mask_x0, mask_y0, width, height));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 12;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[4] = xy.f;
	xy.p.y = r->dst.y;
	out[8] = xy.f;

	/* Mask x. */
	out[2] = (mask_x0 + width) * op->mask.scale[0];
	out[6] = mask_x0 * op->mask.scale[0];
	out[10] = out[6];

	/* Mask y. */
	out[3] = (mask_y0 + height) * op->mask.scale[1];
	out[7] = out[3];
	out[11] = mask_y0 * op->mask.scale[1];

	/* Every vertex carries the same constant source value. */
	out[1] = .5;
	out[5] = out[1];
	out[9] = out[5];
}
 
/*
 * AVX2 build of the box-list emitter for a solid source with an
 * untransformed mask.
 *
 * Writes twelve floats per box to v: per vertex the packed destination
 * corner, a fixed source value of .5, and the scaled mask x and y derived
 * from the box coordinates plus op->mask.offset.  At least one box is
 * always emitted; nbox is only tested after a box has been written.
 */
avx2 fastcall static void
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
			       const BoxRec *box, int nbox,
			       float *v)
{
	float off_x = op->mask.offset[0];
	float off_y = op->mask.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[4] = xy.f;
		xy.p.y = box->y1;
		v[8] = xy.f;

		/* Mask x. */
		v[2] = (off_x + box->x2) * op->mask.scale[0];
		v[6] = (off_x + box->x1) * op->mask.scale[0];
		v[10] = v[6];

		/* Mask y. */
		v[3] = (off_y + box->y2) * op->mask.scale[1];
		v[7] = v[3];
		v[11] = (off_y + box->y1) * op->mask.scale[1];

		/* Constant source value for all three vertices. */
		v[1] = .5;
		v[5] = v[1];
		v[9] = v[5];

		v += 12;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
/*
 * AVX2 build of the single-rectangle emitter selected for a linear source
 * combined with an untransformed mask.
 *
 * Appends twelve floats (three vertices of four floats) to
 * sna->render.vertices.  Per vertex: packed destination corner, the value
 * returned by compute_linear() for the matching source corner, then the
 * scaled mask x and y.
 */
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
					  const struct sna_composite_op *op,
					  const struct sna_composite_rectangles *r)
{
	union {
		struct sna_coordinate p;
		float f;
	} xy;
	float mask_x0, mask_y0;
	float width, height;
	float *out;

	mask_x0 = r->mask.x + op->mask.offset[0];
	mask_y0 = r->mask.y + op->mask.offset[1];
	width = r->width;
	height = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, mask_x0, mask_y0, width, height));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	out = &sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 12;

	/* Packed destination corners: (x+w, y+h), (x, y+h), (x, y). */
	xy.p.x = r->dst.x + r->width;
	xy.p.y = r->dst.y + r->height;
	out[0] = xy.f;
	xy.p.x = r->dst.x;
	out[4] = xy.f;
	xy.p.y = r->dst.y;
	out[8] = xy.f;

	/* Mask x. */
	out[2] = (mask_x0 + width) * op->mask.scale[0];
	out[6] = mask_x0 * op->mask.scale[0];
	out[10] = out[6];

	/* Mask y. */
	out[3] = (mask_y0 + height) * op->mask.scale[1];
	out[7] = out[3];
	out[11] = mask_y0 * op->mask.scale[1];

	/* Linear source value at each of the three source corners. */
	out[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	out[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	out[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
 
/*
 * AVX2 build of the box-list emitter for a linear source with an
 * untransformed mask.
 *
 * Writes twelve floats per box to v: per vertex the packed destination
 * corner, compute_linear() evaluated at that corner, and the scaled mask
 * x and y.  At least one box is always emitted; nbox is only tested after
 * a box has been written.
 */
avx2 fastcall static void
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
				      const BoxRec *box, int nbox,
				      float *v)
{
	float off_x = op->mask.offset[0];
	float off_y = op->mask.offset[1];
	union {
		struct sna_coordinate p;
		float f;
	} xy;

	for (;;) {
		/* Packed destination corners: (x2,y2), (x1,y2), (x1,y1). */
		xy.p.x = box->x2;
		xy.p.y = box->y2;
		v[0] = xy.f;
		xy.p.x = box->x1;
		v[4] = xy.f;
		xy.p.y = box->y1;
		v[8] = xy.f;

		/* Mask x. */
		v[2] = (off_x + box->x2) * op->mask.scale[0];
		v[6] = (off_x + box->x1) * op->mask.scale[0];
		v[10] = v[6];

		/* Mask y. */
		v[3] = (off_y + box->y2) * op->mask.scale[1];
		v[7] = v[3];
		v[11] = (off_y + box->y1) * op->mask.scale[1];

		/* Linear source value at each corner. */
		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
		if (--nbox == 0)
			break;
	}
}
 
#endif
 
/*
 * Select the vertex emitter for a composite operation.
 *
 * When the operation has a mask (tmp->mask.bo != NULL) this sets
 * tmp->prim_emit and tmp->floats_per_vertex according to the kind of
 * source and mask, preferring the AVX2 and then the SSE4.2 build of an
 * emitter when that build exists and sna->cpu_features advertises it.
 * For an affine source with an identity mask, tmp->src.scale[] is also
 * divided by the transform's matrix[2][2].
 *
 * tmp->floats_per_rect is always set to three vertices' worth of floats.
 *
 * Returns the vertex layout id: the low two bits hold the number of source
 * coordinate floats and bits 2-3 the number of mask coordinate floats.
 *
 * NOTE: the maskless branch is compiled out (#if 0) in this port.  On that
 * path tmp->prim_emit and tmp->floats_per_vertex are left exactly as the
 * caller supplied them and 0 is returned.
 */
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
{
	/*
	 * Start from a defined value: with the maskless branch disabled the
	 * function would otherwise return an indeterminate value.
	 */
	unsigned vb = 0;

	if (tmp->mask.bo) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.is_solid) {
				DBG(("%s: solid, identity mask\n", __FUNCTION__));
#if defined(avx2)
				if (sna->cpu_features & AVX2) {
					tmp->prim_emit = emit_primitive_identity_mask__avx2;
				} else
#endif
#if defined(sse4_2)
				if (sna->cpu_features & SSE4_2) {
					tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
				} else
#endif
				{
					tmp->prim_emit = emit_primitive_identity_mask;
				}
				tmp->floats_per_vertex = 4;
				vb = 1 | 2 << 2;
			} else if (tmp->src.is_linear) {
				DBG(("%s: linear, identity mask\n", __FUNCTION__));
#if defined(avx2)
				if (sna->cpu_features & AVX2) {
					tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
				} else
#endif
#if defined(sse4_2)
				if (sna->cpu_features & SSE4_2) {
					tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
				} else
#endif
				{
					tmp->prim_emit = emit_primitive_linear_identity_mask;
				}
				tmp->floats_per_vertex = 4;
				vb = 1 | 2 << 2;
			} else if (tmp->src.transform == NULL) {
				DBG(("%s: identity source, identity mask\n", __FUNCTION__));
				tmp->prim_emit = emit_primitive_identity_source_mask;
				tmp->floats_per_vertex = 5;
				vb = 2 << 2 | 2;
			} else if (tmp->src.is_affine) {
				/* Fold the homogeneous divisor into the texture scale. */
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
				if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
					DBG(("%s: simple src, identity mask\n", __FUNCTION__));
					tmp->prim_emit = emit_primitive_simple_source_identity;
				} else {
					DBG(("%s: affine src, identity mask\n", __FUNCTION__));
					tmp->prim_emit = emit_primitive_affine_source_identity;
				}
				tmp->floats_per_vertex = 5;
				vb = 2 << 2 | 2;
			} else {
				DBG(("%s: projective source, identity mask\n", __FUNCTION__));
				tmp->prim_emit = emit_primitive_mask;
				tmp->floats_per_vertex = 6;
				vb = 2 << 2 | 3;
			}
		} else {
			/*
			 * Transformed mask: use the generic emitter and size the
			 * vertex as one float for the destination plus 1, 2 or 3
			 * floats for each of mask and source.
			 */
			tmp->prim_emit = emit_primitive_mask;
			tmp->floats_per_vertex = 1;
			vb = 0;
			if (tmp->mask.is_solid) {
				tmp->floats_per_vertex += 1;
				vb |= 1 << 2;
			} else if (tmp->mask.is_affine) {
				tmp->floats_per_vertex += 2;
				vb |= 2 << 2;
			}else {
				tmp->floats_per_vertex += 3;
				vb |= 3 << 2;
			}
			if (tmp->src.is_solid) {
				tmp->floats_per_vertex += 1;
				vb |= 1;
			} else if (tmp->src.is_affine) {
				tmp->floats_per_vertex += 2;
				vb |= 2 ;
			}else {
				tmp->floats_per_vertex += 3;
				vb |= 3;
			}
			DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
			     __FUNCTION__,tmp->floats_per_vertex, vb));
		}
	} else {
#if 0
		if (tmp->src.is_solid) {
			DBG(("%s: solid, no mask\n", __FUNCTION__));
			tmp->prim_emit = emit_primitive_solid;
			if (tmp->src.is_opaque && tmp->op == PictOpOver)
				tmp->op = PictOpSrc;
			tmp->floats_per_vertex = 2;
			vb = 1;
		} else if (tmp->src.is_linear) {
			DBG(("%s: linear, no mask\n", __FUNCTION__));
#if defined(avx2)
			if (sna->cpu_features & AVX2) {
				tmp->prim_emit = emit_primitive_linear__avx2;
			} else
#endif
#if defined(sse4_2)
			if (sna->cpu_features & SSE4_2) {
				tmp->prim_emit = emit_primitive_linear__sse4_2;
			} else
#endif
			{
				tmp->prim_emit = emit_primitive_linear;
			}
			tmp->floats_per_vertex = 2;
			vb = 1;
		} else if (tmp->src.transform == NULL) {
			DBG(("%s: identity src, no mask\n", __FUNCTION__));
#if defined(avx2)
			if (sna->cpu_features & AVX2) {
				tmp->prim_emit = emit_primitive_identity_source__avx2;
			} else
#endif
#if defined(sse4_2)
			if (sna->cpu_features & SSE4_2) {
				tmp->prim_emit = emit_primitive_identity_source__sse4_2;
			} else
#endif
			{
				tmp->prim_emit = emit_primitive_identity_source;
			}
			tmp->floats_per_vertex = 3;
			vb = 2;
		} else if (tmp->src.is_affine) {
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
			if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
				DBG(("%s: simple src, no mask\n", __FUNCTION__));
#if defined(avx2)
				if (sna->cpu_features & AVX2) {
					tmp->prim_emit = emit_primitive_simple_source__avx2;
				} else
#endif
#if defined(sse4_2)
				if (sna->cpu_features & SSE4_2) {
					tmp->prim_emit = emit_primitive_simple_source__sse4_2;
				} else
#endif
				{
					tmp->prim_emit = emit_primitive_simple_source;
				}
			} else {
				DBG(("%s: affine src, no mask\n", __FUNCTION__));
				tmp->prim_emit = emit_primitive_affine_source;
			}
			tmp->floats_per_vertex = 3;
			vb = 2;
		} else {
			DBG(("%s: projective src, no mask\n", __FUNCTION__));
			assert(!tmp->src.is_solid);
			tmp->prim_emit = emit_primitive;
			tmp->floats_per_vertex = 4;
			vb = 3;
		}
#endif
	}

	/* A rectangle is always emitted as three vertices. */
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	return vb;
}
 
 
 
/drivers/video/Intel-2D/sna/gen4_vertex.h
0,0 → 1,15
/* Public interface of the shared gen4+ vertex emission code. */
#ifndef GEN4_VERTEX_H
#define GEN4_VERTEX_H

#include "compiler.h"

#include "sna.h"
#include "sna_render.h"

/*
 * Vertex buffer management.
 * NOTE(review): the definitions are not visible from this header; judging
 * by their use in gen5_get_rectangles__flush(), gen4_vertex_flush() is
 * called when a primitive is open and gen4_vertex_finish() returns the
 * amount of vertex space then available - confirm against gen4_vertex.c.
 */
void gen4_vertex_flush(struct sna *sna);
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);

/*
 * Chooses tmp->prim_emit, sets tmp->floats_per_vertex and
 * tmp->floats_per_rect, and returns the vertex layout id.
 */
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp);

#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/sna/gen5_render.c
0,0 → 1,2473
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen5_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/*
 * Debug switches.
 * NOTE(review): not referenced in this part of the file; presumably a
 * non-zero value disables the corresponding render path - confirm at the
 * point of use.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0

/* NOTE(review): presumably prefers the BLT engine for fills - confirm. */
#define PREFER_BLT_FILL 1

/*
 * A non-zero DBG_NO_SURFACE_CACHE makes gen5_bind_bo() skip the lookup of
 * a previously recorded surface binding.
 */
#define DBG_NO_STATE_CACHE 0
#define DBG_NO_SURFACE_CACHE 0

/* Largest width or height that too_large() accepts. */
#define MAX_3D_SIZE 8192
 
/*
 * Number of 16-register blocks needed to hold nreg registers, minus one.
 * The argument is parenthesised so that expressions using operators of
 * lower precedence than '+' (for example a | b) expand correctly.
 */
#define GEN5_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
 
/* Set up a default static partitioning of the URB, which is supposed to
* allow anything we would want to do, at potentially lower performance.
*
* Each fixed-function unit gets an entry size and an entry count; the
* units given zero entries here are CS, GS and CLIP.
*/
#define URB_CS_ENTRY_SIZE 1
#define URB_CS_ENTRIES 0

#define URB_VS_ENTRY_SIZE 1
#define URB_VS_ENTRIES 256 /* minimum of 8 */

#define URB_GS_ENTRY_SIZE 0
#define URB_GS_ENTRIES 0

#define URB_CLIP_ENTRY_SIZE 0
#define URB_CLIP_ENTRIES 0

#define URB_SF_ENTRY_SIZE 2
#define URB_SF_ENTRIES 64

/*
* This program computes dA/dx and dA/dy for the texture coordinates along
* with the base texture coordinate. It was extracted from the Mesa driver.
*
* NOTE(review): no program follows this comment in this part of the file;
* it presumably described the SF kernel whose limits are defined below -
* confirm, and move or drop the comment accordingly.
*/

/* Register count and thread limit for the SF kernel. */
#define SF_KERNEL_NUM_GRF 16
#define SF_MAX_THREADS 48

/* Register count and thread limit for the PS kernels. */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 72
 
/*
 * Precompiled pixel shader for packed video, registered below as
 * WM_KERNEL_VIDEO_PACKED.  The program is the concatenation of the
 * included .g5b fragments, in order; each array row is four dwords.
 */
static const uint32_t ps_kernel_packed_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_argb.g5b"
#include "exa_wm_yuv_rgb.g5b"
#include "exa_wm_write.g5b"
};
 
/*
 * Precompiled pixel shader for planar video, registered below as
 * WM_KERNEL_VIDEO_PLANAR.  It differs from the packed variant only in the
 * sampling fragment (exa_wm_src_sample_planar.g5b).
 */
static const uint32_t ps_kernel_planar_static[][4] = {
#include "exa_wm_xy.g5b"
#include "exa_wm_src_affine.g5b"
#include "exa_wm_src_sample_planar.g5b"
#include "exa_wm_yuv_rgb.g5b"
#include "exa_wm_write.g5b"
};
 
/*
 * Table-entry helpers for wm_kernels[].
 *
 * NOKERNEL stores `func` as the data pointer with a size of 0.
 * KERNEL stores the address of a static array together with its sizeof.
 * NOTE(review): a size of 0 presumably marks an entry whose data is a
 * brw_wm_kernel__* generator rather than a binary blob - confirm where the
 * table is consumed.
 */
#define NOKERNEL(kernel_enum, func, masked) \
[kernel_enum] = {func, 0, masked}
#define KERNEL(kernel_enum, kernel, masked) \
[kernel_enum] = {&kernel, sizeof(kernel), masked}
/*
 * WM (pixel shader) kernels indexed by WM_KERNEL_* id.  has_mask records
 * whether the kernel takes a mask input.
 */
static const struct wm_kernel_info {
const void *data;
unsigned int size;
bool has_mask;
} wm_kernels[] = {
NOKERNEL(WM_KERNEL, brw_wm_kernel__affine, false),
NOKERNEL(WM_KERNEL_P, brw_wm_kernel__projective, false),

NOKERNEL(WM_KERNEL_MASK, brw_wm_kernel__affine_mask, true),
NOKERNEL(WM_KERNEL_MASK_P, brw_wm_kernel__projective_mask, true),

NOKERNEL(WM_KERNEL_MASKCA, brw_wm_kernel__affine_mask_ca, true),
NOKERNEL(WM_KERNEL_MASKCA_P, brw_wm_kernel__projective_mask_ca, true),

NOKERNEL(WM_KERNEL_MASKSA, brw_wm_kernel__affine_mask_sa, true),
NOKERNEL(WM_KERNEL_MASKSA_P, brw_wm_kernel__projective_mask_sa, true),

NOKERNEL(WM_KERNEL_OPACITY, brw_wm_kernel__affine_opacity, true),
NOKERNEL(WM_KERNEL_OPACITY_P, brw_wm_kernel__projective_opacity, true),

KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, false),
KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, false),
};
/* Only KERNEL is undefined here; NOKERNEL stays defined. */
#undef KERNEL
 
/*
 * Blend factors per Render operator, indexed by the PictOp value (the
 * table is read as gen5_blend_op[op]).  src_alpha is true when the
 * destination factor depends on the source alpha.
 */
static const struct blendinfo {
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen5_blend_op[] = {
	/* Clear */
	{ false, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO },
	/* Src */
	{ false, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO },
	/* Dst */
	{ false, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE },
	/* Over */
	{ true, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA },
	/* OverReverse */
	{ false, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE },
	/* In */
	{ false, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO },
	/* InReverse */
	{ true, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA },
	/* Out */
	{ false, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO },
	/* OutReverse */
	{ true, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA },
	/* Atop */
	{ true, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA },
	/* AtopReverse */
	{ true, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA },
	/* Xor */
	{ true, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA },
	/* Add */
	{ false, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE },
};
 
/**
* Highest-valued BLENDFACTOR used in gen5_blend_op, plus one.
*
* This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR,
* GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1)

/*
* Offset of the entry for source factor s and destination factor d in a
* table laid out row-major by (s, d) with a stride of 64 per entry.
* NOTE(review): the 64 is presumably the size in bytes of one state
* entry - confirm where the table is built.
*/
#define BLEND_OFFSET(s, d) \
(((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64)

/*
* Offset of the entry for source filter/extend (sf, se), mask
* filter/extend (mf, me) and kernel k, using the same stride of 64.
*/
#define SAMPLER_OFFSET(sf, se, mf, me, k) \
((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64)
 
/*
 * Forward declaration: used by gen5_magic_ca_pass() and
 * gen5_get_rectangles__flush() before its definition.
 */
static bool
gen5_emit_pipelined_pointers(struct sna *sna,
const struct sna_composite_op *op,
int blend, int kernel);

/*
 * Shorthands for writing to the batch and vertex buffers.  They expand to
 * calls that take a variable named `sna`, so they may only be used where
 * such a variable is in scope.
 */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
/* True when either dimension exceeds MAX_3D_SIZE. */
static inline bool too_large(int width, int height)
{
	if (width > MAX_3D_SIZE)
		return true;

	return height > MAX_3D_SIZE;
}
 
/*
 * Pick the WM kernel id for a composite operation.
 *
 * Without a mask the plain kernel is used.  With a mask the choice depends
 * on component alpha: a component-alpha mask uses the MASKSA kernel when
 * the blend op reads source alpha and MASKCA otherwise; a regular mask
 * uses MASK.  One is added to the id when the source is not affine, which
 * relies on each *_P id immediately following its affine counterpart.
 */
static int
gen5_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int kernel = WM_KERNEL;

	if (has_mask) {
		kernel = WM_KERNEL_MASK;
		if (is_ca)
			kernel = gen5_blend_op[op].src_alpha ?
				WM_KERNEL_MASKSA : WM_KERNEL_MASKCA;
	}

	if (!is_affine)
		kernel += 1;

	return kernel;
}
 
/*
 * Second pass for component-alpha operations that need one.
 *
 * If op->need_magic_ca_pass is set, switches the pipeline to PictOpAdd
 * with the matching component-alpha kernel and re-issues a RECTLIST
 * primitive covering the vertices from sna->render.vertex_start up to
 * sna->render.vertex_index.
 *
 * Returns true when the extra pass was emitted; the pipeline state is then
 * left set up for PictOpAdd.  Returns false when no pass was needed.
 */
static bool gen5_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	int kernel;

	if (!op->need_magic_ca_pass)
		return false;

	assert(sna->render.vertex_index > sna->render.vertex_start);

	DBG(("%s: CA fixup\n", __FUNCTION__));
	assert(op->mask.bo != NULL);
	assert(op->has_component_alpha);

	kernel = gen5_choose_composite_kernel(PictOpAdd,
					      true, true, op->is_affine);
	gen5_emit_pipelined_pointers(sna, op, PictOpAdd, kernel);

	OUT_BATCH(GEN5_3DPRIMITIVE |
		  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
		  (0 << 9) |
		  4);
	/* Replay exactly the vertices of the primitive just drawn. */
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
	OUT_BATCH(sna->render.vertex_start);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */

	/* Remember where this primitive ended in the batch. */
	sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
	return true;
}
 
/*
 * Return the BLEND_OFFSET() of the blend state to use.
 *
 * In this port the factors are fixed to ONE / INV_SRC_ALPHA; the per-op
 * table lookup and the destination-alpha / component-alpha adjustments are
 * compiled out.  op, has_component_alpha and dst_format therefore only
 * feed the debug message.
 */
static uint32_t gen5_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src = GEN5_BLENDFACTOR_ONE;		/* table lookup by op disabled */
	uint32_t dst = GEN5_BLENDFACTOR_INV_SRC_ALPHA;	/* table lookup by op disabled */

#if 0
	/* If there's no dst alpha channel, adjust the blend op so that we'll treat
	 * it as always 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN5_BLENDFACTOR_DST_ALPHA)
			src = GEN5_BLENDFACTOR_ONE;
		else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN5_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen5_blend_op[op].src_alpha) {
		if (dst == GEN5_BLENDFACTOR_SRC_ALPHA)
			dst = GEN5_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN5_BLENDFACTOR_INV_SRC_COLOR;
	}
#endif

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, BLEND_OFFSET(src, dst)));

	return BLEND_OFFSET(src, dst);
}
 
/*
 * Translate a Render picture format into the GEN5 surface format used for
 * sampling.  Formats without a mapping yield (uint32_t)-1.
 */
static uint32_t gen5_get_card_format(PictFormat format)
{
	switch (format) {
	/* 32 bits per pixel */
	case PICT_a8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;

	/* 24 bits per pixel */
	case PICT_r8g8b8:
		return GEN5_SURFACEFORMAT_R8G8B8_UNORM;

	/* 16 bits per pixel */
	case PICT_r5g6b5:
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a4r4g4b4:
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;

	/* 8 bits per pixel */
	case PICT_a8:
		return GEN5_SURFACEFORMAT_A8_UNORM;

	default:
		return -1;
	}
}
 
/*
 * Surface format for a render target.
 *
 * In this port every destination is treated as B8G8R8A8; the per-format
 * mapping is kept under #if 0 for reference and `format` is ignored.
 */
static uint32_t gen5_get_dest_format(PictFormat format)
{
#if 0
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN5_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
#else
	return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
#endif
}
/*
 * A gen5 surface state followed by enough padding to make the two members
 * total 32 bytes.  gen5_bind_bo() reserves batch space in units of this
 * type.
 */
struct gen5_surface_state_padded {
	struct gen5_surface_state state;
	char pad[32 - sizeof(struct gen5_surface_state)];
};
typedef struct gen5_surface_state_padded gen5_surface_state_padded;
 
/*
 * Reserve a block of zeros in the static state stream, useful for the
 * legacy border color and depth-stencil state.  The address returned by
 * sna_static_stream_map() is not needed and is dropped.
 * NOTE(review): the two 64s are presumably size and alignment in bytes -
 * confirm against sna_static_stream_map().
 */
static void null_create(struct sna_static_stream *stream)
{
	(void)sna_static_stream_map(stream, 64, 64);
}
 
/*
 * Fill in a sampler state for the given filter and extend mode.
 *
 * The same map filter is used for minification and magnification, and the
 * same wrap mode for the r, s and t coordinates.  Unrecognised filter
 * values behave like SAMPLER_FILTER_NEAREST and unrecognised extend values
 * like SAMPLER_EXTEND_NONE.
 */
static void
sampler_state_init(struct gen5_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	int map_filter;
	int wrap_mode;

	switch (filter) {
	case SAMPLER_FILTER_BILINEAR:
		map_filter = GEN5_MAPFILTER_LINEAR;
		break;
	case SAMPLER_FILTER_NEAREST:
	default:
		map_filter = GEN5_MAPFILTER_NEAREST;
		break;
	}

	switch (extend) {
	case SAMPLER_EXTEND_REPEAT:
		wrap_mode = GEN5_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		wrap_mode = GEN5_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		wrap_mode = GEN5_TEXCOORDMODE_MIRROR;
		break;
	case SAMPLER_EXTEND_NONE:
	default:
		wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER;
		break;
	}

	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY;

	sampler_state->ss0.min_filter = map_filter;
	sampler_state->ss0.mag_filter = map_filter;

	sampler_state->ss1.r_wrap_mode = wrap_mode;
	sampler_state->ss1.s_wrap_mode = wrap_mode;
	sampler_state->ss1.t_wrap_mode = wrap_mode;
}
 
/*
 * Surface-state tiling bits for an I915_TILING_* mode.
 *
 * An unknown mode trips the assertion; if assertions are compiled out it
 * is treated like I915_TILING_NONE.
 */
static uint32_t
gen5_tiling_bits(uint32_t tiling)
{
	if (tiling == I915_TILING_X)
		return GEN5_SURFACE_TILED;

	if (tiling == I915_TILING_Y)
		return GEN5_SURFACE_TILED | GEN5_SURFACE_TILED_Y;

	if (tiling != I915_TILING_NONE)
		assert(0);

	return 0;
}
 
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * A binding already recorded on the bo for the same format and direction
 * is reused unless DBG_NO_SURFACE_CACHE is set.  Otherwise a new padded
 * surface state is carved from the top of the batch (sna->kgem.surface is
 * moved down), filled in, and recorded on the bo.
 *
 * @sna:     driver state owning the batch
 * @bo:      buffer object to bind
 * @width:   surface width in pixels (stored as width - 1)
 * @height:  surface height in pixels (stored as height - 1)
 * @format:  GEN5 surface format
 * @is_dst:  true to bind as a render target, false as a sampler source
 *
 * Returns the byte offset of the surface state within the batch.
 */
static uint32_t
gen5_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	/*
	 * Cache key: the format with bit 31 flagging a render target.  The
	 * cast keeps the shift in unsigned arithmetic; shifting a promoted
	 * int 1 into the sign bit is undefined behaviour.
	 */
	const uint32_t key = format | (uint32_t)is_dst << 31;
	uint32_t domains;
	uint16_t offset;
	uint32_t *ss;

	/* After the first bind, we manage the cache domains within the batch */
	if (!DBG_NO_SURFACE_CACHE) {
		offset = kgem_bo_get_binding(bo, key);
		if (offset) {
			if (is_dst)
				kgem_bo_mark_dirty(bo);
			return offset * sizeof(uint32_t);
		}
	}

	/* Reserve one padded surface state below the current surface mark. */
	offset = sna->kgem.surface -=
		sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;

	ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
		 GEN5_SURFACE_BLEND_ENABLED |
		 format << GEN5_SURFACE_FORMAT_SHIFT);

	if (is_dst) {
		ss[0] |= GEN5_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER;
	} else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	/* The surface base address is patched in through a relocation. */
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);

	ss[2] = ((width - 1)  << GEN5_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
	ss[3] = (gen5_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN5_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	ss[5] = 0;

	kgem_bo_set_binding(bo, key, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
 
/*
 * Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout of this operation.
 *
 * The buffer address dword is written as 0 and its batch position is
 * recorded in sna->render.vertex_reloc[] so it can be patched later.
 * Must only be called while the layout's bit in sna->render.vb_id is
 * clear; the bit is set on return.
 */
static void gen5_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = op->u.gen5.ve_id;

	assert((sna->render.vb_id & (1 << id)) == 0);

	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
	/* Buffer index and pitch: four bytes per float in a vertex. */
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));

	assert(sna->render.nvertex_reloc < ARRAY_SIZE(sna->render.vertex_reloc));
	/* Remember which batch dword will receive the buffer address. */
	sna->render.vertex_reloc[sna->render.nvertex_reloc] = sna->kgem.nbatch;
	sna->render.nvertex_reloc++;

	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
 
/*
 * Open a RECTLIST 3DPRIMITIVE, or continue the previous one.
 *
 * If nothing has been added to the batch since the last primitive was
 * emitted, that primitive is reopened by pointing vertex_offset back at its
 * vertex-count dword.  Otherwise a new 6-dword primitive is emitted with a
 * zero vertex count to be patched later.
 */
static void gen5_emit_primitive(struct sna *sna)
{
	const bool extend_previous =
		sna->kgem.nbatch == sna->render_state.gen5.last_primitive;

	if (extend_previous) {
		/* The count dword is followed by 5 more dwords. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
	} else {
		OUT_BATCH(GEN5_3DPRIMITIVE |
			  GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL |
			  (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) |
			  (0 << 9) |
			  4);
		sna->render.vertex_offset = sna->kgem.nbatch;
		OUT_BATCH(0);	/* vertex count, to be filled in later */
		OUT_BATCH(sna->render.vertex_index);
		OUT_BATCH(1);	/* single instance */
		OUT_BATCH(0);	/* start instance location */
		OUT_BATCH(0);	/* index buffer offset, ignored */
		sna->render.vertex_start = sna->render.vertex_index;

		sna->render_state.gen5.last_primitive = sna->kgem.nbatch;
	}
}
 
/*
 * Make sure a vertex buffer and an open primitive exist for @op.
 *
 * Returns false when the batch has no room for the required commands, so
 * the caller has to flush; true otherwise.
 */
static bool gen5_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	const int vb_bit = 1 << op->u.gen5.ve_id;
	int required;

	/* Another thread may already have opened a primitive while we waited. */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Primitive (plus the component-alpha pass if needed)... */
	if (op->need_magic_ca_pass)
		required = 20;
	else
		required = 6;
	/* ...and the vertex-buffer packet when it has not been emitted yet. */
	if (!(sna->render.vb_id & vb_bit))
		required += 5;

	if (!kgem_check_batch(&sna->kgem, required))
		return false;

	if (!(sna->render.vb_id & vb_bit))
		gen5_emit_vertex_buffer(sna, op);
	if (!sna->render.vertex_offset)
		gen5_emit_primitive(sna);

	return true;
}
 
/*
 * Slow path of gen5_get_rectangles(), taken when the vertex buffer cannot
 * hold another rectangle.
 *
 * Returns the space reported by vertex_space() if waiting on the vertex lock
 * freed enough room, 0 if the batch or relocation checks fail (the caller
 * then flushes the batch), and otherwise the result of gen4_vertex_finish().
 */
static int gen5_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}

/* Same batch budget as gen5_rectangle_begin() without the vertex buffer. */
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 20 : 6))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;

/* Close the open primitive before the vertex buffer is finished. */
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
/* Re-emit the pipelined pointers if the CA pass reports it ran. */
if (gen5_magic_ca_pass(sna, op))
gen5_emit_pipelined_pointers(sna, op, op->op,
op->u.gen5.wm_kernel);
}

return gen4_vertex_finish(sna);
}
 
/*
 * Reserve vertex space for up to @want rectangles of @op.
 *
 * Returns the number of rectangles actually reserved (at least one, possibly
 * fewer than @want when the remaining vertex space is smaller) and advances
 * sna->render.vertex_index by three vertices per rectangle.
 *
 * When space cannot be obtained, the current batch is submitted, @emit_state
 * is called to re-emit state into the new batch, and the attempt is retried.
 */
inline static int gen5_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna,
const struct sna_composite_op *op))
{
int rem;

assert(want);

start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen5_get_rectangles__flush(sna, op);
if (unlikely (rem == 0))
goto flush;
}

/* No primitive open: start one, then re-read the vertex space. */
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen5_rectangle_begin(sna, op))
goto flush;
else
goto start;
}

assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
/* Clamp the request to what fits in the remaining space. */
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;

sna->render.vertex_index += 3*want;
return want;

flush:
/* Close any open primitive, submit the batch and start over. */
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
/*
 * Allocate a zeroed binding table from the surface area of the batch.
 *
 * The table occupies one padded surface-state slot.  Its dword offset within
 * the batch is stored in *@offset and a pointer to the table is returned.
 */
static uint32_t *
gen5_composite_get_binding_table(struct sna *sna,
				 uint16_t *offset)
{
	const size_t slot_bytes = sizeof(struct gen5_surface_state_padded);
	uint32_t *table;

	sna->kgem.surface -= slot_bytes / sizeof(uint32_t);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	table = sna->kgem.batch + sna->kgem.surface;

	/* Clear all surplus entries to zero in case of prefetch */
	memset(table, 0, slot_bytes);
	return table;
}
 
/*
 * Emit URB_FENCE and CS_URB_STATE.
 *
 * The URB is split into consecutive VS, GS, CLIP, SF and CS sections; each
 * fence is the end of its section, i.e. the running sum of the section
 * sizes starting from 0.
 */
static void
gen5_emit_urb(struct sna *sna)
{
	const int vs_fence   = (URB_VS_ENTRIES * URB_VS_ENTRY_SIZE);
	const int gs_fence   = vs_fence + (URB_GS_ENTRIES * URB_GS_ENTRY_SIZE);
	const int clip_fence = gs_fence + (URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE);
	const int sf_fence   = clip_fence + (URB_SF_ENTRIES * URB_SF_ENTRY_SIZE);
	const int cs_fence   = sf_fence + (URB_CS_ENTRIES * URB_CS_ENTRY_SIZE);

	OUT_BATCH(GEN5_URB_FENCE |
		  UF0_CS_REALLOC |
		  UF0_SF_REALLOC |
		  UF0_CLIP_REALLOC |
		  UF0_GS_REALLOC |
		  UF0_VS_REALLOC |
		  1);
	OUT_BATCH((clip_fence << UF1_CLIP_FENCE_SHIFT) |
		  (gs_fence << UF1_GS_FENCE_SHIFT) |
		  (vs_fence << UF1_VS_FENCE_SHIFT));
	OUT_BATCH((cs_fence << UF2_CS_FENCE_SHIFT) |
		  (sf_fence << UF2_SF_FENCE_SHIFT));

	/* Constant buffer state */
	OUT_BATCH(GEN5_CS_URB_STATE | 0);
	OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0);
}
 
/*
 * Emit STATE_BASE_ADDRESS.
 *
 * The general and instruction bases are relocated against the render
 * state's general_bo; the surface base is a relocation with a NULL bo
 * (NOTE(review): presumably this refers to the batch itself - confirm
 * against kgem_add_reloc()).  Each relocation targets the dword about to be
 * written, which is why sna->kgem.nbatch is passed as the position.
 */
static void
gen5_emit_state_base_address(struct sna *sna)
{
assert(sna->render_state.gen5.general_bo->proxy == NULL);
OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6);
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */
sna->kgem.nbatch,
sna->render_state.gen5.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* media */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen5.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));

/* upper bounds, all disabled */
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
/*
 * Emit the state that is set once per batch: an optional MI_FLUSH, the 3D
 * pipeline select and the state base addresses.  Clears needs_invariant.
 */
static void
gen5_emit_invariant(struct sna *sna)
{
/* Ironlake errata workaround: Before disabling the clipper,
* you have to MI_FLUSH to get the pipeline idle.
*
* However, the kernel flushes the pipeline between batches,
* so we should be safe....
*
* On the other hand, after using BLT we must use a non-pipelined
* operation...
*/
/* Only flush when the batch already contains relocations. */
if (sna->kgem.nreloc)
OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH);

OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D);

gen5_emit_state_base_address(sna);

sna->render_state.gen5.needs_invariant = false;
}
 
/*
 * Prepare the batch for a render operation on @op's destination.
 *
 * Switches kgem to render mode, submits the current batch first if it
 * cannot hold 150 more dwords plus 4 surfaces, and emits the invariant
 * state when it is still pending.
 */
static void
gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
/* Re-select render mode for the fresh batch. */
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (sna->render_state.gen5.needs_invariant)
gen5_emit_invariant(sna);
}
 
/*
 * Realign the vertex buffer cursor when the vertex size changes.
 *
 * If @op uses a different number of floats per vertex than the previous
 * operation, vertex_index is rounded up to the next whole vertex of the new
 * size and vertex_used is moved to match.  The buffer is finished first when
 * fewer than two rectangles' worth of floats remain.
 */
static void
gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert(op->floats_per_rect == 3*op->floats_per_vertex);

	/* Same layout as before: the cursor is already aligned. */
	if (op->floats_per_vertex == sna->render_state.gen5.floats_per_vertex)
		return;

	if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
		gen4_vertex_finish(sna);

	DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
	     sna->render_state.gen5.floats_per_vertex,
	     op->floats_per_vertex,
	     sna->render.vertex_index,
	     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));

	/* Round up to a whole number of new-sized vertices. */
	sna->render.vertex_index =
		(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	sna->render.vertex_used =
		sna->render.vertex_index * op->floats_per_vertex;

	sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex;
}
 
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS for the table at dword @offset.
 *
 * Skipped when the same table is already current, unless DBG_NO_STATE_CACHE
 * is set.
 */
static void
gen5_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (!DBG_NO_STATE_CACHE &&
sna->render_state.gen5.surface_table == offset)
return;

sna->render_state.gen5.surface_table = offset;

/* Binding table pointers */
OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4);
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
OUT_BATCH(0); /* clip */
OUT_BATCH(0); /* sf */
/* Only the PS uses the binding table */
/* The offset is in dwords; the command takes bytes. */
OUT_BATCH(offset*4);
}
 
/*
 * Emit 3DSTATE_PIPELINED_POINTERS (followed by the URB setup) for the given
 * sampler/kernel/blend combination.
 *
 * Nothing is emitted, and false is returned, when the combination matches
 * the last one emitted.  Otherwise the return value tells whether the blend
 * state differs from the previously emitted one.
 *
 * @blend:  Render operator used to select the blend state.
 * @kernel: WM kernel index.
 */
static bool
gen5_emit_pipelined_pointers(struct sna *sna,
			     const struct sna_composite_op *op,
			     int blend, int kernel)
{
	uint16_t sp, bp;
	uint32_t key;
	bool blend_changed;

	DBG(("%s: has_mask=%d, src=(%d, %d), mask=(%d, %d),kernel=%d, blend=%d, ca=%d, format=%x\n",
	     __FUNCTION__, op->u.gen5.ve_id & 2,
	     op->src.filter, op->src.repeat,
	     op->mask.filter, op->mask.repeat,
	     kernel, blend, op->has_component_alpha, (int)op->dst.format));

	sp = SAMPLER_OFFSET(op->src.filter, op->src.repeat,
			    op->mask.filter, op->mask.repeat,
			    kernel);
	bp = gen5_get_blend(blend, op->has_component_alpha, op->dst.format);

	/* Cache key: sampler offset in bits 0-15, blend offset from bit 16,
	 * and bit 31 flagging the presence of a mask.  The flag is widened
	 * before shifting because shifting the int result of the comparison
	 * into the sign bit is undefined.
	 */
	key = sp | (uint32_t)bp << 16 | (uint32_t)(op->mask.bo != NULL) << 31;
	DBG(("%s: sp=%d, bp=%d, key=%08x (current sp=%d, bp=%d, key=%08x)\n",
	     __FUNCTION__, sp, bp, key,
	     sna->render_state.gen5.last_pipelined_pointers & 0xffff,
	     (sna->render_state.gen5.last_pipelined_pointers >> 16) & 0x7fff,
	     sna->render_state.gen5.last_pipelined_pointers));
	if (key == sna->render_state.gen5.last_pipelined_pointers)
		return false;

	OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5);
	OUT_BATCH(sna->render_state.gen5.vs);
	OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */
	OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */
	OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]);
	OUT_BATCH(sna->render_state.gen5.wm + sp);
	OUT_BATCH(sna->render_state.gen5.cc + bp);

	/* Compare only the blend field (mask flag excluded) with the old key. */
	blend_changed =
		(sna->render_state.gen5.last_pipelined_pointers & 0x7fff0000) != ((uint32_t)bp << 16);
	sna->render_state.gen5.last_pipelined_pointers = key;

	gen5_emit_urb(sna);

	return blend_changed;
}
 
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE covering the whole destination, with the
 * destination's (x, y) as the drawing origin.
 *
 * Returns true if the command was emitted, false if the cached limit and
 * offset already match (unless DBG_NO_STATE_CACHE is set).
 */
static bool
gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
{
	/* Pack in unsigned arithmetic: a uint16_t operand is promoted to int,
	 * so shifting a value with bit 15 set left by 16 would overflow the
	 * signed type, which is undefined.
	 */
	uint32_t limit = (uint32_t)(op->dst.height - 1) << 16 |
			 (uint32_t)(op->dst.width - 1);
	uint32_t offset = (uint32_t)(uint16_t)op->dst.y << 16 |
			  (uint32_t)(uint16_t)op->dst.x;

	assert(!too_large(op->dst.x, op->dst.y));
	assert(!too_large(op->dst.width, op->dst.height));

	if (!DBG_NO_STATE_CACHE &&
	    sna->render_state.gen5.drawrect_limit == limit &&
	    sna->render_state.gen5.drawrect_offset == offset)
		return false;

	sna->render_state.gen5.drawrect_offset = offset;
	sna->render_state.gen5.drawrect_limit = limit;

	OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
	OUT_BATCH(0x00000000);	/* top-left corner */
	OUT_BATCH(limit);	/* (height-1) << 16 | (width-1) */
	OUT_BATCH(offset);	/* origin: y << 16 | x */
	return true;
}
 
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS for the layout encoded in op->u.gen5.ve_id.
 *
 * The low two bits of the id select the format of texture coordinate 0 and
 * the bits above select the format of texture coordinate 1; a non-zero
 * upper part means a mask channel is present.  Nothing is emitted when the
 * id matches the cached one (unless DBG_NO_STATE_CACHE is set).
 */
static void
gen5_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen5_render_state *render = &sna->render_state.gen5;
int id = op->u.gen5.ve_id;
bool has_mask = id >> 2;
uint32_t format, dw;

if (!DBG_NO_STATE_CACHE && render->ve_id == id)
return;

DBG(("%s: changing %d -> %d\n", __FUNCTION__, render->ve_id, id));
render->ve_id = id;

/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
/* Length: two dwords per element (3 or 4 elements), in length-2 form. */
OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS |
((2 * (has_mask ? 4 : 3)) + 1 - 2));

/* Pad element: all four components stored as 0. */
OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID |
(GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) |
(0 << VE0_OFFSET_SHIFT));
OUT_BATCH((VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
(VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

/* u0, v0, w0 */
DBG(("%s: id=%d, first channel %d floats, offset=4b\n", __FUNCTION__,
id, id & 3));
/* The fourth component is always 1.0; the others depend on the format. */
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
/* falls through to case 0 if the assert does not abort */
case 0:
format = GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
/* Texture coordinate 0 starts 4 bytes in, right after the packed x,y. */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
format | 4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);

/* u1, v1, w1 */
if (has_mask) {
/* Skip x,y plus channel 0, which takes (id & 3) floats, or one float
* when (id & 3) is 0 (GNU "?:" extension).
*/
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: id=%x, second channel %d floats, offset=%db\n", __FUNCTION__,
id, id >> 2, offset));
dw = VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
format = GEN5_SURFACEFORMAT_R32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
/* falls through to case 2 if the assert does not abort */
case 2:
format = GEN5_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT << VE0_FORMAT_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
format | offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
/* Emit a 4-dword PIPE_CONTROL with the WC_FLUSH flag set. */
inline static void
gen5_emit_pipe_flush(struct sna *sna)
{
OUT_BATCH(GEN5_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN5_PIPE_CONTROL_WC_FLUSH);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
/*
 * Emit all per-operation 3D state for @op.
 *
 * @offset is the binding table's dword offset with bit 0 reused as a flag:
 * the callers in this file pass "offset | dirty", where dirty says whether
 * the destination bo was dirty on entry.  The flag is masked off before the
 * offset is used as an address.
 */
static void
gen5_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t offset)
{
bool flush = false;

assert(op->dst.bo->exec);

/* drawrect must be first for Ironlake BLT workaround */
/* A newly emitted drawing rectangle drops the dirty flag. */
if (gen5_emit_drawing_rectangle(sna, op))
offset &= ~1;
gen5_emit_binding_table(sna, offset & ~1);
if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)){
DBG(("%s: changed blend state, flush required? %d\n",
__FUNCTION__, (offset & 1) && op->op > PictOpSrc));
/* Flush only if the flag survived and the operator is above PictOpSrc. */
flush = (offset & 1) && op->op > PictOpSrc;
}
gen5_emit_vertex_elements(sna, op);

/* A dirty source or mask needs an MI_FLUSH, which replaces the pipe flush. */
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
DBG(("%s: flushing dirty (%d, %d)\n", __FUNCTION__,
kgem_bo_is_dirty(op->src.bo),
kgem_bo_is_dirty(op->mask.bo)));
OUT_BATCH(MI_FLUSH);
kgem_clear_dirty(&sna->kgem);
/* The destination is about to be written again. */
kgem_bo_mark_dirty(op->dst.bo);
flush = false;
}
if (flush) {
DBG(("%s: forcing flush\n", __FUNCTION__));
gen5_emit_pipe_flush(sna);
}
}
 
/*
 * Build the binding table for a composite operation (destination, source and
 * optional mask) and emit the dependent 3D state.
 *
 * If binding the surfaces allocated no new surface state and the new table
 * is identical to the one currently in use, the freshly allocated table is
 * released and the previous one is reused.
 */
static void gen5_bind_surfaces(struct sna *sna,
			       const struct sna_composite_op *op)
{
	/* Sample the dirty state before binding the destination. */
	bool dirty = kgem_bo_is_dirty(op->dst.bo);
	uint32_t *binding_table;
	uint16_t offset;

	gen5_get_batch(sna, op);

	binding_table = gen5_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen5_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen5_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen5_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		assert(op->u.gen5.ve_id >> 2);
		binding_table[2] =
			gen5_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* surface == offset means every bind above hit the binding cache. */
	if (sna->kgem.surface == offset) {
		const uint32_t *current =
			sna->kgem.batch + sna->render_state.gen5.surface_table;

		/* Compare the entries word by word; reading the uint32_t
		 * batch through a uint64_t lvalue would violate the
		 * effective-type (strict aliasing) rule.
		 */
		if (current[0] == binding_table[0] &&
		    current[1] == binding_table[1] &&
		    (op->mask.bo == NULL || current[2] == binding_table[2])) {
			/* Identical table: give the new slot back. */
			sna->kgem.surface +=
				sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
			offset = sna->render_state.gen5.surface_table;
		}
	}

	/* Bit 0 of the offset carries the destination-dirty flag. */
	gen5_emit_state(sna, op, offset | dirty);
}
 
/*
 * Composite a single rectangle @r: reserve space for one rectangle
 * (re-binding surfaces if the batch had to be flushed) and let the
 * operation's prim_emit hook write the vertices.
 */
fastcall static void
gen5_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n",
__FUNCTION__,
r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
r->dst.x, r->dst.y, op->dst.x, op->dst.y,
r->width, r->height));

gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, r);
}
 
#if 0
/*
 * Composite a single box, using the box origin for the source, mask and
 * destination coordinates alike.
 */
fastcall static void
gen5_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;

DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));

gen5_get_rectangles(sna, op, 1, gen5_bind_surfaces);

r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
/* Source and mask are sampled at the same coordinates as dst. */
r.mask = r.src = r.dst;

op->prim_emit(sna, op, &r);
}
 
/*
 * Composite @nbox boxes one rectangle at a time through op->prim_emit.
 *
 * Space is reserved in chunks: gen5_get_rectangles() may grant fewer
 * rectangles than requested, so the outer loop repeats until all boxes have
 * been emitted.  NOTE(review): both loops are do/while, so @nbox must be at
 * least 1 on entry.
 */
static void
gen5_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n",
__FUNCTION__, nbox, op->dst.x, op->dst.y,
op->src.offset[0], op->src.offset[1],
op->src.width, op->src.height,
op->mask.offset[0], op->mask.offset[1],
op->mask.width, op->mask.height));

do {
int nbox_this_time;

nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;

do {
struct sna_composite_rectangles r;

DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));

r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
/* Source and mask use the same coordinates as dst. */
r.mask = r.src = r.dst;
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
/*
 * Composite @nbox boxes using the operation's bulk emit_boxes hook.
 *
 * For each chunk granted by gen5_get_rectangles() the vertex space is
 * claimed up front (vertex_used is advanced) and emit_boxes writes the
 * vertices directly at the saved pointer.  NOTE(review): do/while, so @nbox
 * must be at least 1 on entry.
 */
static void
gen5_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

do {
int nbox_this_time;
float *v;

nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;

/* Remember where this chunk starts, then claim its space. */
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
/*
 * Threaded variant of gen5_render_composite_boxes().
 *
 * Vertex space is reserved while holding the vertex lock; the lock is
 * dropped (after an acquire) while emit_boxes writes the vertices, then
 * retaken to release the acquisition.  NOTE(review): do/while, so @nbox must
 * be at least 1 on entry.
 */
static void
gen5_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;

nbox_this_time = gen5_get_rectangles(sna, op, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;

/* Claim this chunk's vertex space while still locked. */
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);

/* Vertices are written without holding the lock. */
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;

sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
/* Fallback MAX(); note that each argument may be evaluated twice. */
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
/*
 * Allocate and fill a surface state describing one plane of a video frame.
 *
 * A fresh padded surface-state slot is taken from the batch, zeroed and set
 * up as a 2D surface of @src_surf_format at @src_offset within @src_bo, for
 * sampler use.  Width, height and pitch are stored minus one.
 *
 * Returns the byte offset of the surface state within the batch.
 */
static uint32_t gen5_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen5_surface_state *ss;

sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);

ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN5_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
ss->ss0.color_blend = 1;

/* The base address is dword 1 of the surface state. */
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);

ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;

return sna->kgem.surface * sizeof(uint32_t);
}
 
/*
 * Build the binding table for a video blit and emit the dependent state.
 *
 * The frame is taken from op->priv.  Planar formats bind six R8 source
 * surfaces (entries 0-1 at offset 0, 2-3 at VBufOffset, 4-5 at UBufOffset);
 * packed formats bind a single YCrCb surface.
 */
static void gen5_video_bind_surfaces(struct sna *sna,
const struct sna_composite_op *op)
{
bool dirty = kgem_bo_is_dirty(op->dst.bo);
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;

src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;

if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM;
/* Entries 0-1: full size, using pitch[1]. */
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
/* Entries 2-5: half width and height, using pitch[0]. */
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN5_SURFACEFORMAT_YCRCB_NORMAL;

src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}

gen5_get_batch(sna, op);

/* Entry 0 is the destination; the sources follow. */
binding_table = gen5_composite_get_binding_table(sna, &offset);
binding_table[0] =
gen5_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen5_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen5_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}

/* Bit 0 of the offset carries the destination-dirty flag. */
gen5_emit_state(sna, op, offset | dirty);
}
 
/*
 * Render a video @frame into @pixmap, one rectangle per box of @dstRegion.
 *
 * The frame's source rectangle is scaled onto the extents of @dstRegion:
 * nearest filtering is used when source and destination sizes match,
 * bilinear otherwise.  Returns false if the pixmap cannot be forced onto
 * the GPU, true once all boxes have been emitted.
 */
static bool
gen5_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
BoxPtr box;

DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__,
src_width, src_height, dst_width, dst_height));

priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;

/* Describe the blit as a mask-less PictOpSrc composite. */
memset(&tmp, 0, sizeof(tmp));

tmp.op = PictOpSrc;
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;

if (src_width == dst_width && src_height == dst_height)
tmp.src.filter = SAMPLER_FILTER_NEAREST;
else
tmp.src.filter = SAMPLER_FILTER_BILINEAR;
tmp.src.repeat = SAMPLER_EXTEND_PAD;
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
tmp.u.gen5.wm_kernel =
is_planar_fourcc(frame->id) ? WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED;
/* Vertex layout: packed x,y plus two float texture coordinates. */
tmp.u.gen5.ve_id = 2;
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.priv = frame;

/* NOTE(review): the retry below lives inside assert() and so is not
* evaluated when asserts are compiled out.
*/
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
}

gen5_video_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);

/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif

/* Map destination coordinates to texture coordinates normalised by the
* frame size: tex = dst * scale + offset.
*/
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;

r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;

gen5_get_rectangles(sna, &tmp, 1, gen5_video_bind_surfaces);

/* Three corners: (x2,y2), (x1,y2), (x1,y1). */
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

/* Track the written area as GPU damage unless everything already is. */
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}

gen4_vertex_flush(sna);
return true;
}
#endif
 
/*
 * Finish a composite operation: if a primitive is still open, flush its
 * vertices and run the component-alpha pass helper.
 */
static void
gen5_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen5_magic_ca_pass(sna,op);
}

DBG(("%s()\n", __FUNCTION__));

}
 
#if 0
/*
 * Fill in op->dst for rendering to the picture @dst.
 *
 * The affected area is (@x, @y, @w, @h), or the picture extents when @w or
 * @h is zero.  Returns false if no GPU bo can be used for the drawable, or
 * if the target is too large for the 3D pipeline and cannot be redirected.
 */
static bool
gen5_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;

op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
op->dst.format = dst->format;
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);

op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;

/* Offset from drawable to pixmap coordinates. */
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);

DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));

assert(op->dst.bo->proxy == NULL);

/* Oversized targets must be redirected to proceed. */
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h, partial))
return false;

return true;
}
 
/*
 * Prepare a Render composite operation in @tmp.
 *
 * Tries the BLT paths first when there is no mask, rejects operations that
 * would fall back, hands oversized operations to the tiling helper, and
 * otherwise sets up the destination, source and optional mask channels,
 * picks the WM kernel and vertex emitter, installs the emit callbacks and
 * emits the initial state.
 *
 * Returns true when @tmp is ready for use (or a BLT/tiling path accepted
 * the operation) and false otherwise; on failure after setup the bos
 * acquired so far are released via the cleanup labels.
 */
static bool
gen5_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.mode));

if (op >= ARRAY_SIZE(gen5_blend_op)) {
DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
return false;
}

/* Mask-less operations may be handled entirely by the blitter. */
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;

if (gen5_composite_fallback(sna, src, mask, dst))
return false;

if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);

if (!gen5_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data)) {
DBG(("%s: failed to set composite target\n", __FUNCTION__));
return false;
}

DBG(("%s: preparing source\n", __FUNCTION__));
tmp->op = op;
/* -1: failure, 0: use a solid channel instead, 1: channel prepared. */
switch (gen5_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
if (mask == NULL &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;

gen5_composite_channel_convert(&tmp->src);
break;
}

tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;

if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;

/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen5_blend_op[op].src_alpha &&
(gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
if (op != PictOpOver) {
DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
goto cleanup_src;
}

/* PictOpOver is emitted as OutReverse plus the extra CA pass. */
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}

/* Only prepare the mask when it cannot share the source channel. */
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
DBG(("%s: preparing mask\n", __FUNCTION__));
switch (gen5_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen5_composite_channel_convert(&tmp->mask);
break;
}
}

tmp->is_affine &= tmp->mask.is_affine;
}

tmp->u.gen5.wm_kernel =
gen5_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine);
tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);

tmp->blt = gen5_render_composite_blt;
tmp->box = gen5_render_composite_box;
tmp->boxes = gen5_render_composite_boxes__blt;
/* Use the bulk emitters when an emit_boxes hook is set. */
if (tmp->emit_boxes) {
tmp->boxes = gen5_render_composite_boxes;
tmp->thread_boxes = gen5_render_composite_boxes__thread;
}
tmp->done = gen5_render_composite_done;

/* Retry the bo check once after submitting the batch. */
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
goto cleanup_mask;
}

gen5_bind_surfaces(sna, tmp);
gen5_align_vertex(sna, tmp);
return true;

/* Each label releases one more resource than the next. */
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
/* The destination bo is only destroyed when it was redirected. */
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
/*
 * Emit a single span box with the given @opacity: reserve one rectangle and
 * let the spans operation's prim_emit hook write the vertices.
 */
fastcall static void
gen5_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));

gen5_get_rectangles(sna, &op->base, 1, gen5_bind_surfaces);
op->prim_emit(sna, op, box, opacity);
}
 
/*
 * Emit @nbox span boxes that share one @opacity.
 *
 * Space is reserved in chunks, since gen5_get_rectangles() may grant fewer
 * rectangles than requested.  NOTE(review): both loops are do/while, so
 * @nbox must be at least 1 on entry.
 */
static void
gen5_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));

do {
int nbox_this_time;

nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
nbox -= nbox_this_time;

do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));

op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
/*
 * Threaded emission of @nbox opacity boxes through op->emit_boxes.
 *
 * Vertex space is reserved while holding the vertex lock; the lock is
 * dropped (after an acquire) while the vertices are written, then retaken
 * to release the acquisition.  NOTE(review): do/while, so @nbox must be at
 * least 1 on entry.
 */
fastcall static void
gen5_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));

sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;

nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox,
gen5_bind_surfaces);
assert(nbox_this_time);
nbox -= nbox_this_time;

/* Claim this chunk's vertex space while still locked. */
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);

/* Vertices are written without holding the lock. */
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;

sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
/*
 * Finish a spans operation: flush any open primitive, drop the reference on
 * the source bo and complete any destination redirection.
 */
fastcall static void
gen5_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);

DBG(("%s()\n", __FUNCTION__));

kgem_bo_destroy(&sna->kgem, op->base.src.bo);
sna_render_composite_redirect_done(sna, &op->base);
}
 
/*
 * Decide whether a span composite should be attempted on the render
 * pipeline.
 *
 * Returns false when the blend op is out of range, when the generic
 * composite fallback check says so, or when the operation would need
 * tiling but the destination is not on the GPU.  Rectilinear spans are
 * otherwise always accepted; for non-rectilinear spans the decision
 * depends on whether the destination's CPU/GPU bos are busy and on the
 * in-place / prefer-GPU hints.
 */
static bool
gen5_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	struct sna_pixmap *priv;

	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen5_blend_op))
		return false;

	if (gen5_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* Rectilinear spans need no further consideration. */
	if (flags & COMPOSITE_SPANS_RECTILINEAR)
		return true;

	priv = sna_pixmap_from_drawable(dst->pDrawable);
	assert(priv);

	/* A busy CPU bo always selects the render path. */
	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	if (flags & COMPOSITE_SPANS_INPLACE_HINT)
		return false;

	if ((sna->render.prefer_gpu & PREFER_GPU_SPANS) == 0 &&
	    dst->format == PICT_a8)
		return false;

	/* Otherwise only accept when the GPU copy is already in use. */
	if (!priv->gpu_bo)
		return false;

	return kgem_bo_is_busy(priv->gpu_bo);
}
 
/*
 * Prepare a span composite operation in *tmp.
 *
 * Oversized operations are handed to sna_tiling_composite_spans().
 * Otherwise the destination and source channels are set up, the span
 * emitter and WM kernel are chosen, the box/boxes/done callbacks are
 * installed and the initial state is bound.
 *
 * Returns true when *tmp is ready for use; false on failure, after
 * releasing whatever had been acquired up to that point.
 */
static bool
gen5_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x, int16_t src_y,
			    int16_t dst_x, int16_t dst_y,
			    int16_t width, int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	struct sna_composite_op *base = &tmp->base;
	int ret;

	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen5_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	base->op = op;
	if (!gen5_composite_set_target(sna, base, dst,
				       dst_x, dst_y, width, height,
				       true))
		return false;

	/*
	 * Source channel setup: -1 is failure, 0 asks for a solid
	 * replacement channel, 1 is a usable picture.  Results 0 and 1
	 * both go through the channel conversion; any other value is
	 * left untouched.
	 */
	ret = gen5_composite_picture(sna, src, &base->src,
				     src_x, src_y,
				     width, height,
				     dst_x, dst_y,
				     dst->polyMode == PolyModePrecise);
	if (ret == -1)
		goto cleanup_dst;
	if (ret == 0 && !gen4_channel_init_solid(sna, &base->src, 0))
		goto cleanup_dst;
	if (ret == 0 || ret == 1)
		gen5_composite_channel_convert(&base->src);

	/* Spans never use a mask picture. */
	base->mask.bo = NULL;

	base->is_affine = base->src.is_affine;
	base->has_component_alpha = false;
	base->need_magic_ca_pass = false;

	base->u.gen5.ve_id = gen4_choose_spans_emitter(sna, tmp);
	base->u.gen5.wm_kernel = WM_KERNEL_OPACITY | !base->is_affine;

	tmp->box = gen5_render_composite_spans_box;
	tmp->boxes = gen5_render_composite_spans_boxes;
	/* The threaded path is only offered when a bulk emitter exists. */
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen5_render_composite_spans_boxes__thread;
	tmp->done = gen5_render_composite_spans_done;

	/* Make room in the batch; flush once and retry before giving up. */
	if (!kgem_check_bo(&sna->kgem, base->dst.bo, base->src.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   base->dst.bo, base->src.bo, NULL))
			goto cleanup_src;
	}

	gen5_bind_surfaces(sna, base);
	gen5_align_vertex(sna, base);
	return true;

cleanup_src:
	if (base->src.bo)
		kgem_bo_destroy(&sna->kgem, base->src.bo);
cleanup_dst:
	/* Only a redirected destination owns a bo that must be released. */
	if (base->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, base->dst.bo);
	return false;
}
#endif
 
 
 
/*
 * Copy n boxes from src/src_bo to dst/dst_bo.
 *
 * Strategy, in order:
 *  1. If the depths are compatible, try the BLT copy first.
 *  2. If alu is neither GXcopy nor GXclear, or source and destination
 *     share a bo, use the BLT fallback path (or fail if the depths
 *     are incompatible).
 *  3. Otherwise emit one textured rectangle per box through the render
 *     pipeline, redirecting an oversized destination and sampling a
 *     partial view of an oversized source.
 *  4. If redirect/partial setup or the aperture check fails, retry the
 *     BLT and finally fall back to the tiled copy.
 *
 * box coordinates are offset by (src_dx, src_dy) for sampling and by
 * (dst_dx, dst_dy) for drawing.  Returns true on success.
 */
static bool
gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
 
DBG(("%s alu=%d, src=%ld:handle=%d, dst=%ld:handle=%d boxes=%d x [((%d, %d), (%d, %d))...], flags=%x\n",
__FUNCTION__, alu,
src->drawable.serialNumber, src_bo->handle,
dst->drawable.serialNumber, dst_bo->handle,
n, box->x1, box->y1, box->x2, box->y2,
flags));
 
/* Preferred path: a plain BLT copy when the depths are compatible. */
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
/* The render path below only handles copy/clear between distinct bos. */
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
/* Also entered by goto from the format check further down. */
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
memset(&tmp, 0, sizeof(tmp));
 
/* Equal depths share one render format; otherwise pick one per depth. */
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen5_check_format(tmp.src.pict_format)) {
DBG(("%s: unsupported source format, %x, use BLT\n",
__FUNCTION__, tmp.src.pict_format));
goto fallback_blt;
}
 
DBG(("%s (%d, %d)->(%d, %d) x %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n));
 
/* alu is GXcopy or GXclear here, so anything but GXcopy means clear. */
tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.x = tmp.dst.y = 0;
tmp.dst.bo = dst_bo;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
/* Oversized destination: redirect to the bounding box of all boxes. */
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;
}
 
tmp.src.filter = SAMPLER_FILTER_NEAREST;
tmp.src.repeat = SAMPLER_EXTEND_NONE;
tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
/* Oversized source: sample from a partial view covering all boxes. */
BoxRec extents = box[0];
int i;
 
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled_dst;
} else {
/* Take our own reference; it is dropped again on every exit path. */
tmp.src.bo = kgem_bo_reference(src_bo);
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
tmp.src.offset[0] = tmp.src.offset[1] = 0;
tmp.src.scale[0] = 1.f/src->drawable.width;
tmp.src.scale[1] = 1.f/src->drawable.height;
}
 
/* Each vertex is one OUT_VERTEX plus two OUT_VERTEX_F; three per rect. */
tmp.is_affine = true;
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
tmp.u.gen5.wm_kernel = WM_KERNEL;
tmp.u.gen5.ve_id = 2;
 
/* Make room in the batch; flush once and retry before giving up. */
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
DBG(("%s: aperture check failed\n", __FUNCTION__));
goto fallback_tiled_src;
}
}
 
/*
 * Fold the redirect/partial offsets into the deltas.  This happens
 * after the last goto to a fallback label, so the fallbacks still see
 * the caller's original deltas.
 */
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
tmp.dst.x = tmp.dst.y = 0;
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
 
gen5_copy_bind_surfaces(sna, &tmp);
gen5_align_vertex(sna, &tmp);
 
/* Emit the boxes in batches of as many rectangles as currently fit. */
do {
int n_this_time;
 
n_this_time = gen5_get_rectangles(sna, &tmp, n,
gen5_copy_bind_surfaces);
n -= n_this_time;
 
do {
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
/* Corner (x2, y2): destination position, then scaled source coords. */
OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
/* Corner (x1, y2). */
OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
/* Corner (x1, y1). */
OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
/* Failure exits: each label undoes one more setup step, then falls through. */
fallback_tiled_src:
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
/* Retry the BLT, then hand over to the tiled copy. */
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
DBG(("%s: tiled fallback\n", __FUNCTION__));
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
#endif
 
/*
 * Render flush hook: close the current vertex buffer via
 * gen4_vertex_close() and check that no vertex buffer id or open
 * vertex offset is left behind.
 */
static void
gen5_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
/* After the close, no vertex buffer state may remain pending. */
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
/*
 * kgem context-switch hook, installed by gen5_render_init().
 *
 * Does nothing while the batch is empty.  When leaving BLT mode it
 * forces the drawing rectangle to be re-emitted, and if the current
 * ring is idle the pending batch is submitted.  new_mode is not
 * consulted.
 */
static void
gen5_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	if (kgem->nbatch == 0)
		return;

	/* WaNonPipelinedStateCommandFlush
	 *
	 * On Ironlake a 3D or Media command may not be the first
	 * command after a BLT unless it is non-pipelined.
	 *
	 * Invalidating the cached drawrect limit makes the
	 * non-pipelined drawrect the first thing emitted after a
	 * switch away from BLT.
	 */
	if (kgem->mode == KGEM_BLT) {
		struct sna *sna = to_sna_from_kgem(kgem);

		DBG(("%s: forcing drawrect on next state emission\n",
		     __FUNCTION__));
		sna->render_state.gen5.drawrect_limit = -1;
	}

	if (!kgem_ring_is_idle(kgem, kgem->ring))
		return;

	DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
	_kgem_submit(kgem);
}
 
/*
 * Give up the vertex buffer object and go back to the vertex array
 * embedded in sna->render, starting from an empty state.
 */
static void
discard_vbo(struct sna *sna)
{
	/* Release the vbo before forgetting the pointer. */
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;

	/* Nothing has been written to the embedded array yet. */
	sna->render.vertex_index = 0;
	sna->render.vertex_used = 0;

	/* Point the writer at the embedded array and its capacity. */
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertices = sna->render.vertex_data;
}
 
/*
 * kgem retire hook: once the batch is empty and the vbo is no longer
 * busy, rewind the vertex counters so the vbo is reused from its start.
 */
static void
gen5_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Commands are still being accumulated; leave the vbo alone. */
	if (kgem->nbatch != 0)
		return;

	if (!sna->render.vbo)
		return;

	if (kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
/*
 * kgem expire hook: drop the vbo entirely when it exists but holds no
 * vertices.
 */
static void
gen5_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	if (!sna->render.vbo)
		return;

	/* Vertices are still stored in it; keep the vbo. */
	if (sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo\n", __FUNCTION__));
	discard_vbo(sna);
}
 
static void gen5_render_reset(struct sna *sna)
{
sna->render_state.gen5.needs_invariant = true;
sna->render_state.gen5.ve_id = -1;
sna->render_state.gen5.last_primitive = -1;
sna->render_state.gen5.last_pipelined_pointers = 0;
 
sna->render_state.gen5.drawrect_offset = -1;
sna->render_state.gen5.drawrect_limit = -1;
sna->render_state.gen5.surface_table = -1;
 
if (sna->render.vbo &&
!kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
discard_vbo(sna);
}
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen5_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo);
}
 
/*
 * Allocate the vertex shader unit state in the static stream, with the
 * shader disabled (pass-through), and return its stream offset.
 */
static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream)
{
	struct gen5_vs_unit_state *vs;

	vs = sna_static_stream_map(stream, sizeof(*vs), 32);

	/* VS disabled: vertices pass straight through, vertex cache off. */
	vs->vs6.vs_enable = 0;
	vs->vs6.vert_cache_disable = 1;

	/* URB allocation still has to be described for the VS stage. */
	vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
	vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;

	return sna_static_stream_offsetof(stream, vs);
}
 
/*
 * Allocate a strips-and-fans (SF) unit state in the static stream for
 * the SF kernel at stream offset `kernel`, and return the offset of the
 * new state.
 */
static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
				     uint32_t kernel)
{
	struct gen5_sf_unit_state *sf;

	sf = sna_static_stream_map(stream, sizeof(*sf), 32);

	/* thread0: kernel location (stored in 64-byte units) and GRF use */
	sf->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
	sf->thread0.kernel_start_pointer = kernel >> 6;

	/* thread3: no constant URBs, one URB entry read per vertex */
	sf->thread3.const_urb_entry_read_length = 0;
	sf->thread3.const_urb_entry_read_offset = 0;
	sf->thread3.urb_entry_read_length = 1;
	/* don't smash the vertex header, start reading from dw8 */
	sf->thread3.urb_entry_read_offset = 1;
	sf->thread3.dispatch_grf_start_reg = 3;

	/* thread4: thread and URB budget */
	sf->thread4.max_threads = SF_MAX_THREADS - 1;
	sf->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
	sf->thread4.nr_urb_entries = URB_SF_ENTRIES;

	/* fixed-function setup: no viewport transform, culling or scissor */
	sf->sf5.viewport_transform = false;
	sf->sf6.cull_mode = GEN5_CULLMODE_NONE;
	sf->sf6.scissor = 0;
	sf->sf6.dest_org_vbias = 0x8;
	sf->sf6.dest_org_hbias = 0x8;
	sf->sf7.trifan_pv = 2;

	return sna_static_stream_offsetof(stream, sf);
}
 
/*
 * Allocate a pair of sampler states in the static stream -- entry 0 for
 * the source, entry 1 for the mask -- and return the offset of the pair.
 */
static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream,
					  sampler_filter_t src_filter,
					  sampler_extend_t src_extend,
					  sampler_filter_t mask_filter,
					  sampler_extend_t mask_extend)
{
	struct gen5_sampler_state *pair;

	pair = sna_static_stream_map(stream, sizeof(*pair) * 2, 32);

	sampler_state_init(pair + 0, src_filter, src_extend);
	sampler_state_init(pair + 1, mask_filter, mask_extend);

	return sna_static_stream_offsetof(stream, pair);
}
 
/*
 * Fill in one windower (WM) unit state.
 *
 * state:    entry to initialise
 * has_mask: whether the kernel reads mask coordinates in addition to
 *           the source coordinates
 * kernel:   stream offset of the WM kernel (stored in 64-byte units)
 * sampler:  stream offset of the sampler state pair (stored in 32-byte
 *           units)
 *
 * The previous version first stored a binding table entry count of 3
 * (mask) or 2 (no mask) and then unconditionally overwrote it with 0.
 * Those dead stores are gone; the resulting state is unchanged.
 */
static void gen5_init_wm_state(struct gen5_wm_unit_state *state,
			       bool has_mask,
			       uint32_t kernel,
			       uint32_t sampler)
{
	state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
	state->thread0.kernel_start_pointer = kernel >> 6;

	state->thread1.single_program_flow = 0;

	/* binding table entry count is only used for prefetching,
	 * and it has to be set 0 for Ironlake
	 */
	state->thread1.binding_table_entry_count = 0;

	/* scratch space is not used in our kernel */
	state->thread2.scratch_space_base_pointer = 0;
	state->thread2.per_thread_scratch_space = 0;

	state->thread3.const_urb_entry_read_length = 0;
	state->thread3.const_urb_entry_read_offset = 0;

	state->thread3.urb_entry_read_offset = 0;
	/* wm kernel use urb from 3, see wm_program in compiler module */
	state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */

	/* Each pair of attributes (src/mask coords) is two URB entries */
	state->thread3.urb_entry_read_length = has_mask ? 4 : 2;

	state->wm4.sampler_count = 0; /* hardware requirement */

	state->wm4.sampler_state_pointer = sampler >> 5;
	state->wm5.max_threads = PS_MAX_THREADS - 1;
	state->wm5.transposed_urb_read = 0;
	state->wm5.thread_dispatch_enable = 1;
	/* just use 16-pixel dispatch (4 subspans), don't need to change kernel
	 * start point
	 */
	state->wm5.enable_16_pix = 1;
	state->wm5.enable_8_pix = 0;
	state->wm5.early_depth_test = 1;
}
 
/*
 * Build the table of colour calculator (CC) unit states: one 64-byte
 * slot per (source factor, destination factor) pair, with the source
 * factor as the major index.  Returns the stream offset of slot 0.
 */
static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
{
	uint8_t *base;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64,
				     64);

	for (src = 0; src < GEN5_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN5_BLENDFACTOR_COUNT; dst++) {
			struct gen5_cc_unit_state *state;

			state = (struct gen5_cc_unit_state *)
				(base + (src * GEN5_BLENDFACTOR_COUNT + dst) * 64);

			/* Blending is disabled only for src=ONE, dst=ZERO. */
			state->cc3.blend_enable =
				(dst != GEN5_BLENDFACTOR_ZERO ||
				 src != GEN5_BLENDFACTOR_ONE);

			state->cc5.logicop_func = 0xc; /* COPY */
			state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD;

			/* Alpha factors mirror the colour factors, for the future. */
			state->cc5.ia_src_blend_factor = src;
			state->cc5.ia_dest_blend_factor = dst;

			state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD;
			state->cc6.clamp_post_alpha_blend = 1;
			state->cc6.clamp_pre_alpha_blend = 1;
			state->cc6.src_blend_factor = src;
			state->cc6.dest_blend_factor = dst;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
/*
 * Build all static gen5 state in one stream and turn it into a bo:
 * the SF and WM kernels, the VS/SF unit states, one WM unit state per
 * (source filter, source extend, mask filter, mask extend, kernel)
 * combination, and the CC state table.  The stream offsets are
 * recorded in sna->render_state.gen5.
 *
 * Returns true when the state bo was created.
 */
static bool gen5_render_setup(struct sna *sna)
{
	struct gen5_render_state *state = &sna->render_state.gen5;
	struct sna_static_stream general;
	struct gen5_wm_unit_state_padded *wm_state;
	uint32_t sf[2], wm[KERNEL_COUNT];
	int src_filter, src_extend, mask_filter, mask_extend, m;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);

	/* Two SF kernels: index 0 without a mask, index 1 with a mask. */
	sf[0] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__nomask);
	sf[1] = sna_static_stream_compile_sf(sna, &general, brw_sf_kernel__mask);

	/* WM kernels: a non-zero size means a prebuilt blob to copy in,
	 * otherwise the entry is compiled into the stream.
	 */
	for (m = 0; m < KERNEL_COUNT; m++) {
		if (wm_kernels[m].size)
			wm[m] = sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		else
			wm[m] = sna_static_stream_compile_wm(sna, &general,
							     wm_kernels[m].data,
							     16);
		assert(wm[m]);
	}

	state->vs = gen5_create_vs_unit_state(&general);

	state->sf[0] = gen5_create_sf_state(&general, sf[0]);
	state->sf[1] = gen5_create_sf_state(&general, sf[1]);

	/* Set up the WM states: each filter/extend type for source and mask, per
	 * kernel.
	 */
	wm_state = sna_static_stream_map(&general,
					 sizeof(*wm_state) * KERNEL_COUNT *
					 FILTER_COUNT * EXTEND_COUNT *
					 FILTER_COUNT * EXTEND_COUNT,
					 64);
	state->wm = sna_static_stream_offsetof(&general, wm_state);

	for (src_filter = 0; src_filter < FILTER_COUNT; src_filter++)
	for (src_extend = 0; src_extend < EXTEND_COUNT; src_extend++)
	for (mask_filter = 0; mask_filter < FILTER_COUNT; mask_filter++)
	for (mask_extend = 0; mask_extend < EXTEND_COUNT; mask_extend++) {
		/* One sampler pair is shared by all kernels of this combination. */
		uint32_t sampler_state =
			gen5_create_sampler_state(&general,
						  src_filter, src_extend,
						  mask_filter, mask_extend);

		for (m = 0; m < KERNEL_COUNT; m++) {
			gen5_init_wm_state(&wm_state->state,
					   wm_kernels[m].has_mask,
					   wm[m], sampler_state);
			wm_state++;
		}
	}

	state->cc = gen5_create_cc_unit_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}
 
/*
 * Install the gen5 (Ironlake) render backend.
 *
 * Builds the static state via gen5_render_setup(); if that fails the
 * incoming `backend` name is returned unchanged and no hooks are
 * touched.  On success the kgem and render hooks are installed and the
 * name of this backend is returned.
 *
 * The hooks inside the `#if 0` region are compiled out in this build;
 * only blit_tex, flush, reset and fini are installed.
 *
 * Note: the stray ';' that used to follow the closing brace has been
 * removed -- an empty declaration at file scope is not valid ISO C.
 */
const char *gen5_render_init(struct sna *sna, const char *backend)
{
	if (!gen5_render_setup(sna))
		return backend;

	sna->kgem.context_switch = gen5_render_context_switch;
	sna->kgem.retire = gen5_render_retire;
	sna->kgem.expire = gen5_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen5_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen5_check_composite_spans;
	sna->render.composite_spans = gen5_render_composite_spans;
	if (sna->PciInfo->device_id == 0x0044)
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen5_render_video;

	sna->render.copy_boxes = gen5_render_copy_boxes;
	sna->render.copy = gen5_render_copy;

	sna->render.fill_boxes = gen5_render_fill_boxes;
	sna->render.fill = gen5_render_fill;
	sna->render.fill_one = gen5_render_fill_one;
#endif

	sna->render.blit_tex = gen5_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen5_render_flush;
	sna->render.reset = gen5_render_reset;
	sna->render.fini = gen5_render_fini;

	sna->render.max_3d_size = MAX_3D_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return "Ironlake (gen5)";
}
 
/*
 * Prepare a masked texture blit (PictOpSrc) from src to dst in *tmp.
 *
 * sna:              driver instance
 * op:               ignored; the operation is always PictOpSrc
 * scale:            when true the source coordinates are normalised by
 *                   the blit width/height instead of the source pixmap
 *                   size
 * src/src_bo:       source pixmap and its bo (x8r8g8b8)
 * mask/mask_bo:     a8 mask pixmap and its bo; must not be NULL
 * dst/dst_bo:       destination pixmap and its bo (x8r8g8b8)
 * src_x .. dst_y:   not used here
 * width, height:    blit size, used for logging and for `scale`
 * tmp:              composite op to fill in; blt/done callbacks are
 *                   installed on it
 *
 * Returns true when *tmp is ready, false when the bos do not fit into
 * the aperture even after the pending batch has been submitted.  The bo
 * pointers are only stored, never referenced here, so nothing needs to
 * be released on the failure path.
 *
 * Changes against the previous version:
 *  - the source channel now uses the SAMPLER_* enums like the mask
 *    channel and gen5_render_copy_boxes(), instead of the Render
 *    protocol's RepeatNone/PictFilterNearest (presumably numerically
 *    identical, both zero -- confirm against the enum definitions);
 *  - the aperture check is repeated after kgem_submit(), matching
 *    gen5_render_composite_spans() and gen5_render_copy_boxes().
 */
static bool
gen5_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	tmp->op = PictOpSrc;

	/* Destination */
	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	/* Source channel: nearest sampling, no repeat */
	tmp->src.repeat = SAMPLER_EXTEND_NONE;
	tmp->src.filter = SAMPLER_FILTER_NEAREST;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen5_get_card_format(tmp->src.pict_format);
	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	/* Mask channel: a8, nearest sampling, no repeat */
	tmp->mask.is_affine = true;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen5_get_card_format(tmp->mask.pict_format);
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Texture coordinate normalisation factors */
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	/* The masked kernel is hard-wired rather than chosen per op. */
	tmp->u.gen5.wm_kernel = WM_KERNEL_MASK;
	tmp->u.gen5.ve_id = gen4_choose_composite_emitter(sna, tmp);

	tmp->blt = gen5_render_composite_blt;
	tmp->done = gen5_render_composite_done;

	/* Make room in the batch; flush once and retry before giving up. */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: aperture check failed\n", __FUNCTION__));
			return false;
		}
	}

	gen5_bind_surfaces(sna, tmp);
	gen5_align_vertex(sna, tmp);
	return true;
}
/drivers/video/Intel-2D/sna/gen5_render.h
0,0 → 1,2772
/**************************************************************************
*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
**************************************************************************/
 
#ifndef GEN5_RENDER_H
#define GEN5_RENDER_H
 
#define GEN5_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN5_URB_FENCE GEN5_3D(0, 0, 0)
#define GEN5_CS_URB_STATE GEN5_3D(0, 0, 1)
#define GEN5_CONSTANT_BUFFER GEN5_3D(0, 0, 2)
#define GEN5_STATE_PREFETCH GEN5_3D(0, 0, 3)
 
#define GEN5_STATE_BASE_ADDRESS GEN5_3D(0, 1, 1)
#define GEN5_STATE_SIP GEN5_3D(0, 1, 2)
 
#define GEN5_PIPELINE_SELECT GEN5_3D(1, 1, 4)
 
#define GEN5_MEDIA_STATE_POINTERS GEN5_3D(2, 0, 0)
#define GEN5_MEDIA_OBJECT GEN5_3D(2, 1, 0)
 
#define GEN5_3DSTATE_PIPELINED_POINTERS GEN5_3D(3, 0, 0)
#define GEN5_3DSTATE_BINDING_TABLE_POINTERS GEN5_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN5_3DSTATE_VERTEX_BUFFERS GEN5_3D(3, 0, 8)
#define GEN5_3DSTATE_VERTEX_ELEMENTS GEN5_3D(3, 0, 9)
#define GEN5_3DSTATE_INDEX_BUFFER GEN5_3D(3, 0, 0xa)
#define GEN5_3DSTATE_VF_STATISTICS GEN5_3D(3, 0, 0xb)
 
#define GEN5_3DSTATE_DRAWING_RECTANGLE GEN5_3D(3, 1, 0)
#define GEN5_3DSTATE_CONSTANT_COLOR GEN5_3D(3, 1, 1)
#define GEN5_3DSTATE_SAMPLER_PALETTE_LOAD GEN5_3D(3, 1, 2)
#define GEN5_3DSTATE_CHROMA_KEY GEN5_3D(3, 1, 4)
#define GEN5_3DSTATE_DEPTH_BUFFER GEN5_3D(3, 1, 5)
# define GEN5_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN5_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN5_3DSTATE_POLY_STIPPLE_OFFSET GEN5_3D(3, 1, 6)
#define GEN5_3DSTATE_POLY_STIPPLE_PATTERN GEN5_3D(3, 1, 7)
#define GEN5_3DSTATE_LINE_STIPPLE GEN5_3D(3, 1, 8)
#define GEN5_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN5_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN5_3DSTATE_AA_LINE_PARAMS GEN5_3D(3, 1, 0xa)
#define GEN5_3DSTATE_GS_SVB_INDEX GEN5_3D(3, 1, 0xb)
 
#define GEN5_PIPE_CONTROL GEN5_3D(3, 2, 0)
 
#define GEN5_3DPRIMITIVE GEN5_3D(3, 3, 0)
 
#define GEN5_3DSTATE_CLEAR_PARAMS GEN5_3D(3, 1, 0x10)
/* DW1 */
# define GEN5_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN5_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN5_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN5_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN5_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN5_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN5_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN5_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN5_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
 
#define GEN6_3DSTATE_WM GEN5_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN5_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN5_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN5_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN5_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN5_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN5_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN5_3DSTATE_PIPELINED_POINTERS */
#define GEN5_GS_DISABLE 0
#define GEN5_GS_ENABLE 1
#define GEN5_CLIP_DISABLE 0
#define GEN5_CLIP_ENABLE 1
 
/* for GEN5_PIPE_CONTROL */
#define GEN5_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN5_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN5_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN5_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN5_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN5_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN5_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN5_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN5_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN5_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN5_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN5_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 27
#define GEN6_VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 26)
#define VB0_INSTANCEDATA (1 << 26)
#define GEN6_VB0_VERTEXDATA (0 << 20)
#define GEN6_VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27
#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 26)
#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN5_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen5_defines.h */
#define GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN5_SVG_CTL 0x7400
 
#define GEN5_SVG_CTL_GS_BA (0 << 8)
#define GEN5_SVG_CTL_SS_BA (1 << 8)
#define GEN5_SVG_CTL_IO_BA (2 << 8)
#define GEN5_SVG_CTL_GS_AUB (3 << 8)
#define GEN5_SVG_CTL_IO_AUB (4 << 8)
#define GEN5_SVG_CTL_SIP (5 << 8)
 
#define GEN5_SVG_RDATA 0x7404
#define GEN5_SVG_WORK_CTL 0x7408
 
/* GEN5_VF_CTL register offset and its bit fields. */
#define GEN5_VF_CTL 0x7500

/* Bit 31 uses an unsigned literal: 1 << 31 overflows a 32-bit signed
 * int, which is undefined behaviour in C.
 */
#define GEN5_VF_CTL_SNAPSHOT_COMPLETE (1U << 31)
#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN5_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN5_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN5_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN5_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_VF_STRG_VAL 0x7504
#define GEN5_VF_STR_VL_OVR 0x7508
#define GEN5_VF_VC_OVR 0x750c
#define GEN5_VF_STR_PSKIP 0x7510
#define GEN5_VF_MAX_PRIM 0x7514
#define GEN5_VF_RDATA 0x7518
 
/* GEN5_VS_CTL register offset and its bit fields. */
#define GEN5_VS_CTL 0x7600
/* Bit 31 uses an unsigned literal: 1 << 31 overflows a 32-bit signed
 * int, which is undefined behaviour in C.
 */
#define GEN5_VS_CTL_SNAPSHOT_COMPLETE (1U << 31)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN5_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN5_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_VS_STRG_VAL 0x7604
#define GEN5_VS_RDATA 0x7608
 
/* GEN5_SF_CTL register offset and its bit fields. */
#define GEN5_SF_CTL 0x7b00
/* Bit 31 uses an unsigned literal: 1 << 31 overflows a 32-bit signed
 * int, which is undefined behaviour in C.
 */
#define GEN5_SF_CTL_SNAPSHOT_COMPLETE (1U << 31)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN5_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN5_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN5_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN5_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN5_SF_STRG_VAL 0x7b04
#define GEN5_SF_RDATA 0x7b18
 
/*
 * WIZ_CTL register offset, its shift/bit definitions, and two further
 * WIZ_* register offsets.
 *
 * SNAPSHOT_COMPLETE (bit 31) uses an unsigned shift: (1 << 31) overflows a
 * 32-bit signed int, which is undefined behaviour and produces a negative
 * constant on common compilers.
 */
#define GEN5_WIZ_CTL 0x7c00
#define GEN5_WIZ_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN5_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN5_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN5_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN5_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN5_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN5_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN5_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN5_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN5_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN5_WIZ_STRG_VAL 0x7c04
#define GEN5_WIZ_RDATA 0x7c18
 
/*
 * TS_CTL register offset and its bit fields, followed by two further
 * TS_* register offsets.
 *
 * SNAPSHOT_COMPLETE (bit 31) uses an unsigned shift: (1 << 31) overflows a
 * 32-bit signed int, which is undefined behaviour and produces a negative
 * constant on common compilers.
 */
#define GEN5_TS_CTL 0x7e00
#define GEN5_TS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN5_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN5_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN5_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN5_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN5_TS_CTL_SNAPSHOT_ENABLE (1 << 0)

#define GEN5_TS_STRG_VAL 0x7e04
#define GEN5_TS_RDATA 0x7e08
 
/* TD_CTL: register offset, mux field shift and single-bit controls. */
#define GEN5_TD_CTL 0x8000
#define GEN5_TD_CTL_MUX_SHIFT 8
#define GEN5_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN5_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN5_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN5_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN5_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
/* TD_CTL2: register offset, then its override/enable bits and one shift. */
#define GEN5_TD_CTL2 0x8004
#define GEN5_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN5_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN5_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN5_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN5_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN5_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN5_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN5_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN5_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
/* Remaining TD_* register offsets, 0x8008 through 0x8030 in 4-byte steps. */
#define GEN5_TD_VF_VS_EMSK 0x8008
#define GEN5_TD_GS_EMSK 0x800c
#define GEN5_TD_CLIP_EMSK 0x8010
#define GEN5_TD_SF_EMSK 0x8014
#define GEN5_TD_WIZ_EMSK 0x8018
#define GEN5_TD_0_6_EHTRG_VAL 0x801c
#define GEN5_TD_0_7_EHTRG_VAL 0x8020
#define GEN5_TD_0_6_EHTRG_MSK 0x8024
#define GEN5_TD_0_7_EHTRG_MSK 0x8028
#define GEN5_TD_RDATA 0x802c
#define GEN5_TD_TS_EMSK 0x8030
 
/*
 * EU_CTL register offset with its two field shifts, followed by the
 * EU_ATT, EU_ATT_DATA and EU_ATT_CLR register pairs (_0/_1) and EU_RDATA.
 */
#define GEN5_EU_CTL 0x8800
#define GEN5_EU_CTL_SELECT_SHIFT 16
#define GEN5_EU_CTL_DATA_MUX_SHIFT 8
#define GEN5_EU_ATT_0 0x8810
#define GEN5_EU_ATT_1 0x8814
#define GEN5_EU_ATT_DATA_0 0x8820
#define GEN5_EU_ATT_DATA_1 0x8824
#define GEN5_EU_ATT_CLR_0 0x8830
#define GEN5_EU_ATT_CLR_1 0x8834
#define GEN5_EU_RDATA 0x8840
 
/* 3D state:
 *
 * _3DOP_*    - four command classes, values 0-3.
 * _3DSTATE_* - state command codes.  The numbering restarts at 0x00 with
 *              _3DSTATE_DRAWING_RECTANGLE, so the two runs overlap in value.
 *              NOTE(review): presumably the first run belongs with
 *              _3DOP_3DSTATE_PIPELINED and the second with
 *              _3DOP_3DSTATE_NONPIPELINED - confirm against the hardware
 *              documentation.
 */
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3

#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
/* Primitive topology codes, 0x01 through 0x15 (0x00 is not defined here). */
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15

/* Vertex buffer access mode for a primitive: sequential (0) or random (1). */
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
/* Anisotropic filtering ratio codes; each value equals ratio / 2 - 1. */
#define GEN5_ANISORATIO_2 0
#define GEN5_ANISORATIO_4 1
#define GEN5_ANISORATIO_6 2
#define GEN5_ANISORATIO_8 3
#define GEN5_ANISORATIO_10 4
#define GEN5_ANISORATIO_12 5
#define GEN5_ANISORATIO_14 6
#define GEN5_ANISORATIO_16 7

/*
 * Blend factor codes.  ZERO is ONE + 0x10 and every INV_* factor is its
 * non-inverted counterpart + 0x10; 0x16 (the slot that would pair with
 * SRC_ALPHA_SATURATE) is not defined.
 */
#define GEN5_BLENDFACTOR_ONE 0x1
#define GEN5_BLENDFACTOR_SRC_COLOR 0x2
#define GEN5_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN5_BLENDFACTOR_DST_ALPHA 0x4
#define GEN5_BLENDFACTOR_DST_COLOR 0x5
#define GEN5_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN5_BLENDFACTOR_CONST_COLOR 0x7
#define GEN5_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN5_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN5_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN5_BLENDFACTOR_ZERO 0x11
#define GEN5_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN5_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN5_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN5_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN5_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN5_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN5_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN5_BLENDFACTOR_INV_SRC1_ALPHA 0x1A

/* Blend function (equation) codes. */
#define GEN5_BLENDFUNCTION_ADD 0
#define GEN5_BLENDFUNCTION_SUBTRACT 1
#define GEN5_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN5_BLENDFUNCTION_MIN 3
#define GEN5_BLENDFUNCTION_MAX 4

/* Alpha-test reference value format. */
#define GEN5_ALPHATEST_FORMAT_UNORM8 0
#define GEN5_ALPHATEST_FORMAT_FLOAT32 1

/* Chroma-key mode. */
#define GEN5_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN5_CHROMAKEY_REPLACE_BLACK 1

/* Clipper API selection. */
#define GEN5_CLIP_API_OGL 0
#define GEN5_CLIP_API_DX 1

/* Clipper mode codes. */
#define GEN5_CLIPMODE_NORMAL 0
#define GEN5_CLIPMODE_CLIP_ALL 1
#define GEN5_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN5_CLIPMODE_REJECT_ALL 3
#define GEN5_CLIPMODE_ACCEPT_ALL 4

/* Coordinate space selection for the clipper. */
#define GEN5_CLIP_NDCSPACE 0
#define GEN5_CLIP_SCREENSPACE 1
 
/* Comparison function codes, 0-7. */
#define GEN5_COMPAREFUNCTION_ALWAYS 0
#define GEN5_COMPAREFUNCTION_NEVER 1
#define GEN5_COMPAREFUNCTION_LESS 2
#define GEN5_COMPAREFUNCTION_EQUAL 3
#define GEN5_COMPAREFUNCTION_LEQUAL 4
#define GEN5_COMPAREFUNCTION_GREATER 5
#define GEN5_COMPAREFUNCTION_NOTEQUAL 6
#define GEN5_COMPAREFUNCTION_GEQUAL 7

/* Coverage codes: half, 1, 2 or 4 pixels. */
#define GEN5_COVERAGE_PIXELS_HALF 0
#define GEN5_COVERAGE_PIXELS_1 1
#define GEN5_COVERAGE_PIXELS_2 2
#define GEN5_COVERAGE_PIXELS_4 3

/* Cull mode codes.  Note that BOTH is 0 and NONE is 1. */
#define GEN5_CULLMODE_BOTH 0
#define GEN5_CULLMODE_NONE 1
#define GEN5_CULLMODE_FRONT 2
#define GEN5_CULLMODE_BACK 3

/* Default colour format. */
#define GEN5_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN5_DEFAULTCOLOR_R32G32B32A32_FLOAT 1

/* Depth buffer format codes; values 3 and 4 are not defined here. */
#define GEN5_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN5_DEPTHFORMAT_D32_FLOAT 1
#define GEN5_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN5_DEPTHFORMAT_D16_UNORM 5

/* Floating-point mode selection. */
#define GEN5_FLOATING_POINT_IEEE_754 0
#define GEN5_FLOATING_POINT_NON_IEEE_754 1

/* Front-face winding order. */
#define GEN5_FRONTWINDING_CW 0
#define GEN5_FRONTWINDING_CCW 1

/* Index element size: byte, word or dword. */
#define GEN5_INDEX_BYTE 0
#define GEN5_INDEX_WORD 1
#define GEN5_INDEX_DWORD 2

/* Logic-op function codes, sixteen values 0-15. */
#define GEN5_LOGICOPFUNCTION_CLEAR 0
#define GEN5_LOGICOPFUNCTION_NOR 1
#define GEN5_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN5_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN5_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN5_LOGICOPFUNCTION_INVERT 5
#define GEN5_LOGICOPFUNCTION_XOR 6
#define GEN5_LOGICOPFUNCTION_NAND 7
#define GEN5_LOGICOPFUNCTION_AND 8
#define GEN5_LOGICOPFUNCTION_EQUIV 9
#define GEN5_LOGICOPFUNCTION_NOOP 10
#define GEN5_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN5_LOGICOPFUNCTION_COPY 12
#define GEN5_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN5_LOGICOPFUNCTION_OR 14
#define GEN5_LOGICOPFUNCTION_SET 15
 
/* Texture map (min/mag) filter codes. */
#define GEN5_MAPFILTER_NEAREST 0x0
#define GEN5_MAPFILTER_LINEAR 0x1
#define GEN5_MAPFILTER_ANISOTROPIC 0x2

/* Mip filter codes; note LINEAR is 3, value 2 is not defined here. */
#define GEN5_MIPFILTER_NONE 0
#define GEN5_MIPFILTER_NEAREST 1
#define GEN5_MIPFILTER_LINEAR 3

/* Polygon facing (same values as GEN5_POLYGON_FACING_* later in this file). */
#define GEN5_POLYGON_FRONT_FACING 0
#define GEN5_POLYGON_BACK_FACING 1

/* Prefilter op codes; same ordering as GEN5_COMPAREFUNCTION_*. */
#define GEN5_PREFILTER_ALWAYS 0x0
#define GEN5_PREFILTER_NEVER 0x1
#define GEN5_PREFILTER_LESS 0x2
#define GEN5_PREFILTER_EQUAL 0x3
#define GEN5_PREFILTER_LEQUAL 0x4
#define GEN5_PREFILTER_GREATER 0x5
#define GEN5_PREFILTER_NOTEQUAL 0x6
#define GEN5_PREFILTER_GEQUAL 0x7

/* Provoking vertex selection (index 0, 1 or 2). */
#define GEN5_PROVOKING_VERTEX_0 0
#define GEN5_PROVOKING_VERTEX_1 1
#define GEN5_PROVOKING_VERTEX_2 2

/* Rasterization rule. */
#define GEN5_RASTRULE_UPPER_LEFT 0
#define GEN5_RASTRULE_UPPER_RIGHT 1

/* Render target clamp range selection. */
#define GEN5_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN5_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN5_RENDERTARGET_CLAMPRANGE_FORMAT 2

/* Stencil operation codes, 0-7. */
#define GEN5_STENCILOP_KEEP 0
#define GEN5_STENCILOP_ZERO 1
#define GEN5_STENCILOP_REPLACE 2
#define GEN5_STENCILOP_INCRSAT 3
#define GEN5_STENCILOP_DECRSAT 4
#define GEN5_STENCILOP_INCR 5
#define GEN5_STENCILOP_DECR 6
#define GEN5_STENCILOP_INVERT 7

/* Placement of mip levels within a surface. */
#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
/*
 * Surface format codes.
 *
 * The values fall into ranges starting at 0x000, 0x040, 0x080, 0x0C0,
 * 0x100, 0x140 and 0x180, with gaps inside several ranges.
 * NOTE(review): judging by the format names the ranges look like they are
 * ordered by element size (128, 96, 64, 32, 16, 8 bits, then everything
 * else) - confirm against the hardware documentation before relying on it.
 */
#define GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN5_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN5_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN5_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN5_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN5_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN5_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN5_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN5_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN5_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN5_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN5_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN5_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN5_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN5_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN5_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN5_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN5_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN5_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN5_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN5_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN5_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN5_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN5_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN5_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN5_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN5_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN5_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN5_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN5_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN5_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN5_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN5_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN5_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN5_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN5_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN5_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN5_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN5_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN5_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN5_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN5_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN5_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN5_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN5_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN5_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN5_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN5_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN5_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN5_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN5_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN5_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN5_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN5_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN5_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN5_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN5_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN5_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN5_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN5_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN5_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN5_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN5_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN5_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN5_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN5_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN5_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN5_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN5_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN5_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN5_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN5_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN5_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN5_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN5_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN5_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN5_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN5_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN5_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN5_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN5_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN5_SURFACEFORMAT_R16_SINT 0x10C
#define GEN5_SURFACEFORMAT_R16_UINT 0x10D
#define GEN5_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN5_SURFACEFORMAT_I16_UNORM 0x111
#define GEN5_SURFACEFORMAT_L16_UNORM 0x112
#define GEN5_SURFACEFORMAT_A16_UNORM 0x113
#define GEN5_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN5_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN5_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN5_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN5_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN5_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN5_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN5_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN5_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN5_SURFACEFORMAT_R8_UNORM 0x140
#define GEN5_SURFACEFORMAT_R8_SNORM 0x141
#define GEN5_SURFACEFORMAT_R8_SINT 0x142
#define GEN5_SURFACEFORMAT_R8_UINT 0x143
#define GEN5_SURFACEFORMAT_A8_UNORM 0x144
#define GEN5_SURFACEFORMAT_I8_UNORM 0x145
#define GEN5_SURFACEFORMAT_L8_UNORM 0x146
#define GEN5_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN5_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN5_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN5_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN5_SURFACEFORMAT_R1_UINT 0x181
#define GEN5_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN5_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN5_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN5_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN5_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN5_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN5_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN5_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN5_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN5_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN5_SURFACEFORMAT_MONO8 0x18E
#define GEN5_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN5_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN5_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN5_SURFACEFORMAT_FXT1 0x192
#define GEN5_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN5_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN5_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN5_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN5_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN5_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN5_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN5_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN5_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN5_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN5_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN5_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
/* Surface return format selection. */
#define GEN5_SURFACERETURNFORMAT_FLOAT32 0
#define GEN5_SURFACERETURNFORMAT_S1 1

/* Surface type codes; values 5 and 6 are not defined here. */
#define GEN5_SURFACE_1D 0
#define GEN5_SURFACE_2D 1
#define GEN5_SURFACE_3D 2
#define GEN5_SURFACE_CUBE 3
#define GEN5_SURFACE_BUFFER 4
#define GEN5_SURFACE_NULL 7

/* Border colour mode. */
#define GEN5_BORDER_COLOR_MODE_DEFAULT 0
#define GEN5_BORDER_COLOR_MODE_LEGACY 1

/* Texture coordinate addressing modes. */
#define GEN5_TEXCOORDMODE_WRAP 0
#define GEN5_TEXCOORDMODE_MIRROR 1
#define GEN5_TEXCOORDMODE_CLAMP 2
#define GEN5_TEXCOORDMODE_CUBE 3
#define GEN5_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN5_TEXCOORDMODE_MIRROR_ONCE 5

/* Thread priority. */
#define GEN5_THREAD_PRIORITY_NORMAL 0
#define GEN5_THREAD_PRIORITY_HIGH 1

/* Tile walk order: X-major or Y-major. */
#define GEN5_TILEWALK_XMAJOR 0
#define GEN5_TILEWALK_YMAJOR 1

/* Vertex sub-pixel precision: 8 bits (0) or 4 bits (1). */
#define GEN5_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN5_VERTEX_SUBPIXEL_PRECISION_4BITS 1

/* Vertex buffer access type: per-vertex or per-instance data. */
#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1

/* Vertex-fetch component control codes, 0-7. */
#define VFCOMPONENT_NOSTORE 0
#define VFCOMPONENT_STORE_SRC 1
#define VFCOMPONENT_STORE_0 2
#define VFCOMPONENT_STORE_1_FLT 3
#define VFCOMPONENT_STORE_1_INT 4
#define VFCOMPONENT_STORE_VID 5
#define VFCOMPONENT_STORE_IID 6
#define VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
 *
 * Encodings for instruction fields; each group below is one field.
 */

/* Access (alignment) mode. */
#define GEN5_ALIGN_1 0
#define GEN5_ALIGN_16 1

/* Register addressing mode. */
#define GEN5_ADDRESS_DIRECT 0
#define GEN5_ADDRESS_REGISTER_INDIRECT_REGISTER 1

/* Channel selectors X, Y, Z, W. */
#define GEN5_CHANNEL_X 0
#define GEN5_CHANNEL_Y 1
#define GEN5_CHANNEL_Z 2
#define GEN5_CHANNEL_W 3

/* Compression control. */
#define GEN5_COMPRESSION_NONE 0
#define GEN5_COMPRESSION_2NDHALF 1
#define GEN5_COMPRESSION_COMPRESSED 2

/* Conditional modifiers.  EQ and NEQ are aliases of Z and NZ. */
#define GEN5_CONDITIONAL_NONE 0
#define GEN5_CONDITIONAL_Z 1
#define GEN5_CONDITIONAL_NZ 2
#define GEN5_CONDITIONAL_EQ 1 /* Z */
#define GEN5_CONDITIONAL_NEQ 2 /* NZ */
#define GEN5_CONDITIONAL_G 3
#define GEN5_CONDITIONAL_GE 4
#define GEN5_CONDITIONAL_L 5
#define GEN5_CONDITIONAL_LE 6
#define GEN5_CONDITIONAL_C 7
#define GEN5_CONDITIONAL_O 8

/* Debug control. */
#define GEN5_DEBUG_NONE 0
#define GEN5_DEBUG_BREAKPOINT 1

/* Dependency control. */
#define GEN5_DEPENDENCY_NORMAL 0
#define GEN5_DEPENDENCY_NOTCLEARED 1
#define GEN5_DEPENDENCY_NOTCHECKED 2
#define GEN5_DEPENDENCY_DISABLE 3

/* Execution size codes; the value is log2 of the channel count. */
#define GEN5_EXECUTE_1 0
#define GEN5_EXECUTE_2 1
#define GEN5_EXECUTE_4 2
#define GEN5_EXECUTE_8 3
#define GEN5_EXECUTE_16 4
#define GEN5_EXECUTE_32 5

/* Horizontal stride codes for strides 0, 1, 2 and 4. */
#define GEN5_HORIZONTAL_STRIDE_0 0
#define GEN5_HORIZONTAL_STRIDE_1 1
#define GEN5_HORIZONTAL_STRIDE_2 2
#define GEN5_HORIZONTAL_STRIDE_4 3

/* Saturation control. */
#define GEN5_INSTRUCTION_NORMAL 0
#define GEN5_INSTRUCTION_SATURATE 1

/* Mask control.  Note ENABLE is 0 and DISABLE is 1. */
#define GEN5_MASK_ENABLE 0
#define GEN5_MASK_DISABLE 1
 
/*
 * EU instruction opcodes.  The numbering is sparse; the defined values
 * cluster at 1-16, 32-49, 64-89 and 126 (NOP).
 */
#define GEN5_OPCODE_MOV 1
#define GEN5_OPCODE_SEL 2
#define GEN5_OPCODE_NOT 4
#define GEN5_OPCODE_AND 5
#define GEN5_OPCODE_OR 6
#define GEN5_OPCODE_XOR 7
#define GEN5_OPCODE_SHR 8
#define GEN5_OPCODE_SHL 9
#define GEN5_OPCODE_RSR 10
#define GEN5_OPCODE_RSL 11
#define GEN5_OPCODE_ASR 12
#define GEN5_OPCODE_CMP 16
#define GEN5_OPCODE_JMPI 32
#define GEN5_OPCODE_IF 34
#define GEN5_OPCODE_IFF 35
#define GEN5_OPCODE_ELSE 36
#define GEN5_OPCODE_ENDIF 37
#define GEN5_OPCODE_DO 38
#define GEN5_OPCODE_WHILE 39
#define GEN5_OPCODE_BREAK 40
#define GEN5_OPCODE_CONTINUE 41
#define GEN5_OPCODE_HALT 42
#define GEN5_OPCODE_MSAVE 44
#define GEN5_OPCODE_MRESTORE 45
#define GEN5_OPCODE_PUSH 46
#define GEN5_OPCODE_POP 47
#define GEN5_OPCODE_WAIT 48
#define GEN5_OPCODE_SEND 49
#define GEN5_OPCODE_ADD 64
#define GEN5_OPCODE_MUL 65
#define GEN5_OPCODE_AVG 66
#define GEN5_OPCODE_FRC 67
#define GEN5_OPCODE_RNDU 68
#define GEN5_OPCODE_RNDD 69
#define GEN5_OPCODE_RNDE 70
#define GEN5_OPCODE_RNDZ 71
#define GEN5_OPCODE_MAC 72
#define GEN5_OPCODE_MACH 73
#define GEN5_OPCODE_LZD 74
#define GEN5_OPCODE_SAD2 80
#define GEN5_OPCODE_SADA2 81
#define GEN5_OPCODE_DP4 84
#define GEN5_OPCODE_DPH 85
#define GEN5_OPCODE_DP3 86
#define GEN5_OPCODE_DP2 87
#define GEN5_OPCODE_DPA2 88
#define GEN5_OPCODE_LINE 89
#define GEN5_OPCODE_NOP 126
 
/*
 * Predicate control codes.  The ALIGN1_* and ALIGN16_* names reuse the
 * same numeric range (2 and up); which set applies presumably depends on
 * the instruction's access mode (GEN5_ALIGN_1 / GEN5_ALIGN_16) - confirm.
 */
#define GEN5_PREDICATE_NONE 0
#define GEN5_PREDICATE_NORMAL 1
#define GEN5_PREDICATE_ALIGN1_ANYV 2
#define GEN5_PREDICATE_ALIGN1_ALLV 3
#define GEN5_PREDICATE_ALIGN1_ANY2H 4
#define GEN5_PREDICATE_ALIGN1_ALL2H 5
#define GEN5_PREDICATE_ALIGN1_ANY4H 6
#define GEN5_PREDICATE_ALIGN1_ALL4H 7
#define GEN5_PREDICATE_ALIGN1_ANY8H 8
#define GEN5_PREDICATE_ALIGN1_ALL8H 9
#define GEN5_PREDICATE_ALIGN1_ANY16H 10
#define GEN5_PREDICATE_ALIGN1_ALL16H 11
#define GEN5_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN5_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN5_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN5_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN5_PREDICATE_ALIGN16_ANY4H 6
#define GEN5_PREDICATE_ALIGN16_ALL4H 7

/* Register file selectors. */
#define GEN5_ARCHITECTURE_REGISTER_FILE 0
#define GEN5_GENERAL_REGISTER_FILE 1
#define GEN5_MESSAGE_REGISTER_FILE 2
#define GEN5_IMMEDIATE_VALUE 3

/*
 * Register data type codes.  B/VF share value 5 and HF/V share value 6;
 * per the inline notes VF and V are immediate-only encodings.
 */
#define GEN5_REGISTER_TYPE_UD 0
#define GEN5_REGISTER_TYPE_D 1
#define GEN5_REGISTER_TYPE_UW 2
#define GEN5_REGISTER_TYPE_W 3
#define GEN5_REGISTER_TYPE_UB 4
#define GEN5_REGISTER_TYPE_B 5
#define GEN5_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN5_REGISTER_TYPE_HF 6
#define GEN5_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN5_REGISTER_TYPE_F 7

/* Architecture register file (ARF) register codes, in steps of 0x10. */
#define GEN5_ARF_NULL 0x00
#define GEN5_ARF_ADDRESS 0x10
#define GEN5_ARF_ACCUMULATOR 0x20
#define GEN5_ARF_FLAG 0x30
#define GEN5_ARF_MASK 0x40
#define GEN5_ARF_MASK_STACK 0x50
#define GEN5_ARF_MASK_STACK_DEPTH 0x60
#define GEN5_ARF_STATE 0x70
#define GEN5_ARF_CONTROL 0x80
#define GEN5_ARF_NOTIFICATION_COUNT 0x90
#define GEN5_ARF_IP 0xA0

/* Mask register selectors. */
#define GEN5_AMASK 0
#define GEN5_IMASK 1
#define GEN5_LMASK 2
#define GEN5_CMASK 3
 
 
 
/* Thread control. */
#define GEN5_THREAD_NORMAL 0
#define GEN5_THREAD_ATOMIC 1
#define GEN5_THREAD_SWITCH 2

/*
 * Vertical stride codes.  Code 0 is stride 0; codes 1-9 encode strides
 * 1 through 256 as powers of two (code = log2(stride) + 1); 0xF selects
 * the one-dimensional mode.
 */
#define GEN5_VERTICAL_STRIDE_0 0
#define GEN5_VERTICAL_STRIDE_1 1
#define GEN5_VERTICAL_STRIDE_2 2
#define GEN5_VERTICAL_STRIDE_4 3
#define GEN5_VERTICAL_STRIDE_8 4
#define GEN5_VERTICAL_STRIDE_16 5
#define GEN5_VERTICAL_STRIDE_32 6
#define GEN5_VERTICAL_STRIDE_64 7
#define GEN5_VERTICAL_STRIDE_128 8
#define GEN5_VERTICAL_STRIDE_256 9
#define GEN5_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF

/* Region width codes; the value is log2 of the width. */
#define GEN5_WIDTH_1 0
#define GEN5_WIDTH_2 1
#define GEN5_WIDTH_4 2
#define GEN5_WIDTH_8 3
#define GEN5_WIDTH_16 4

/* Stateless buffer boundary codes: 1K << code, from 1K up to 2M. */
#define GEN5_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN5_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN5_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN5_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN5_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN5_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN5_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN5_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN5_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN5_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN5_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN5_STATELESS_BUFFER_BOUNDARY_2M 11

/* Polygon facing (same values as GEN5_POLYGON_*_FACING earlier in this file). */
#define GEN5_POLYGON_FACING_FRONT 0
#define GEN5_POLYGON_FACING_BACK 1
 
/* Message target (shared function) identifiers, 0-7. */
#define GEN5_MESSAGE_TARGET_NULL 0
#define GEN5_MESSAGE_TARGET_MATH 1
#define GEN5_MESSAGE_TARGET_SAMPLER 2
#define GEN5_MESSAGE_TARGET_GATEWAY 3
#define GEN5_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN5_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN5_MESSAGE_TARGET_URB 6
#define GEN5_MESSAGE_TARGET_THREAD_SPAWNER 7

/* Sampler return format; value 1 is not defined here. */
#define GEN5_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN5_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN5_SAMPLER_RETURN_FORMAT_SINT32 3

/*
 * Sampler message type codes.  Many names share a numeric value (all are
 * in the range 0-3); the code is therefore only meaningful together with
 * the SIMD mode named in the macro.
 */
#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN5_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN5_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN5_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN5_SAMPLER_MESSAGE_SIMD16_LD 3
 
/* Data port OWORD block size codes. */
#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN5_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN5_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN5_DATAPORT_OWORD_BLOCK_8_OWORDS 4

/* Data port OWORD dual-block size codes; value 1 is not defined here. */
#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2

/* Data port DWORD scattered block size codes. */
#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3

/* Data port read message types. */
#define GEN5_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN5_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN5_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN5_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3

/* Data port read target cache. */
#define GEN5_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN5_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN5_DATAPORT_READ_TARGET_SAMPLER_CACHE 2

/* Render target write message sub-types. */
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4

/* Data port write message types; value 6 is not defined here. */
#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN5_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN5_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN5_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/*
 * Math function codes, 1-13.  The "was N" notes on SIN/COS/SINCOS record
 * values these three macros previously had.
 */
#define GEN5_MATH_FUNCTION_INV 1
#define GEN5_MATH_FUNCTION_LOG 2
#define GEN5_MATH_FUNCTION_EXP 3
#define GEN5_MATH_FUNCTION_SQRT 4
#define GEN5_MATH_FUNCTION_RSQ 5
#define GEN5_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN5_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN5_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN5_MATH_FUNCTION_TAN 9
#define GEN5_MATH_FUNCTION_POW 10
#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN5_MATH_FUNCTION_INT_DIV_REMAINDER 13

/* Integer signedness for math messages. */
#define GEN5_MATH_INTEGER_UNSIGNED 0
#define GEN5_MATH_INTEGER_SIGNED 1

/* Math precision. */
#define GEN5_MATH_PRECISION_FULL 0
#define GEN5_MATH_PRECISION_PARTIAL 1

/* Math saturation. */
#define GEN5_MATH_SATURATE_NONE 0
#define GEN5_MATH_SATURATE_SATURATE 1

/* Math data type: vector or scalar. */
#define GEN5_MATH_DATA_VECTOR 0
#define GEN5_MATH_DATA_SCALAR 1
 
/* URB message opcode (only WRITE is defined here). */
#define GEN5_URB_OPCODE_WRITE 0

/* URB swizzle control. */
#define GEN5_URB_SWIZZLE_NONE 0
#define GEN5_URB_SWIZZLE_INTERLEAVE 1
#define GEN5_URB_SWIZZLE_TRANSPOSE 2

/* Scratch space size codes: 1K << code, from 1K up to 2M. */
#define GEN5_SCRATCH_SPACE_SIZE_1K 0
#define GEN5_SCRATCH_SPACE_SIZE_2K 1
#define GEN5_SCRATCH_SPACE_SIZE_4K 2
#define GEN5_SCRATCH_SPACE_SIZE_8K 3
#define GEN5_SCRATCH_SPACE_SIZE_16K 4
#define GEN5_SCRATCH_SPACE_SIZE_32K 5
#define GEN5_SCRATCH_SPACE_SIZE_64K 6
#define GEN5_SCRATCH_SPACE_SIZE_128K 7
#define GEN5_SCRATCH_SPACE_SIZE_256K 8
#define GEN5_SCRATCH_SPACE_SIZE_512K 9
#define GEN5_SCRATCH_SPACE_SIZE_1M 10
#define GEN5_SCRATCH_SPACE_SIZE_2M 11
 
 
 
 
/*
 * 16-bit command opcodes.  The structs later in this file carry these in
 * a 16-bit "opcode" bit-field of the command header.
 */
#define CMD_URB_FENCE 0x6000
#define CMD_CONST_BUFFER_STATE 0x6001
#define CMD_CONST_BUFFER 0x6002

#define CMD_STATE_BASE_ADDRESS 0x6101
#define CMD_STATE_INSN_POINTER 0x6102
#define CMD_PIPELINE_SELECT 0x6104

/* 0x78xx: low byte matches the first run of _3DSTATE_* codes. */
#define CMD_PIPELINED_STATE_POINTERS 0x7800
#define CMD_BINDING_TABLE_PTRS 0x7801
#define CMD_VERTEX_BUFFER 0x7808
#define CMD_VERTEX_ELEMENT 0x7809
#define CMD_INDEX_BUFFER 0x780a
#define CMD_VF_STATISTICS 0x780b
 
/*
 * 0x79xx command opcodes.  The low byte of each opcode is the matching
 * _3DSTATE_* code defined earlier in this file (DRAWING_RECTANGLE 0x00 ...
 * LINE_STIPPLE 0x08, GLOBAL_DEPTH_OFFSET_CLAMP 0x09).
 *
 * CMD_GLOBAL_DEPTH_OFFSET_CLAMP used to be 0x7908, colliding with
 * CMD_LINE_STIPPLE_PATTERN; it is 0x7909 to agree with
 * _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP.
 */
#define CMD_DRAW_RECT 0x7900
#define CMD_BLEND_CONSTANT_COLOR 0x7901
#define CMD_CHROMA_KEY 0x7904
#define CMD_DEPTH_BUFFER 0x7905
#define CMD_POLY_STIPPLE_OFFSET 0x7906
#define CMD_POLY_STIPPLE_PATTERN 0x7907
#define CMD_LINE_STIPPLE_PATTERN 0x7908
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
 
/* Opcodes for the pipe-control, 3D-primitive and MI_FLUSH commands. */
#define CMD_PIPE_CONTROL 0x7a00

#define CMD_3D_PRIM 0x7b00

#define CMD_MI_FLUSH 0x0200
 
 
/* Various values from the R0 vertex header:
 * single-bit flags marking the end (bit 0) and start (bit 1) of a primitive.
 */
#define R02_PRIM_END 0x1
#define R02_PRIM_START 0x2
 
/* media pipeline */

/* VFE mode codes; the numbering is sparse. */
#define GEN5_VFE_MODE_GENERIC 0x0
#define GEN5_VFE_MODE_VLD_MPEG2 0x1
#define GEN5_VFE_MODE_IS 0x2
#define GEN5_VFE_MODE_AVC_MC 0x4
#define GEN5_VFE_MODE_AVC_IT 0x7
#define GEN5_VFE_MODE_VC1_IT 0xB

/* VFE debug counter control. */
#define GEN5_VFE_DEBUG_COUNTER_FREE 0
#define GEN5_VFE_DEBUG_COUNTER_FROZEN 1
#define GEN5_VFE_DEBUG_COUNTER_ONCE 2
#define GEN5_VFE_DEBUG_COUNTER_ALWAYS 3

/* VLD_STATE */
/* Picture structure (field/frame), quantiser scale type, scan order and
 * picture coding type values. */
#define GEN5_MPEG_TOP_FIELD 1
#define GEN5_MPEG_BOTTOM_FIELD 2
#define GEN5_MPEG_FRAME 3
#define GEN5_MPEG_QSCALE_LINEAR 0
#define GEN5_MPEG_QSCALE_NONLINEAR 1
#define GEN5_MPEG_ZIGZAG_SCAN 0
#define GEN5_MPEG_ALTER_VERTICAL_SCAN 1
#define GEN5_MPEG_I_PICTURE 1
#define GEN5_MPEG_P_PICTURE 2
#define GEN5_MPEG_B_PICTURE 3
 
/* Command packets:
 *
 * struct header is the generic command header: a 16-bit length field
 * declared first, then a 16-bit opcode field (32 bits in total).
 */
struct header {
    unsigned int length : 16;
    unsigned int opcode : 16;
};
 
 
/*
 * Overlay of a command header: access it field-by-field through .bits
 * or as one raw value through .dword.
 */
union header_union {
    struct header bits;
    unsigned int dword;
};
 
/*
 * 3D control command: a 32-bit bit-field header, a destination address
 * dword and two further dwords.
 */
struct gen5_3d_control {
    /* Header fields: 8 + 1 + 3 + 1 + 1 + 2 + 16 = 32 bits. */
    struct {
        unsigned int length : 8;
        unsigned int notify_enable : 1;
        unsigned int pad : 3;
        unsigned int wc_flush_enable : 1;
        unsigned int depth_stall_enable : 1;
        unsigned int operation : 2;
        unsigned int opcode : 16;
    } header;

    /* Destination: 2 pad bits, a 1-bit address type and a 29-bit address. */
    struct {
        unsigned int pad : 2;
        unsigned int dest_addr_type : 1;
        unsigned int dest_addr : 29;
    } dest;

    unsigned int dword2;
    unsigned int dword3;
};
 
 
/*
 * 3D primitive command: a 32-bit bit-field header followed by five
 * dwords describing the vertex and instance ranges.
 */
struct gen5_3d_primitive {
    /* Header fields: 8 + 2 + 5 + 1 + 16 = 32 bits. */
    struct {
        unsigned int length : 8;
        unsigned int pad : 2;
        unsigned int topology : 5;
        unsigned int indexed : 1;
        unsigned int opcode : 16;
    } header;

    unsigned int verts_per_instance;
    unsigned int start_vert_location;
    unsigned int instance_count;
    unsigned int start_instance_location;
    unsigned int base_vert_location;
};
 
/* These seem to be passed around as function args, so it works out
* better to keep them as #defines:
*
* Four single-bit flags (bits 0-3); struct gen5_mi_flush in this file has a
* 4-bit "flags" field.
*/
#define GEN5_FLUSH_READ_CACHE 0x1
#define GEN5_FLUSH_STATE_CACHE 0x2
#define GEN5_INHIBIT_FLUSH_RENDER_CACHE 0x4
#define GEN5_FLUSH_SNAPSHOT_COUNTERS 0x8
 
/*
 * MI_FLUSH command, a single dword: 4 flag bits, 12 pad bits and a
 * 16-bit opcode.
 */
struct gen5_mi_flush {
    unsigned int flags : 4;
    unsigned int pad : 12;
    unsigned int opcode : 16;
};
 
/*
 * VF statistics command, a single dword: a 1-bit enable, 15 pad bits
 * and a 16-bit opcode.
 */
struct gen5_vf_statistics {
    unsigned int statistics_enable : 1;
    unsigned int pad : 15;
    unsigned int opcode : 16;
};
 
 
 
/*
 * Binding table pointers command: generic header followed by one dword
 * each for vs, gs, clp, sf and wm.
 */
struct gen5_binding_table_pointers {
    struct header header;

    unsigned int vs;
    unsigned int gs;
    unsigned int clp;
    unsigned int sf;
    unsigned int wm;
};
 
 
/* Blend constant colour command: generic header plus four floats. */
struct gen5_blend_constant_color {
    struct header header;

    float blend_constant_color[4];
};
 
 
/*
 * Depth buffer command: a header dword followed by four payload dwords.
 * dword1, dword3 and dword4 are unions so each can be accessed either
 * field-by-field (.bits) or as one raw value (.dword).
 */
struct gen5_depthbuffer {
    union header_union header;

    /* 18 + 3 + 4 + 1 + 1 + 1 + 1 + 3 = 32 bits. */
    union {
        struct {
            unsigned int pitch : 18;
            unsigned int format : 3;
            unsigned int pad : 4;
            unsigned int depth_offset_disable : 1;
            unsigned int tile_walk : 1;
            unsigned int tiled_surface : 1;
            unsigned int pad2 : 1;
            unsigned int surface_type : 3;
        } bits;
        unsigned int dword;
    } dword1;

    unsigned int dword2_base_addr;

    /* 1 + 1 + 4 + 13 + 13 = 32 bits. */
    union {
        struct {
            unsigned int pad : 1;
            unsigned int mipmap_layout : 1;
            unsigned int lod : 4;
            unsigned int width : 13;
            unsigned int height : 13;
        } bits;
        unsigned int dword;
    } dword3;

    /* 12 + 9 + 11 = 32 bits. */
    union {
        struct {
            unsigned int pad : 12;
            unsigned int min_array_element : 9;
            unsigned int depth : 11;
        } bits;
        unsigned int dword;
    } dword4;
};
 
/*
 * Drawing rectangle command: generic header followed by three pairs of
 * 16-bit fields (min corner, max corner, origin).
 */
struct gen5_drawrect {
    struct header header;

    unsigned int xmin : 16;
    unsigned int ymin : 16;

    unsigned int xmax : 16;
    unsigned int ymax : 16;

    unsigned int xorg : 16;
    unsigned int yorg : 16;
};
 
 
 
 
/* Global depth offset clamp command: generic header plus one float. */
struct gen5_global_depth_offset_clamp {
    struct header header;

    float depth_offset_clamp;
};
 
/*
 * Index buffer command: a header that can be read as bit-fields (.bits)
 * or as one raw value (.dword), followed by the buffer start and end.
 */
struct gen5_indexbuffer {
    union {
        /* 8 + 2 + 1 + 5 + 16 = 32 bits. */
        struct {
            unsigned int length : 8;
            unsigned int index_format : 2;
            unsigned int cut_index_enable : 1;
            unsigned int pad : 5;
            unsigned int opcode : 16;
        } bits;
        unsigned int dword;
    } header;

    unsigned int buffer_start;
    unsigned int buffer_end;
};
 
 
/*
 * Line stipple command: generic header followed by two bit-field dwords
 * (pattern, then repeat counts).
 */
struct gen5_line_stipple {
    struct header header;

    /* 16-bit pattern plus 16 pad bits. */
    struct {
        unsigned int pattern : 16;
        unsigned int pad : 16;
    } bits0;

    /* 9 + 7 + 16 = 32 bits. */
    struct {
        unsigned int repeat_count : 9;
        unsigned int pad : 7;
        unsigned int inverse_repeat_count : 16;
    } bits1;
};
 
 
/*
 * Pipelined state pointers command: generic header followed by one
 * dword per unit (vs, gs, clp, sf, wm, cc).  Every dword carries a
 * 27-bit offset in its upper declared field; gs and clp additionally
 * have a 1-bit enable declared first.
 */
struct gen5_pipelined_state_pointers {
    struct header header;

    struct {
        unsigned int pad : 5;
        unsigned int offset : 27;
    } vs;

    struct {
        unsigned int enable : 1;
        unsigned int pad : 4;
        unsigned int offset : 27;
    } gs;

    struct {
        unsigned int enable : 1;
        unsigned int pad : 4;
        unsigned int offset : 27;
    } clp;

    struct {
        unsigned int pad : 5;
        unsigned int offset : 27;
    } sf;

    struct {
        unsigned int pad : 5;
        unsigned int offset : 27;
    } wm;

    struct {
        unsigned int pad : 5;
        unsigned int offset : 27; /* KW: check me! */
    } cc;
};
 
 
/* Command packet: shared header plus one word with 5-bit y and x offsets. */
struct gen5_polygon_stipple_offset {
    struct header header;

    struct {
        unsigned int y_offset:5;
        unsigned int pad:3;
        unsigned int x_offset:5;
        unsigned int pad0:19;
    } bits0;
};
 
 
 
/* Command packet: shared header followed by 32 words of stipple data. */
struct gen5_polygon_stipple {
    struct header header;
    unsigned int stipple[32];
};
 
 
 
/* Single-word command: 1 select bit, 15 padding bits, 16-bit opcode. */
struct gen5_pipeline_select {
    struct {
        unsigned int pipeline_select:1;
        unsigned int pad:15;
        unsigned int opcode:16;
    } header;
};
 
 
/*
 * Four-word pipe-control packet: a header word with flush/stall controls, a
 * destination-address word, and two 32-bit data words.
 */
struct gen5_pipe_control {
    struct {
        unsigned int length:8;
        unsigned int notify_enable:1;
        unsigned int pad:2;
        unsigned int instruction_state_cache_flush_enable:1;
        unsigned int write_cache_flush_enable:1;
        unsigned int depth_stall_enable:1;
        unsigned int post_sync_operation:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int pad:2;
        unsigned int dest_addr_type:1;
        unsigned int dest_addr:29;
    } bits1;

    unsigned int data0;
    unsigned int data1;
};
 
 
/*
 * Three-word URB fence packet: a header with one realloc flag per unit,
 * followed by two words each holding three 10-bit fence values.
 */
struct gen5_urb_fence {
    struct {
        unsigned int length:8;
        unsigned int vs_realloc:1;
        unsigned int gs_realloc:1;
        unsigned int clp_realloc:1;
        unsigned int sf_realloc:1;
        unsigned int vfe_realloc:1;
        unsigned int cs_realloc:1;
        unsigned int pad:2;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int vs_fence:10;
        unsigned int gs_fence:10;
        unsigned int clp_fence:10;
        unsigned int pad:2;
    } bits0;

    struct {
        unsigned int sf_fence:10;
        unsigned int vf_fence:10;
        unsigned int cs_fence:10;
        unsigned int pad:2;
    } bits1;
};
 
/*
 * Constant-buffer state packet: shared header plus one word giving the number
 * of URB entries (3 bits) and the URB entry size (5 bits).
 */
struct gen5_constant_buffer_state { /* previously gen5_command_streamer */
    struct header header;

    struct {
        unsigned int nr_urb_entries:3;
        unsigned int pad:1;
        unsigned int urb_entry_size:5;
        unsigned int pad0:23;
    } bits0;
};
 
/*
 * Two-word constant-buffer packet: a header with a valid flag, then a word
 * packing a 6-bit buffer length with a 26-bit buffer address.
 */
struct gen5_constant_buffer {
    struct {
        unsigned int length:8;
        unsigned int valid:1;
        unsigned int pad:7;
        unsigned int opcode:16;
    } header;

    struct {
        unsigned int buffer_length:6;
        unsigned int buffer_address:26;
    } bits0;
};
 
/*
 * State-base-address packet: shared header followed by five words.  Every
 * word starts with a modify_enable bit; bits0..bits2 carry 27-bit addresses
 * and bits3..bits4 carry 20-bit upper bounds.
 */
struct gen5_state_base_address {
    struct header header;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int general_state_address:27;
    } bits0;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int surface_state_address:27;
    } bits1;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:4;
        unsigned int indirect_object_state_address:27;
    } bits2;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int general_state_upper_bound:20;
    } bits3;

    struct {
        unsigned int modify_enable:1;
        unsigned int pad:11;
        unsigned int indirect_object_state_upper_bound:20;
    } bits4;
};
 
/* Command packet: shared header plus a 3-bit count and a 26-bit pointer. */
struct gen5_state_prefetch {
    struct header header;

    struct {
        unsigned int prefetch_count:3;
        unsigned int pad:3;
        unsigned int prefetch_pointer:26;
    } bits0;
};
 
/* Command packet: shared header plus a 28-bit pointer above 4 padding bits. */
struct gen5_system_instruction_pointer {
    struct header header;

    struct {
        unsigned int pad:4;
        unsigned int system_instruction_pointer:28;
    } bits0;
};
 
 
 
 
/* State structs for the various fixed function units:
*/
 
 
/*
 * First thread-state word shared by the unit-state structs below: GRF
 * register count (3 bits) and kernel start pointer (26 bits).
 */
struct thread0 {
    unsigned int pad0:1;
    unsigned int grf_reg_count:3;
    unsigned int pad1:2;
    unsigned int kernel_start_pointer:26;
};
 
/*
 * Second thread-state word: exception enables, floating-point mode, thread
 * priority, binding-table entry count and the single-program-flow flag.
 */
struct thread1 {
    unsigned int ext_halt_exception_enable:1;
    unsigned int sw_exception_enable:1;
    unsigned int mask_stack_exception_enable:1;
    unsigned int timeout_exception_enable:1;
    unsigned int illegal_op_exception_enable:1;
    unsigned int pad0:3;
    unsigned int depth_coef_urb_read_offset:6; /* WM only */
    unsigned int pad1:2;
    unsigned int floating_point_mode:1;
    unsigned int thread_priority:1;
    unsigned int binding_table_entry_count:8;
    unsigned int pad3:5;
    unsigned int single_program_flow:1;
};
 
/* Third thread-state word: per-thread scratch size and scratch base pointer. */
struct thread2 {
    unsigned int per_thread_scratch_space:4;
    unsigned int pad0:6;
    unsigned int scratch_space_base_pointer:22;
};
 
/*
 * Fourth thread-state word: dispatch GRF start register plus offset/length
 * pairs for URB entry reads and constant URB entry reads.
 */
struct thread3 {
    unsigned int dispatch_grf_start_reg:4;
    unsigned int urb_entry_read_offset:6;
    unsigned int pad0:1;
    unsigned int urb_entry_read_length:6;
    unsigned int pad1:1;
    unsigned int const_urb_entry_read_offset:6;
    unsigned int pad2:1;
    unsigned int const_urb_entry_read_length:6;
    unsigned int pad3:1;
};
 
 
 
/*
 * Clip unit state: the four common thread words, a URB/thread-count word,
 * two clip-specific words and four float viewport bounds.
 */
struct gen5_clip_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:9;
        unsigned int gs_output_stats:1; /* not always */
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6; /* may be less */
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int pad0:13;
        unsigned int clip_mode:3;
        unsigned int userclip_enable_flags:8;
        unsigned int userclip_must_clip:1;
        unsigned int pad1:1;
        unsigned int guard_band_enable:1;
        unsigned int viewport_z_clip_enable:1;
        unsigned int viewport_xy_clip_enable:1;
        unsigned int vertex_position_space:1;
        unsigned int api_mode:1;
        unsigned int pad2:1;
    } clip5;

    struct {
        unsigned int pad0:5;
        unsigned int clipper_viewport_state_ptr:27;
    } clip6;

    float viewport_xmin;
    float viewport_xmax;
    float viewport_ymin;
    float viewport_ymax;
};
 
 
 
/*
 * Colour-calculator unit state: eight 32-bit words (cc0..cc7).  cc0..cc2
 * hold stencil and depth-test controls, cc3/cc5/cc6 hold alpha-test, blend,
 * logic-op and dither controls, cc4 holds the CC viewport state offset and
 * cc7 holds the alpha reference (as a float or as four bytes).
 */
struct gen5_cc_unit_state {
    struct {
        unsigned int pad0:3;
        unsigned int bf_stencil_pass_depth_pass_op:3;
        unsigned int bf_stencil_pass_depth_fail_op:3;
        unsigned int bf_stencil_fail_op:3;
        unsigned int bf_stencil_func:3;
        unsigned int bf_stencil_enable:1;
        unsigned int pad1:2;
        unsigned int stencil_write_enable:1;
        unsigned int stencil_pass_depth_pass_op:3;
        unsigned int stencil_pass_depth_fail_op:3;
        unsigned int stencil_fail_op:3;
        unsigned int stencil_func:3;
        unsigned int stencil_enable:1;
    } cc0;

    struct {
        unsigned int bf_stencil_ref:8;
        unsigned int stencil_write_mask:8;
        unsigned int stencil_test_mask:8;
        unsigned int stencil_ref:8;
    } cc1;

    struct {
        unsigned int logicop_enable:1;
        unsigned int pad0:10;
        unsigned int depth_write_enable:1;
        unsigned int depth_test_function:3;
        unsigned int depth_test:1;
        unsigned int bf_stencil_write_mask:8;
        unsigned int bf_stencil_test_mask:8;
    } cc2;

    struct {
        unsigned int pad0:8;
        unsigned int alpha_test_func:3;
        unsigned int alpha_test:1;
        unsigned int blend_enable:1;
        unsigned int ia_blend_enable:1;
        unsigned int pad1:1;
        unsigned int alpha_test_format:1;
        unsigned int pad2:16;
    } cc3;

    struct {
        unsigned int pad0:5;
        unsigned int cc_viewport_state_offset:27;
    } cc4;

    struct {
        unsigned int pad0:2;
        unsigned int ia_dest_blend_factor:5;
        unsigned int ia_src_blend_factor:5;
        unsigned int ia_blend_function:3;
        unsigned int statistics_enable:1;
        unsigned int logicop_func:4;
        unsigned int pad1:11;
        unsigned int dither_enable:1;
    } cc5;

    struct {
        unsigned int clamp_post_alpha_blend:1;
        unsigned int clamp_pre_alpha_blend:1;
        unsigned int clamp_range:2;
        unsigned int pad0:11;
        unsigned int y_dither_offset:2;
        unsigned int x_dither_offset:2;
        unsigned int dest_blend_factor:5;
        unsigned int src_blend_factor:5;
        unsigned int blend_function:3;
    } cc6;

    struct {
        /* Same 4 bytes viewed as a float or as raw unsigned bytes. */
        union {
            float f;
            unsigned char ub[4];
        } alpha_ref;
    } cc7;
};
 
 
 
/*
 * SF unit state: common thread words 0, 2 and 3, an SF-specific replacement
 * for word 1 (sf1), a URB/thread-count word and three SF-specific words.
 */
struct gen5_sf_unit_state {
    struct thread0 thread0;

    struct {
        unsigned int pad0:7;
        unsigned int sw_exception_enable:1;
        unsigned int pad1:3;
        unsigned int mask_stack_exception_enable:1;
        unsigned int pad2:1;
        unsigned int illegal_op_exception_enable:1;
        unsigned int pad3:2;
        unsigned int floating_point_mode:1;
        unsigned int thread_priority:1;
        unsigned int binding_table_entry_count:8;
        unsigned int pad4:5;
        unsigned int single_program_flow:1;
    } sf1;

    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:6;
        unsigned int pad3:1;
    } thread4;

    struct {
        unsigned int front_winding:1;
        unsigned int viewport_transform:1;
        unsigned int pad0:3;
        unsigned int sf_viewport_state_offset:27;
    } sf5;

    struct {
        unsigned int pad0:9;
        unsigned int dest_org_vbias:4;
        unsigned int dest_org_hbias:4;
        unsigned int scissor:1;
        unsigned int disable_2x2_trifilter:1;
        unsigned int disable_zero_pix_trifilter:1;
        unsigned int point_rast_rule:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int line_width:4;
        unsigned int fast_scissor_disable:1;
        unsigned int cull_mode:2;
        unsigned int aa_enable:1;
    } sf6;

    struct {
        unsigned int point_size:11;
        unsigned int use_point_size_state:1;
        unsigned int subpixel_precision:1;
        unsigned int sprite_point:1;
        unsigned int pad0:11;
        unsigned int trifan_pv:2;
        unsigned int linestrip_pv:2;
        unsigned int tristrip_pv:2;
        unsigned int line_last_pixel_enable:1;
    } sf7;
};
 
 
/*
 * GS unit state: the four common thread words, a URB/thread-count word, a
 * sampler word and a word with the viewport index limit and reorder flag.
 * Note max_threads is only 1 bit wide here (4 bits in the VS variant,
 * 6 bits in the clip/SF variants).
 */
struct gen5_gs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:1;
        unsigned int pad3:6;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } gs5;

    struct {
        unsigned int max_vp_index:4;
        unsigned int pad0:26;
        unsigned int reorder_enable:1;
        unsigned int pad1:1;
    } gs6;
};
 
 
/*
 * VS unit state: the four common thread words, a URB/thread-count word, a
 * sampler word and a word with the VS enable and vertex-cache disable flags.
 */
struct gen5_vs_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int pad0:10;
        unsigned int stats_enable:1;
        unsigned int nr_urb_entries:7;
        unsigned int pad1:1;
        unsigned int urb_entry_allocation_size:5;
        unsigned int pad2:1;
        unsigned int max_threads:4;
        unsigned int pad3:3;
    } thread4;

    struct {
        unsigned int sampler_count:3;
        unsigned int pad0:2;
        unsigned int sampler_state_pointer:27;
    } vs5;

    struct {
        unsigned int vs_enable:1;
        unsigned int vert_cache_disable:1;
        unsigned int pad0:30;
    } vs6;
};
 
 
/*
 * WM unit state: the four common thread words, a sampler word (wm4), a
 * dispatch/feature word (wm5), two float depth-offset values, and three
 * further words (wm8..wm10) that repeat the thread0 field layout for
 * kernels numbered 1..3.
 */
struct gen5_wm_unit_state {
    struct thread0 thread0;
    struct thread1 thread1;
    struct thread2 thread2;
    struct thread3 thread3;

    struct {
        unsigned int stats_enable:1;
        unsigned int pad0:1;
        unsigned int sampler_count:3;
        unsigned int sampler_state_pointer:27;
    } wm4;

    struct {
        unsigned int enable_8_pix:1;
        unsigned int enable_16_pix:1;
        unsigned int enable_32_pix:1;
        unsigned int pad0:7;
        unsigned int legacy_global_depth_bias:1;
        unsigned int line_stipple:1;
        unsigned int depth_offset:1;
        unsigned int polygon_stipple:1;
        unsigned int line_aa_region_width:2;
        unsigned int line_endcap_aa_region_width:2;
        unsigned int early_depth_test:1;
        unsigned int thread_dispatch_enable:1;
        unsigned int program_uses_depth:1;
        unsigned int program_computes_depth:1;
        unsigned int program_uses_killpixel:1;
        unsigned int legacy_line_rast:1;
        unsigned int transposed_urb_read:1;
        unsigned int max_threads:7;
    } wm5;

    float global_depth_offset_constant;
    float global_depth_offset_scale;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_1:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_1:26;
    } wm8;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_2:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_2:26;
    } wm9;

    struct {
        unsigned int pad0:1;
        unsigned int grf_reg_count_3:3;
        unsigned int pad1:2;
        unsigned int kernel_start_pointer_3:26;
    } wm10;
};
 
/*
 * gen5_wm_unit_state followed by filler bytes so the whole object occupies
 * 64 bytes.  The array size only compiles while the state itself is smaller
 * than 64 bytes.
 */
struct gen5_wm_unit_state_padded {
    struct gen5_wm_unit_state state;
    char pad[64 - sizeof(struct gen5_wm_unit_state)];
};
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
/* Border colour in the default mode: four floating-point channels. */
struct gen5_sampler_default_border_color {
    float color[4];
};
 
/* Border colour in the legacy mode: four 8-bit channels. */
struct gen5_sampler_legacy_border_color {
    uint8_t color[4];
};
 
/*
 * Sampler state: four 32-bit words.  ss0 holds filter/LOD controls and the
 * border-colour mode bit, ss1 the wrap modes and LOD range, ss2 the border
 * colour pointer, ss3 address rounding, anisotropy and chroma-key controls.
 */
struct gen5_sampler_state {
    struct {
        unsigned int shadow_function:3;
        unsigned int lod_bias:11;
        unsigned int min_filter:3;
        unsigned int mag_filter:3;
        unsigned int mip_filter:2;
        unsigned int base_level:5;
        unsigned int pad:1;
        unsigned int lod_preclamp:1;
        unsigned int border_color_mode:1;
        unsigned int pad0:1;
        unsigned int disable:1;
    } ss0;

    struct {
        unsigned int r_wrap_mode:3;
        unsigned int t_wrap_mode:3;
        unsigned int s_wrap_mode:3;
        unsigned int pad:3;
        unsigned int max_lod:10;
        unsigned int min_lod:10;
    } ss1;

    struct {
        unsigned int pad:5;
        unsigned int border_color_pointer:27;
    } ss2;

    struct {
        uint32_t pad:13;
        uint32_t address_round:6;
        uint32_t max_aniso:3;
        uint32_t chroma_key_mode:1;
        uint32_t chroma_key_index:2;
        uint32_t chroma_key_enable:1;
        uint32_t mbz:6;
    } ss3;
};
 
 
/* Clipper viewport: four float bounds in the order xmin, xmax, ymin, ymax. */
struct gen5_clipper_viewport {
    float xmin;
    float xmax;
    float ymin;
    float ymax;
};
 
/* Colour-calculator viewport: minimum and maximum depth as floats. */
struct gen5_cc_viewport {
    float min_depth;
    float max_depth;
};
 
/*
 * SF viewport: six float matrix elements (m00, m11, m22, m30, m31, m32)
 * followed by a scissor rectangle stored as four shorts.
 */
struct gen5_sf_viewport {
    struct {
        float m00;
        float m11;
        float m22;
        float m30;
        float m31;
        float m32;
    } viewport;

    struct {
        short xmin;
        short ymin;
        short xmax;
        short ymax;
    } scissor;
};
 
/* Documented in the subsystem/shared-functions/sampler chapter...
*/
/*
 * Surface state: six 32-bit words (ss0..ss5).
 *
 * NOTE(review): in ss0, render_cache_read_mode follows 6 cube bits and 3 pad
 * bits, i.e. it is the 10th bit declared, while GEN5_SURFACE_RC_READ_WRITE in
 * this file is (1 << 8).  Confirm against the hardware documentation which of
 * the two is intended before relying on the bitfield.
 */
struct gen5_surface_state {
    /* Cube-face enables, write-disable flags, surface format and type. */
    struct {
        unsigned int cube_pos_z:1;
        unsigned int cube_neg_z:1;
        unsigned int cube_pos_y:1;
        unsigned int cube_neg_y:1;
        unsigned int cube_pos_x:1;
        unsigned int cube_neg_x:1;
        unsigned int pad:3;
        unsigned int render_cache_read_mode:1;
        unsigned int mipmap_layout_mode:1;
        unsigned int vert_line_stride_ofs:1;
        unsigned int vert_line_stride:1;
        unsigned int color_blend:1;
        unsigned int writedisable_blue:1;
        unsigned int writedisable_green:1;
        unsigned int writedisable_red:1;
        unsigned int writedisable_alpha:1;
        unsigned int surface_format:9;
        unsigned int data_return_format:1;
        unsigned int pad0:1;
        unsigned int surface_type:3;
    } ss0;

    /* Full 32-bit base address. */
    struct {
        unsigned int base_addr;
    } ss1;

    /* Rotation, mip count and 13-bit width/height. */
    struct {
        unsigned int render_target_rotation:2;
        unsigned int mip_count:4;
        unsigned int width:13;
        unsigned int height:13;
    } ss2;

    /* Tiling flags, pitch and depth. */
    struct {
        unsigned int tile_walk:1;
        unsigned int tiled_surface:1;
        unsigned int pad:1;
        unsigned int pitch:18;
        unsigned int depth:11;
    } ss3;

    /* Minimum array element and minimum LOD. */
    struct {
        unsigned int pad:19;
        unsigned int min_array_elt:9;
        unsigned int min_lod:4;
    } ss4;

    /* X/Y offsets. */
    struct {
        unsigned int pad:20;
        unsigned int y_offset:4;
        unsigned int pad2:1;
        unsigned int x_offset:7;
    } ss5;
};
 
/*
 * Shift/mask constants for building the surface-state words as raw 32-bit
 * values, grouped by word (DW0, DW2..DW5).
 *
 * NOTE(review): neither GEN5_MASK nor INTEL_MASK is defined in this excerpt;
 * confirm both exist before using any *_MASK constant.
 */
/* Surface state DW0 */
/* NOTE(review): struct gen5_surface_state declares render_cache_read_mode as
 * the 10th bit of ss0, whereas this flag is bit 8 -- confirm which is right. */
#define GEN5_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN5_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN5_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN5_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN5_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN5_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN5_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN5_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN5_SURFACE_FORMAT_SHIFT 18
/* NOTE(review): the only mask here built with INTEL_MASK; every other mask in
 * this group uses GEN5_MASK -- confirm this is intentional. */
#define GEN5_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define GEN5_SURFACE_TYPE_SHIFT 29
#define GEN5_SURFACE_TYPE_MASK GEN5_MASK(31, 29)
#define GEN5_SURFACE_1D 0
#define GEN5_SURFACE_2D 1
#define GEN5_SURFACE_3D 2
#define GEN5_SURFACE_CUBE 3
#define GEN5_SURFACE_BUFFER 4
#define GEN5_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN5_SURFACE_HEIGHT_SHIFT 19
#define GEN5_SURFACE_HEIGHT_MASK GEN5_MASK(31, 19)
#define GEN5_SURFACE_WIDTH_SHIFT 6
#define GEN5_SURFACE_WIDTH_MASK GEN5_MASK(18, 6)
#define GEN5_SURFACE_LOD_SHIFT 2
#define GEN5_SURFACE_LOD_MASK GEN5_MASK(5, 2)
 
/* Surface state DW3 */
#define GEN5_SURFACE_DEPTH_SHIFT 21
#define GEN5_SURFACE_DEPTH_MASK GEN5_MASK(31, 21)
#define GEN5_SURFACE_PITCH_SHIFT 3
/* NOTE(review): (19, 3) names 17 bits, but struct gen5_surface_state declares
 * pitch as an 18-bit field -- confirm which width is intended. */
#define GEN5_SURFACE_PITCH_MASK GEN5_MASK(19, 3)
#define GEN5_SURFACE_TILED (1 << 1)
#define GEN5_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN5_SURFACE_MIN_LOD_SHIFT 28
#define GEN5_SURFACE_MIN_LOD_MASK GEN5_MASK(31, 28)
 
/* Surface state DW5 */
#define GEN5_SURFACE_X_OFFSET_SHIFT 25
#define GEN5_SURFACE_X_OFFSET_MASK GEN5_MASK(31, 25)
#define GEN5_SURFACE_Y_OFFSET_SHIFT 20
#define GEN5_SURFACE_Y_OFFSET_MASK GEN5_MASK(23, 20)
 
/*
 * One vertex-buffer descriptor: a control word (pitch, access type, buffer
 * index) followed by the start address, max index and instance step rate.
 */
struct gen5_vertex_buffer_state {
    struct {
        unsigned int pitch:11;
        unsigned int pad:15;
        unsigned int access_type:1;
        unsigned int vb_index:5;
    } vb0;

    unsigned int start_addr;
    unsigned int max_index;
#if 1
    unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
#endif
};
 
/* Capacity of the vertex-buffer array in gen5_vb_array_state. */
#define GEN5_VBP_MAX 17

/* Command packet: shared header followed by GEN5_VBP_MAX buffer descriptors. */
struct gen5_vb_array_state {
    struct header header;
    struct gen5_vertex_buffer_state vb[GEN5_VBP_MAX];
};
 
 
/*
 * One vertex-element descriptor (two words): ve0 selects the source buffer,
 * offset and format; ve1 gives the destination offset and four 4-bit
 * component controls.
 */
struct gen5_vertex_element_state {
    struct {
        unsigned int src_offset:11;
        unsigned int pad:5;
        unsigned int src_format:9;
        unsigned int pad0:1;
        unsigned int valid:1;
        unsigned int vertex_buffer_index:5;
    } ve0;

    struct {
        unsigned int dst_offset:8;
        unsigned int pad:8;
        unsigned int vfcomponent3:4;
        unsigned int vfcomponent2:4;
        unsigned int vfcomponent1:4;
        unsigned int vfcomponent0:4;
    } ve1;
};
 
/* Capacity of the element array in gen5_vertex_element_packet. */
#define GEN5_VEP_MAX 18

/* Command packet: shared header followed by GEN5_VEP_MAX element descriptors. */
struct gen5_vertex_element_packet {
    struct header header;
    struct gen5_vertex_element_state ve[GEN5_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
};
 
 
/*
 * 32-bit URB message descriptor.  Its last five fields (response_length,
 * msg_length, msg_target, pad1, end_of_thread) have the same widths and order
 * as the tail of the other message layouts in gen5_instruction.bits3.
 */
struct gen5_urb_immediate {
    unsigned int opcode:4;
    unsigned int offset:6;
    unsigned int swizzle_control:2;
    unsigned int pad:1;
    unsigned int allocate:1;
    unsigned int used:1;
    unsigned int complete:1;
    unsigned int response_length:4;
    unsigned int msg_length:4;
    unsigned int msg_target:4;
    unsigned int pad1:3;
    unsigned int end_of_thread:1;
};
 
/* Instruction format for the execution units:
*/
struct gen5_instruction
{
/* Word 0: opcode plus execution controls shared by every instruction. */
struct
{
unsigned int opcode:7;
unsigned int pad:1;
unsigned int access_mode:1;
unsigned int mask_control:1;
unsigned int dependency_control:2;
unsigned int compression_control:2;
unsigned int thread_control:2;
unsigned int predicate_control:4;
unsigned int predicate_inverse:1;
unsigned int execution_size:3;
unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
unsigned int pad0:2;
unsigned int debug_control:1;
unsigned int saturate:1;
} header;

/* Word 1: destination operand plus register file/type selectors.  Four
 * alternative 32-bit layouts share the storage.
 * NOTE(review): da/ia presumably mean direct/indirect addressing and 1/16 the
 * access mode -- confirm against the ISA documentation. */
union {
struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad:1;
unsigned int dest_subreg_nr:5;
unsigned int dest_reg_nr:8;
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} da1;

struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad:6;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned int dest_horiz_stride:2;
unsigned int dest_address_mode:1;
} ia1;

struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int src1_reg_file:2;
unsigned int src1_reg_type:3;
unsigned int pad0:1;
unsigned int dest_writemask:4;
unsigned int dest_subreg_nr:1;
unsigned int dest_reg_nr:8;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} da16;

struct
{
unsigned int dest_reg_file:2;
unsigned int dest_reg_type:3;
unsigned int src0_reg_file:2;
unsigned int src0_reg_type:3;
unsigned int pad0:6;
unsigned int dest_writemask:4;
int dest_indirect_offset:6;
unsigned int dest_subreg_nr:3;
unsigned int pad1:2;
unsigned int dest_address_mode:1;
} ia16;
} bits1;


/* Word 2: first source operand, in the same four layout variants.  The
 * indirect offsets are declared as signed bitfields. */
union {
struct
{
unsigned int src0_subreg_nr:5;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} da1;

struct
{
int src0_indirect_offset:10;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_horiz_stride:2;
unsigned int src0_width:3;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad:6;
} ia1;

struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
unsigned int src0_subreg_nr:1;
unsigned int src0_reg_nr:8;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} da16;

struct
{
unsigned int src0_swz_x:2;
unsigned int src0_swz_y:2;
int src0_indirect_offset:6;
unsigned int src0_subreg_nr:3;
unsigned int src0_abs:1;
unsigned int src0_negate:1;
unsigned int src0_address_mode:1;
unsigned int src0_swz_z:2;
unsigned int src0_swz_w:2;
unsigned int pad0:1;
unsigned int src0_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia16;

} bits2;

/* Word 3: second source operand (da1/da16/ia1/ia16), or one of the
 * alternative encodings that share the same 32 bits: if_else jump data, the
 * math/sampler/urb/dp_read/dp_write/generic message descriptors, or the raw
 * value `ud`. */
union
{
struct
{
unsigned int src1_subreg_nr:5;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int pad0:7;
} da1;

struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
unsigned int src1_subreg_nr:1;
unsigned int src1_reg_nr:8;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int pad2:7;
} da16;

struct
{
int src1_indirect_offset:10;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_horiz_stride:2;
unsigned int src1_width:3;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad1:6;
} ia1;

struct
{
unsigned int src1_swz_x:2;
unsigned int src1_swz_y:2;
int src1_indirect_offset:6;
unsigned int src1_subreg_nr:3;
unsigned int src1_abs:1;
unsigned int src1_negate:1;
unsigned int pad0:1;
unsigned int src1_swz_z:2;
unsigned int src1_swz_w:2;
unsigned int pad1:1;
unsigned int src1_vert_stride:4;
unsigned int flag_reg_nr:1;
unsigned int pad2:6;
} ia16;


struct
{
int jump_count:16; /* note: signed */
unsigned int pop_count:4;
unsigned int pad0:12;
} if_else;

/* The message layouts below all end with the same five fields:
 * response_length, msg_length, msg_target, pad1, end_of_thread. */
struct {
unsigned int function:4;
unsigned int int_type:1;
unsigned int precision:1;
unsigned int saturate:1;
unsigned int data_type:1;
unsigned int pad0:8;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} math;

struct {
unsigned int binding_table_index:8;
unsigned int sampler:4;
unsigned int return_format:2;
unsigned int msg_type:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} sampler;

struct gen5_urb_immediate urb;

struct {
unsigned int binding_table_index:8;
unsigned int msg_control:4;
unsigned int msg_type:2;
unsigned int target_cache:2;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_read;

struct {
unsigned int binding_table_index:8;
unsigned int msg_control:3;
unsigned int pixel_scoreboard_clear:1;
unsigned int msg_type:3;
unsigned int send_commit_msg:1;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} dp_write;

struct {
unsigned int pad:16;
unsigned int response_length:4;
unsigned int msg_length:4;
unsigned int msg_target:4;
unsigned int pad1:3;
unsigned int end_of_thread:1;
} generic;

/* Raw access to the whole word. */
unsigned int ud;
} bits3;
};
 
/* media pipeline */
 
/*
 * VFE state (three words): scratch space controls, URB/thread limits and
 * mode, and the interface descriptor base.
 */
struct gen5_vfe_state {
    struct {
        unsigned int per_thread_scratch_space:4;
        unsigned int pad3:3;
        unsigned int extend_vfe_state_present:1;
        unsigned int pad2:2;
        unsigned int scratch_base:22;
    } vfe0;

    struct {
        unsigned int debug_counter_control:2;
        unsigned int children_present:1;
        unsigned int vfe_mode:4;
        unsigned int pad2:2;
        unsigned int num_urb_entries:7;
        unsigned int urb_entry_alloc_size:9;
        unsigned int max_threads:7;
    } vfe1;

    struct {
        unsigned int pad4:4;
        unsigned int interface_descriptor_base:28;
    } vfe2;
};
 
/*
 * VLD state (four words): picture-level decode flags and f_codes, the
 * picture coding type, and a 16-entry remap table of 4-bit indices split
 * across two words.
 */
struct gen5_vld_state {
    struct {
        unsigned int pad6:6;
        unsigned int scan_order:1;
        unsigned int intra_vlc_format:1;
        unsigned int quantizer_scale_type:1;
        unsigned int concealment_motion_vector:1;
        unsigned int frame_predict_frame_dct:1;
        unsigned int top_field_first:1;
        unsigned int picture_structure:2;
        unsigned int intra_dc_precision:2;
        unsigned int f_code_0_0:4;
        unsigned int f_code_0_1:4;
        unsigned int f_code_1_0:4;
        unsigned int f_code_1_1:4;
    } vld0;

    struct {
        unsigned int pad2:9;
        unsigned int picture_coding_type:2;
        unsigned int pad:21;
    } vld1;

    /* Remap entries 0..7. */
    struct {
        unsigned int index_0:4;
        unsigned int index_1:4;
        unsigned int index_2:4;
        unsigned int index_3:4;
        unsigned int index_4:4;
        unsigned int index_5:4;
        unsigned int index_6:4;
        unsigned int index_7:4;
    } desc_remap_table0;

    /* Remap entries 8..15. */
    struct {
        unsigned int index_8:4;
        unsigned int index_9:4;
        unsigned int index_10:4;
        unsigned int index_11:4;
        unsigned int index_12:4;
        unsigned int index_13:4;
        unsigned int index_14:4;
        unsigned int index_15:4;
    } desc_remap_table1;
};
 
/*
 * Interface descriptor (four words): kernel pointer and GRF block count,
 * exception/mode flags with constant URB read range, sampler state pointer
 * and binding table pointer.
 */
struct gen5_interface_descriptor {
    struct {
        unsigned int grf_reg_blocks:4;
        unsigned int pad:2;
        unsigned int kernel_start_pointer:26;
    } desc0;

    struct {
        unsigned int pad:7;
        unsigned int software_exception:1;
        unsigned int pad2:3;
        unsigned int maskstack_exception:1;
        unsigned int pad3:1;
        unsigned int illegal_opcode_exception:1;
        unsigned int pad4:2;
        unsigned int floating_point_mode:1;
        unsigned int thread_priority:1;
        unsigned int single_program_flow:1;
        unsigned int pad5:1;
        unsigned int const_urb_entry_read_offset:6;
        unsigned int const_urb_entry_read_len:6;
    } desc1;

    struct {
        unsigned int pad:2;
        unsigned int sampler_count:3;
        unsigned int sampler_state_pointer:27;
    } desc2;

    struct {
        unsigned int binding_table_entry_count:5;
        unsigned int binding_table_pointer:27;
    } desc3;
};
 
/*
 * Gen6 blend state (two words): blend0 holds the colour and independent-alpha
 * blend factors/functions and enables; blend1 holds clamp, dither,
 * alpha-test, logic-op, write-disable and alpha-to-coverage controls.
 */
struct gen6_blend_state {
    struct {
        unsigned int dest_blend_factor:5;
        unsigned int source_blend_factor:5;
        unsigned int pad3:1;
        unsigned int blend_func:3;
        unsigned int pad2:1;
        unsigned int ia_dest_blend_factor:5;
        unsigned int ia_source_blend_factor:5;
        unsigned int pad1:1;
        unsigned int ia_blend_func:3;
        unsigned int pad0:1;
        unsigned int ia_blend_enable:1;
        unsigned int blend_enable:1;
    } blend0;

    struct {
        unsigned int post_blend_clamp_enable:1;
        unsigned int pre_blend_clamp_enable:1;
        unsigned int clamp_range:2;
        unsigned int pad0:4;
        unsigned int x_dither_offset:2;
        unsigned int y_dither_offset:2;
        unsigned int dither_enable:1;
        unsigned int alpha_test_func:3;
        unsigned int alpha_test_enable:1;
        unsigned int pad1:1;
        unsigned int logic_op_func:4;
        unsigned int logic_op_enable:1;
        unsigned int pad2:1;
        unsigned int write_disable_b:1;
        unsigned int write_disable_g:1;
        unsigned int write_disable_r:1;
        unsigned int write_disable_a:1;
        unsigned int pad3:1;
        unsigned int alpha_to_coverage_dither:1;
        unsigned int alpha_to_one:1;
        unsigned int alpha_to_coverage:1;
    } blend1;
};
 
/*
 * Gen6 colour-calc state: a word with the stencil references and alpha-test
 * format, the alpha reference (float, or an 8-bit integer in the low bits),
 * and four float constant-colour components.
 */
struct gen6_color_calc_state {
    struct {
        unsigned int alpha_test_format:1;
        unsigned int pad0:14;
        unsigned int round_disable:1;
        unsigned int bf_stencil_ref:8;
        unsigned int stencil_ref:8;
    } cc0;

    /* Same word viewed as a float or as an 8-bit unsigned value. */
    union {
        float alpha_ref_f;
        struct {
            unsigned int ui:8;
            unsigned int pad0:24;
        } alpha_ref_fi;
    } cc1;

    float constant_r;
    float constant_g;
    float constant_b;
    float constant_a;
};
 
/*
 * Gen6 depth/stencil state (three words): stencil ops and functions, stencil
 * masks, and the depth test/write controls.
 */
struct gen6_depth_stencil_state {
    struct {
        unsigned int pad0:3;
        unsigned int bf_stencil_pass_depth_pass_op:3;
        unsigned int bf_stencil_pass_depth_fail_op:3;
        unsigned int bf_stencil_fail_op:3;
        unsigned int bf_stencil_func:3;
        unsigned int bf_stencil_enable:1;
        unsigned int pad1:2;
        unsigned int stencil_write_enable:1;
        unsigned int stencil_pass_depth_pass_op:3;
        unsigned int stencil_pass_depth_fail_op:3;
        unsigned int stencil_fail_op:3;
        unsigned int stencil_func:3;
        unsigned int stencil_enable:1;
    } ds0;

    struct {
        unsigned int bf_stencil_write_mask:8;
        unsigned int bf_stencil_test_mask:8;
        unsigned int stencil_write_mask:8;
        unsigned int stencil_test_mask:8;
    } ds1;

    struct {
        unsigned int pad0:26;
        unsigned int depth_write_enable:1;
        unsigned int depth_test_func:3;
        unsigned int pad1:1;
        unsigned int depth_test_enable:1;
    } ds2;
};
 
/* Sampler filter selectors; FILTER_COUNT is the number of real entries. */
typedef enum {
    SAMPLER_FILTER_NEAREST  = 0,
    SAMPLER_FILTER_BILINEAR = 1,
    FILTER_COUNT
} sampler_filter_t;
 
/* Sampler extend selectors; EXTEND_COUNT is the number of real entries. */
typedef enum {
    SAMPLER_EXTEND_NONE    = 0,
    SAMPLER_EXTEND_REPEAT  = 1,
    SAMPLER_EXTEND_PAD     = 2,
    SAMPLER_EXTEND_REFLECT = 3,
    EXTEND_COUNT
} sampler_extend_t;
 
/*
 * WM kernel selectors.  Entries come in pairs where the odd value carries a
 * _P suffix, followed by the two video kernels; KERNEL_COUNT is the number of
 * real entries.
 */
typedef enum {
    WM_KERNEL              = 0,
    WM_KERNEL_P            = 1,

    WM_KERNEL_MASK         = 2,
    WM_KERNEL_MASK_P       = 3,

    WM_KERNEL_MASKCA       = 4,
    WM_KERNEL_MASKCA_P     = 5,

    WM_KERNEL_MASKSA       = 6,
    WM_KERNEL_MASKSA_P     = 7,

    WM_KERNEL_OPACITY      = 8,
    WM_KERNEL_OPACITY_P    = 9,

    WM_KERNEL_VIDEO_PLANAR = 10,
    WM_KERNEL_VIDEO_PACKED = 11,
    KERNEL_COUNT
} wm_kernel_t;
#endif
/drivers/video/Intel-2D/sna/gen6_render.c
0,0 → 1,3583
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@intel.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen6_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/*
 * Compile-time switches for this file, all 0 here.
 * NOTE(review): presumably setting a NO_* switch to 1 disables the matching
 * acceleration path for debugging -- their uses are outside this excerpt.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 0
#define PREFER_RENDER 0
 
/* Pixel-shader dispatch widths to enable; 8 and 16 are on, 32 is off. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0
 
/* Refuse to build if every dispatch width has been switched off. */
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif
 
/* NOTE(review): presumably the largest surface dimension handled by this
 * backend -- confirm at the use sites. */
#define GEN6_MAX_SIZE 8192
 
/*
 * Per-variant limits: a display name, the maximum thread counts for the
 * VS, GS and WM stages, and the URB size with its VS/GS entry limits.
 */
struct gt_info {
    const char *name;

    int max_vs_threads;
    int max_gs_threads;
    int max_wm_threads;

    struct {
        int size;
        int max_vs_entries;
        int max_gs_entries;
    } urb;
};
 
/* Limits for the "gt1" Sandybridge variant. */
static const struct gt_info gt1_info = {
    .name = "Sandybridge (gen6, gt1)",
    .max_vs_threads = 24,
    .max_gs_threads = 21,
    .max_wm_threads = 40,
    .urb = {
        .size = 32,
        .max_vs_entries = 256,
        .max_gs_entries = 256,
    },
};
 
/* Limits for the "gt2" Sandybridge variant. */
static const struct gt_info gt2_info = {
    .name = "Sandybridge (gen6, gt2)",
    .max_vs_threads = 60,
    .max_gs_threads = 60,
    .max_wm_threads = 80,
    .urb = {
        .size = 64,
        .max_vs_entries = 256,
        .max_gs_entries = 256,
    },
};
 
/*
 * Rows of four 32-bit words whose initializers come from the included .g6b
 * files, concatenated in the order listed.  Differs from ps_kernel_planar
 * only in the second fragment (sample_argb instead of sample_planar).
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
/*
 * Rows of four 32-bit words whose initializers come from the included .g6b
 * files, concatenated in the order listed.  Differs from ps_kernel_packed
 * only in the second fragment (sample_planar instead of sample_argb).
 */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
/*
 * Helpers for filling wm_kernels[].  Both place the entry at index
 * GEN6_WM_KERNEL_<kernel_enum> and use the stringified enum suffix as name.
 * NOKERNEL stores `func` with size 0; KERNEL stores a static array together
 * with its sizeof.  `ns` becomes num_surfaces.
 */
#define NOKERNEL(kernel_enum, func, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
 
/*
 * Table of WM kernels indexed by GEN6_WM_KERNEL_*.
 * NOTE(review): a size of 0 presumably tells the consumer that `data` is a
 * generator function rather than precompiled code -- confirm at the use site.
 */
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
unsigned int num_surfaces;
} wm_kernels[] = {
NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
 
NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
 
NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
 
NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
 
NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
 
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
/* NOTE(review): only KERNEL is undefined here; NOKERNEL stays defined for the
 * rest of the file.  Confirm nothing later uses it before adding an #undef. */
#undef KERNEL
 
/* Blend factors per compositing operator, one row per operator in the order
 * named by the row comments. Indexed by the operator number:
 * gen6_choose_composite_kernel() reads gen6_blend_op[op].src_alpha. */
static const struct blendinfo {
bool src_alpha; /* set on rows whose dst_blend refers to source alpha */
uint32_t src_blend; /* factor applied to the source */
uint32_t dst_blend; /* factor applied to the destination */
} gen6_blend_op[] = {
/* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
/* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
/* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
/* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
 
/**
* Number of BLENDFACTOR values covered by the blend-state table: one more
* than GEN6_BLENDFACTOR_INV_DST_ALPHA, the highest-valued factor used in
* gen6_blend_op.
*
* This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
* GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Size of one blend state entry, rounded up to a multiple of 64 bytes. */
#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)

/* Byte offset of the entry for the (source factor, destination factor) pair
 * in a row-major table with GEN6_BLENDFACTOR_COUNT columns. */
#define BLEND_OFFSET(s, d) \
(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

/* Offsets of two frequently used entries: ONE/ZERO and ZERO/ZERO. */
#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)
 
/* Byte offset of the sampler-state pair for (source filter, source extend,
 * mask filter, mask extend). Each slot holds two gen6_sampler_state
 * structures. The "+ 2" skips the first two slots, which sit at the offsets
 * COPY_SAMPLER (0) and FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state)). */
#define SAMPLER_OFFSET(sf, se, mf, me) \
(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))
 
/* Vertex layout id 0; gen6_emit_vertex_elements() fetches the first texture
 * coordinate as R16G16_SSCALED for this id. */
#define VERTEX_2s2s 0

/* Packed state flags for copies: sampler slot 0, the 2s2s vertex layout and
 * the NOMASK kernel. The blend entry is NO_BLEND for GXcopy and CLEAR for
 * any other alu value. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

/* Packed state flags for fills: the second sampler slot, the 2s2s vertex
 * layout and the NOMASK kernel. FILL_FLAGS takes its blend entry from
 * gen6_get_blend(); FILL_FLAGS_NOBLEND always uses NO_BLEND. */
#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
 
/* Packing of the per-operation state word:
 *   bits 31..20  sampler offset (mask 0xfff0 after shifting right by 16)
 *   bits 19..16  kernel id
 *   bits 15..4   blend offset (mask 0xfff0)
 *   bits  3..0   vertex layout id
 * The sampler and blend offsets are expected to have their low four bits
 * clear, so they can share a half-word with the kernel and vertex ids. */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))
 
/* Emission shorthands. All three expand to calls that use a variable named
 * `sna`, which must be in scope at the point of use. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
/* Returns true when either dimension exceeds GEN6_MAX_SIZE. */
static inline bool too_large(int width, int height)
{
	if (width > GEN6_MAX_SIZE)
		return true;

	return height > GEN6_MAX_SIZE;
}
 
/* Returns the blend-state offset to use for an operation.
 *
 * In this build the per-operator lookup is disabled. The function always
 * returns BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA),
 * which is the pair the gen6_blend_op table lists for "Over". The three
 * parameters are only referenced by the code inside "#if 0" and therefore
 * have no effect on the result. */
static uint32_t gen6_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;


/* Hard-wired factors; the trailing comments show the table lookups they
 * replace. */
src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;

// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;

#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN6_BLENDFACTOR_DST_ALPHA)
src = GEN6_BLENDFACTOR_ONE;
else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
src = GEN6_BLENDFACTOR_ZERO;
}

/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen6_blend_op[op].src_alpha) {
if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
}

DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, (int)BLEND_OFFSET(src, dst)));
#endif

return BLEND_OFFSET(src, dst);
}
 
/* Maps a picture format onto the matching GEN6 surface format.
 * Returns (uint32_t)-1 for any format without an entry below. */
static uint32_t gen6_get_card_format(PictFormat format)
{
	uint32_t card_format;

	switch (format) {
	case PICT_a8r8g8b8:
		card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
		break;
	case PICT_x8r8g8b8:
		card_format = GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
		break;
	case PICT_a8b8g8r8:
		card_format = GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
		break;
	case PICT_x8b8g8r8:
		card_format = GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
		break;
	case PICT_a2r10g10b10:
		card_format = GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
		break;
	case PICT_x2r10g10b10:
		card_format = GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
		break;
	case PICT_r8g8b8:
		card_format = GEN6_SURFACEFORMAT_R8G8B8_UNORM;
		break;
	case PICT_r5g6b5:
		card_format = GEN6_SURFACEFORMAT_B5G6R5_UNORM;
		break;
	case PICT_a1r5g5b5:
		card_format = GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
		break;
	case PICT_a8:
		card_format = GEN6_SURFACEFORMAT_A8_UNORM;
		break;
	case PICT_a4r4g4b4:
		card_format = GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
		break;
	default:
		/* unsupported: all bits set */
		card_format = -1;
		break;
	}

	return card_format;
}
 
/* Returns the surface format used for a render target.
 *
 * In this build the result is always GEN6_SURFACEFORMAT_B8G8R8A8_UNORM and
 * `format` is ignored; the per-format switch is kept under "#if 0". */
static uint32_t gen6_get_dest_format(PictFormat format)
{
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

#if 0

switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
#endif

}
 
#if 0
 
/* Reports whether gen6_get_dest_format() has a mapping for `format`;
 * logs the format when it does not. */
static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) == (uint32_t)-1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}

	return true;
}
 
/* Reports whether gen6_get_card_format() has a mapping for `format`;
 * logs the format when it does not. */
static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) == (uint32_t)-1) {
		DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
		return false;
	}

	return true;
}
 
/* Translates a picture filter into the SAMPLER_FILTER_* index.
 * An unknown filter trips the assertion; when assertions are compiled out
 * control falls through and the nearest filter is returned. */
static uint32_t gen6_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
/* deliberate fall through to the nearest case */
case PictFilterNearest:
return SAMPLER_FILTER_NEAREST;
case PictFilterBilinear:
return SAMPLER_FILTER_BILINEAR;
}
}
 
/* Returns non-zero when the picture uses the nearest or the bilinear
 * filter, and zero for every other filter. */
static uint32_t gen6_check_filter(PicturePtr picture)
{
	return picture->filter == PictFilterNearest ||
	       picture->filter == PictFilterBilinear;
}
 
/* Translates a repeat type into the SAMPLER_EXTEND_* index.
 * An unknown value trips the assertion; when assertions are compiled out
 * control falls through and SAMPLER_EXTEND_NONE is returned. */
static uint32_t gen6_repeat(uint32_t repeat)
{
switch (repeat) {
default:
assert(0);
/* deliberate fall through to the RepeatNone case */
case RepeatNone:
return SAMPLER_EXTEND_NONE;
case RepeatNormal:
return SAMPLER_EXTEND_REPEAT;
case RepeatPad:
return SAMPLER_EXTEND_PAD;
case RepeatReflect:
return SAMPLER_EXTEND_REFLECT;
}
}
 
/* Accepts pictures that do not repeat at all, or whose repeat type is one
 * of None, Normal, Pad or Reflect. */
static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	return picture->repeatType == RepeatNone ||
	       picture->repeatType == RepeatNormal ||
	       picture->repeatType == RepeatPad ||
	       picture->repeatType == RepeatReflect;
}
#endif
 
/* Picks the WM kernel id for a composite operation.
 *
 * Without a mask the NOMASK kernel is used. With a plain mask the MASK
 * kernel is used. With a component-alpha mask the choice is MASKSA when the
 * operator's blend entry has src_alpha set and MASKCA otherwise. One is
 * added to the id when the operation is not affine.
 * NOTE(review): that relies on each *_P enum value directly following its
 * affine counterpart; the enum is declared outside this chunk. */
static int
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int kernel = GEN6_WM_KERNEL_NOMASK;

	if (has_mask) {
		if (!is_ca)
			kernel = GEN6_WM_KERNEL_MASK;
		else if (gen6_blend_op[op].src_alpha)
			kernel = GEN6_WM_KERNEL_MASKSA;
		else
			kernel = GEN6_WM_KERNEL_MASKCA;
	}

	if (!is_affine)
		kernel += 1;

	return kernel;
}
 
/* Emits 3DSTATE_URB: the VS gets the entry count from the GT info table
 * with a size field of (1 - 1); the GS gets no entries. */
static void
gen6_emit_urb(struct sna *sna)
{
	const uint32_t vs_dw =
		((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
		(sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT); /* at least 24 on GEN6 */
	const uint32_t gs_dw =
		(0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
		(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT); /* no GS thread */

	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
	OUT_BATCH(vs_dw);
	OUT_BATCH(gs_dw);
}
 
/* Emits STATE_BASE_ADDRESS (10 dwords).
 *
 * Each relocation is registered at sna->kgem.nbatch, which is the index of
 * the dword about to be written, so every kgem_add_reloc() call has to stay
 * inside its own OUT_BATCH().
 * NOTE(review): the per-dword labels below are the original author's; check
 * them against the hardware manual's STATE_BASE_ADDRESS layout before
 * relying on them. Both labelled-and-unlabelled general_bo relocations
 * carry the same target, so the labels do not affect the emitted values. */
static void
gen6_emit_state_base_address(struct sna *sna)
{
OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
/* relocation with a NULL bo */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));

/* upper bounds, disable */
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
/* Emits 3DSTATE_VIEWPORT_STATE_POINTERS with the CC modify bit set and all
 * three pointer dwords zero. */
static void
gen6_emit_viewports(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
		  (4 - 2));
	for (i = 0; i < 3; i++)
		OUT_BATCH(0);
}
 
/* Emits an all-zero 3DSTATE_CONSTANT_VS (constant buffer disabled)
 * followed by an all-zero 3DSTATE_VS (no VS kernel, pass-through). */
static void
gen6_emit_vs(struct sna *sna)
{
	int i;

	/* disable VS constant buffer: header + 4 zero dwords */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
	for (i = 0; i < 4; i++)
		OUT_BATCH(0);

	/* no VS kernel, pass-through: header + 5 zero dwords */
	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
	for (i = 0; i < 5; i++)
		OUT_BATCH(0);
}
 
/* Emits an all-zero 3DSTATE_CONSTANT_GS (constant buffer disabled)
 * followed by an all-zero 3DSTATE_GS (no GS kernel, pass-through). */
static void
gen6_emit_gs(struct sna *sna)
{
	int i;

	/* disable GS constant buffer: header + 4 zero dwords */
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
	for (i = 0; i < 4; i++)
		OUT_BATCH(0);

	/* no GS kernel, pass-through: header + 6 zero dwords */
	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
	for (i = 0; i < 6; i++)
		OUT_BATCH(0);
}
 
/* Emits 3DSTATE_CLIP with every payload dword zero (pass-through). */
static void
gen6_emit_clip(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
	for (i = 0; i < 3; i++)
		OUT_BATCH(0);
}
 
/* Emits an all-zero 3DSTATE_CONSTANT_PS, disabling the WM constant buffer. */
static void
gen6_emit_wm_constants(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
	for (i = 0; i < 4; i++)
		OUT_BATCH(0);
}
 
/* Emits 3DSTATE_DEPTH_BUFFER describing a NULL surface (format field set to
 * D32_FLOAT, remaining dwords zero), then a zeroed 3DSTATE_CLEAR_PARAMS. */
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
	for (i = 0; i < 5; i++)
		OUT_BATCH(0);

	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
	OUT_BATCH(0);
}
 
/* Emits the state that is set up once and then left alone: 3D pipeline
 * select, single-sample multisample state, sample mask, URB layout, base
 * addresses, and the fixed VS/GS/clip/WM-constant/depth state. Clears
 * needs_invariant so gen6_get_batch() does not emit it again. */
static void
gen6_emit_invariant(struct sna *sna)
{
OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);

OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
OUT_BATCH(0);

OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
OUT_BATCH(1);

gen6_emit_urb(sna);

gen6_emit_state_base_address(sna);

gen6_emit_viewports(sna);
gen6_emit_vs(sna);
gen6_emit_gs(sna);
gen6_emit_clip(sna);
gen6_emit_wm_constants(sna);
gen6_emit_null_depth_buffer(sna);

sna->render_state.gen6.needs_invariant = false;
}
 
/* Selects the blend state at byte offset `blend` from the cc_blend base.
 *
 * Nothing is emitted when `blend` is already the cached value. When the
 * cached value is (unsigned)-1, the two trailing dwords are written as 1;
 * otherwise they are written as 0.
 *
 * Returns true when `blend` differs from NO_BLEND, whether or not anything
 * was emitted. */
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
	struct gen6_render_state *render = &sna->render_state.gen6;

	if (render->blend != blend) {
		/* 1 while no blend state has been cached yet, else 0 */
		const uint32_t trailing = render->blend == (unsigned)-1;

		DBG(("%s: blend = %x\n", __FUNCTION__, blend));

		OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
		OUT_BATCH((render->cc_blend + blend) | 1);
		OUT_BATCH(trailing);
		OUT_BATCH(trailing);

		render->blend = blend;
	}

	return blend != NO_BLEND;
}
 
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
if (sna->render_state.gen6.samplers == state)
return;
 
sna->render_state.gen6.samplers = state;
 
DBG(("%s: sampler = %x\n", __FUNCTION__, state));
 
OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
 
/* Emits 3DSTATE_SF (20 dwords) for one SF output without a mask and two
 * with a mask. Skipped when the output count matches the cached value.
 *
 * The URB read length and read offset are both programmed as 1. The debug
 * trace used to print a read offset of 0, which did not match the emitted
 * command; it now prints the values that are actually written. */
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
	const int read_length = 1;
	const int read_offset = 1;
	int num_sf_outputs = has_mask ? 2 : 1;
	int i;

	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, read_length, read_offset));

	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
		  read_length << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
		  read_offset << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
	OUT_BATCH(0);
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */

	/* DW5 .. DW19 are all zero */
	for (i = 5; i <= 19; i++)
		OUT_BATCH(0);
}
 
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
const uint32_t *kernels;
 
if (sna->render_state.gen6.kernel == kernel)
return;
 
sna->render_state.gen6.kernel = kernel;
kernels = sna->render_state.gen6.wm_kernel[kernel];
 
DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
__FUNCTION__,
wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
kernels[0], kernels[1], kernels[2]));
 
OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0); /* scratch space */
OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
(kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
(kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
(kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
GEN6_3DSTATE_WM_DISPATCH_ENABLE);
OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
OUT_BATCH(kernels[2]);
OUT_BATCH(kernels[1]);
}
 
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen6.surface_table == offset)
return false;
 
/* Binding table pointers */
OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
 
sna->render_state.gen6.surface_table = offset;
return true;
}
 
/* Emits 3DSTATE_DRAWING_RECTANGLE for the destination of `op`, preceded by
 * the PIPE_CONTROL workaround sequence described below.
 *
 * Returns false without emitting anything when both the limit and the
 * offset match the cached values, and true otherwise. */
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
/* limit packs (height - 1, width - 1), offset packs (y, x), each as two
 * 16-bit halves */
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;

assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));

if (sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
return false;

/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*/
/* The CS-stall PIPE_CONTROL is skipped for the first state packet. */
if (!sna->render_state.gen6.first_state_packet) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}

/* Post-sync write targeting byte offset 64 of general_bo */
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16 |
I915_GEM_DOMAIN_INSTRUCTION,
64));
OUT_BATCH(0);

OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);

sna->render_state.gen6.drawrect_offset = offset;
sna->render_state.gen6.drawrect_limit = limit;
return true;
}
 
/* Emits 3DSTATE_VERTEX_ELEMENTS for the vertex layout id stored in the
 * operation's flags. Skipped when that id is already the cached one.
 *
 * The id encodes two fields: (id & 3) selects the format of the first
 * texture coordinate and (id >> 2) selects the format of the second one.
 * A non-zero (id >> 2) means a mask channel is present. */
static void
gen6_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen6_render_state *render = &sna->render_state.gen6;
uint32_t src_format, dw;
int id = GEN6_VERTEX(op->u.gen6.flags);
bool has_mask;

DBG(("%s: setup id=%d\n", __FUNCTION__, id));

if (render->ve_id == id)
return;
render->ve_id = id;

/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
has_mask = (id >> 2) != 0;
/* three elements without a mask, four with; two dwords per element */
OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
((2 * (3 + has_mask)) + 1 - 2));

/* pad: all four components stored as 0 */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);

/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);

/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
/* falls through to case 0 when assertions are compiled out */
case 0:
/* two signed 16-bit values */
src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
/* one float */
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
/* two floats */
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
/* three floats */
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
/* the first channel starts 4 bytes in, right after the packed x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);

/* u1, v1, w1 */
if (has_mask) {
/* skip x,y (4 bytes) plus the first channel; an (id & 3) of 0 is counted
 * as one float because the 2s form also occupies 4 bytes */
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
/* falls through to case 2 when assertions are compiled out */
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
/* Emits a PIPE_CONTROL carrying the WC_FLUSH, TC_FLUSH and CS_STALL bits. */
static void
gen6_emit_flush(struct sna *sna)
{
	const uint32_t flush_bits = GEN6_PIPE_CONTROL_WC_FLUSH |
				    GEN6_PIPE_CONTROL_TC_FLUSH |
				    GEN6_PIPE_CONTROL_CS_STALL;

	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(flush_bits);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
 
/* Emits all per-operation state: blend, sampler, SF, WM kernel, vertex
 * elements, binding table and drawing rectangle.
 *
 * `wm_binding_table` carries the binding table offset with bit 0 reused as
 * a "destination is dirty" flag (gen6_emit_composite_state() passes
 * offset | dirty). That flag seeds need_stall.
 *
 * A stall PIPE_CONTROL is emitted at the end only when need_stall is still
 * set. It is cleared when the blend state is not NO_BLEND, when the drawing
 * rectangle was re-emitted, or when a full flush was issued because the
 * source or mask bo is dirty. */
static void
gen6_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall = wm_binding_table & 1;

assert(op->dst.bo->exec);

if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
/* a non-zero (vertex id >> 2) means the operation has a mask */
gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_vertex_elements(sna, op);

/* a changed binding table also requests a stall */
need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
if (gen6_emit_drawing_rectangle(sna, op))
need_stall = false;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
/* the destination is marked dirty again right after the global clear */
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
if (need_stall) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
sna->render_state.gen6.first_state_packet = false;
}
 
/* Second pass for component-alpha operations.
 *
 * When op->need_magic_ca_pass is set, this flushes, switches to the blend
 * state returned by gen6_get_blend(PictOpAdd, ...) and to the
 * component-alpha mask kernel chosen for PictOpAdd, and then re-draws the
 * vertices from vertex_start up to vertex_index as a rectangle list.
 *
 * Returns true when the extra pass was emitted and false otherwise. The
 * caller is responsible for restoring the original blend and kernel state
 * (see gen6_get_rectangles__flush()). */
static bool gen6_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen6_render_state *state = &sna->render_state.gen6;

if (!op->need_magic_ca_pass)
return false;

DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));

gen6_emit_flush(sna);

gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
gen6_emit_wm(sna,
gen6_choose_composite_kernel(PictOpAdd,
true, true,
op->is_affine),
true);

OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */

/* remember where this primitive ended so gen6_emit_primitive() can
 * recognise it */
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
/* A surface state padded to exactly 32 bytes. The same size is used as the
 * allocation unit for binding tables in
 * gen6_composite_get_binding_table(). */
typedef struct gen6_surface_state_padded {
struct gen6_surface_state state;
char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
 
/* Reserves 64 bytes in the static stream (arguments 64, 64 to
 * sna_static_stream_map()).
 * NOTE(review): the second argument is presumably the alignment, and the
 * "zeros" in the comment below rely on the stream being zero-filled; both
 * depend on sna_static_stream_map(), which is outside this chunk. */
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
/* Reserves a further 64 bytes in the static stream as scratch space.
 * gen6_emit_drawing_rectangle() targets byte offset 64 of general_bo with
 * its PIPE_CONTROL write.
 * NOTE(review): that offset matching this region assumes null_create() and
 * then this function are the first two allocations in the stream -
 * confirm against the caller. */
static void scratch_create(struct sna_static_stream *stream)
{
/* 64 bytes of scratch space for random writes, such as
* the pipe-control w/a.
*/
sna_static_stream_map(stream, 64, 64);
}
 
/* Fills in one sampler state for the given filter and extend mode.
 *
 * Only lod_preclamp, border_color_mode, the min/mag filters and the three
 * wrap modes are written; the caller supplies the remaining fields.
 * Unknown filter values are treated as nearest and unknown extend values
 * as "none" (clamp to border). */
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	unsigned int map_filter;
	unsigned int wrap_mode;

	switch (filter) {
	case SAMPLER_FILTER_BILINEAR:
		map_filter = GEN6_MAPFILTER_LINEAR;
		break;
	case SAMPLER_FILTER_NEAREST:
	default:
		map_filter = GEN6_MAPFILTER_NEAREST;
		break;
	}

	switch (extend) {
	case SAMPLER_EXTEND_REPEAT:
		wrap_mode = GEN6_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
		break;
	case SAMPLER_EXTEND_NONE:
	default:
		wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
		break;
	}

	sampler_state->ss0.lod_preclamp = 1; /* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;

	sampler_state->ss0.min_filter = map_filter;
	sampler_state->ss0.mag_filter = map_filter;

	sampler_state->ss1.r_wrap_mode = wrap_mode;
	sampler_state->ss1.s_wrap_mode = wrap_mode;
	sampler_state->ss1.t_wrap_mode = wrap_mode;
}
 
/* Initialises the two-entry sampler pair used for copies: both entries use
 * nearest filtering with no extend, and the first one additionally takes
 * non-normalized coordinates. */
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	struct gen6_sampler_state *first = &ss[0];
	struct gen6_sampler_state *second = &ss[1];

	sampler_state_init(first, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	first->ss3.non_normalized_coord = 1;

	sampler_state_init(second, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
/* Initialises the two-entry sampler pair used for fills: the first entry
 * uses nearest filtering with repeat and non-normalized coordinates, the
 * second uses nearest filtering with no extend. */
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
	struct gen6_sampler_state *first = &ss[0];
	struct gen6_sampler_state *second = &ss[1];

	sampler_state_init(first, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	first->ss3.non_normalized_coord = 1;

	sampler_state_init(second, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
/* Converts an I915_TILING_* value into surface-state tiling bits.
 * Unknown values trip the assertion; when assertions are compiled out they
 * are treated like I915_TILING_NONE and yield 0. */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	switch (tiling) {
	case I915_TILING_X:
		return GEN6_SURFACE_TILED;
	case I915_TILING_Y:
		return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
	case I915_TILING_NONE:
		return 0;
	default:
		assert(0);
		return 0;
	}
}
 
/**
* Writes a surface state for `bo` into the surface area at the end of the
* batch, or reuses one that was already written.
*
* A bo can hold separate bindings per key; the key is the format combined
* with the is_dst flag in bit 30 and the "destination scanout" flag in
* bit 31. When a binding for the key exists, its offset is returned and, for
* destinations, the bo is marked dirty.
*
* Returns the byte offset of the surface state within the batch.
*/
static int
gen6_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t *ss;
uint32_t domains;
uint16_t offset;
uint32_t is_scanout = is_dst && bo->scanout;

/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
is_dst ? "render" : "sampler"));
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}

/* the surface area grows downwards: reserve one padded entry */
offset = sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst) {
/* render target: read and write domains are both RENDER */
ss[0] |= GEN6_SURFACE_RC_READ_WRITE;
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
} else
/* texture: read-only through the sampler */
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
assert(bo->pitch <= (1 << 18));
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
/* NOTE(review): 3 << 16 is presumably a cacheability setting that is left
 * off for scanout and io buffers - confirm against the hardware manual */
ss[5] = (is_scanout || bo->io) ? 0 : 3 << 16;

kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);

DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));

return offset * sizeof(uint32_t);
}
 
/* Emits 3DSTATE_VERTEX_BUFFERS (5 dwords) for the operation's vertex layout
 * id, with a pitch of 4 * floats_per_vertex bytes.
 *
 * The buffer address is written as 0 and its batch position is recorded in
 * vertex_reloc[] so it can be patched later. The id's bit is then set in
 * vb_id to mark the buffer as emitted. */
static void gen6_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = GEN6_VERTEX(op->u.gen6.flags);

OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
/* record the index of the address dword before emitting its placeholder */
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);

sna->render.vb_id |= 1 << id;
}
 
/* Starts a rectangle-list 3DPRIMITIVE, or continues the previous one.
 *
 * When nothing has been emitted since the last primitive, vertex_offset is
 * simply pointed back at that primitive's vertex-count dword. The
 * primitive is 6 dwords long and the count is its second dword, which is
 * why the index is nbatch - 5.
 *
 * Otherwise a new primitive is emitted with a count of 0, to be filled in
 * later through vertex_offset, starting at the current vertex_index. */
static void gen6_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
__FUNCTION__,
sna->render.vertex_start,
sna->render.vertex_index));
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}

OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
/* index of the count dword that follows */
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
DBG(("%s: started new primitive: index=%d\n",
__FUNCTION__, sna->render.vertex_start));

sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
 
/* Makes sure a primitive is open for emitting rectangles.
 *
 * Returns true when a primitive is available. Returns false when the batch
 * lacks room, in which case the caller must submit and retry.
 *
 * Space reserved: 6 dwords for the primitive, or 60 when a
 * component-alpha fixup pass may follow, plus 5 when the vertex buffer
 * packet for this layout has not been emitted yet. */
static bool gen6_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
int ndwords;

/* a primitive is already open after waiting on the vertex lock */
if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
return true;

ndwords = op->need_magic_ca_pass ? 60 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;

if ((sna->render.vb_id & id) == 0)
gen6_emit_vertex_buffer(sna, op);

gen6_emit_primitive(sna);
return true;
}
 
/* Slow path of gen6_get_rectangles(), taken when the vertex buffer is full.
 *
 * Returns the number of floats now available in the vertex buffer, or 0
 * when the batch lacks room (dwords or relocation/exec slots) and must be
 * submitted first.
 *
 * When a primitive is open it is closed and, if the operation needs it, the
 * component-alpha fixup pass is emitted; the operation's own blend and
 * kernel state is then restored. */
static int gen6_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
/* Preventing discarding new vbo after lock contention */
if (sna_vertex_wait__locked(&sna->render)) {
int rem = vertex_space(sna);
if (rem > op->floats_per_rect)
return rem;
}

if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;

if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen6_magic_ca_pass(sna, op)) {
/* undo the state changes made by the fixup pass */
gen6_emit_flush(sna);
gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
gen6_emit_wm(sna,
GEN6_KERNEL(op->u.gen6.flags),
GEN6_VERTEX(op->u.gen6.flags) >> 2);
}
}

return gen4_vertex_finish(sna);
}
 
/* Reserves vertex space for up to `want` rectangles and returns how many
 * were actually reserved (at least 1).
 *
 * vertex_index is advanced by three vertices per reserved rectangle; the
 * caller then writes the vertex data.
 *
 * When neither the vertex buffer nor the batch has room, the open primitive
 * is closed, the batch is submitted, `emit_state` is called to re-emit the
 * operation's state into the fresh batch, and the reservation is retried. */
inline static int gen6_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
int rem;

assert(want);

start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen6_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}

/* no primitive open yet: open one, then re-read the available space */
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen6_rectangle_begin(sna, op))
goto flush;
else
goto start;
}

assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
/* clamp the request to what fits */
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;

assert(want > 0);
sna->render.vertex_index += 3*want;
return want;

flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
/* Reserves a zeroed binding table from the surface area at the end of the
 * batch. The table has the size of one padded surface state.
 *
 * Stores the table's position, in dwords from the start of the batch, in
 * *offset and returns a pointer to its first entry. */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	const size_t table_bytes = sizeof(struct gen6_surface_state_padded);
	uint32_t *table;

	sna->kgem.surface -= table_bytes / sizeof(uint32_t);
	table = sna->kgem.batch + sna->kgem.surface;

	/* Clear all surplus entries to zero in case of prefetch */
	memset(table, 0, table_bytes);

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
 
/* Prepares the batch for a render operation on op->dst.bo.
 *
 * Switches the batch to render mode. If the batch cannot take 150 more
 * dwords plus 4 surfaces, it is submitted and render mode is set again.
 * The invariant state is emitted when it is still pending.
 *
 * Returns whether the destination bo is currently dirty. */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

if (sna->render_state.gen6.needs_invariant)
gen6_emit_invariant(sna);

return kgem_bo_is_dirty(op->dst.bo);
}
 
/* Builds the binding table for a composite operation and emits its state.
 *
 * Table layout: [0] destination, [1] source, [2] mask when present.
 *
 * If binding the surfaces allocated nothing new and the entries equal those
 * of the previously emitted table, the fresh table is released and the
 * previous one is reused.
 *
 * The destination's dirty flag is passed to gen6_emit_state() in bit 0 of
 * the table offset. */
static void gen6_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;

dirty = gen6_get_batch(sna, op);

binding_table = gen6_composite_get_binding_table(sna, &offset);

binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
binding_table[2] =
gen6_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}

/* kgem.surface still equal to offset means every gen6_bind_bo() call above
 * reused an existing binding; the 64-bit compare covers entries 0 and 1 */
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
/* give the unused table back and fall back to the previous one */
sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}

gen6_emit_state(sna, op, offset | dirty);
}
 
/*
 * Re-align the vertex buffer cursor when the vertex size of `op` differs
 * from the one last recorded in the gen6 render state.
 *
 * vertex_index is rounded up to the next whole vertex of the new size and
 * vertex_used is set to match. Must be called with no primitive open
 * (vertex_offset == 0).
 */
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);

	if (op->floats_per_vertex == sna->render_state.gen6.floats_per_vertex) {
		/* Same vertex layout as before: nothing to realign. */
		assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
		return;
	}

	/* Finish the current buffer if fewer than two rectangles would fit. */
	if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
		gen4_vertex_finish(sna);

	DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
	     sna->render_state.gen6.floats_per_vertex,
	     op->floats_per_vertex,
	     sna->render.vertex_index,
	     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));

	sna->render.vertex_index =
		(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	sna->render.vertex_used =
		sna->render.vertex_index * op->floats_per_vertex;
	sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;

	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
/*
 * Composite a single rectangle: reserve room for one rectangle, then let
 * the operation's prim_emit hook write its vertices.
 */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* Only one rectangle is requested, so the returned count is unused. */
	(void)gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	(*op->prim_emit)(sna, op, r);
}
 
#if 0
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen6_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
 
do {
int nbox_this_time;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
/*
 * Composite an array of boxes using the bulk emit_boxes hook.
 *
 * Each pass reserves as many rectangles as fit, claims the matching span of
 * the vertex buffer and lets emit_boxes fill it. nbox must be non-zero.
 */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	for (;;) {
		float *out;
		int chunk;

		chunk = gen6_get_rectangles(sna, op, nbox,
					    gen6_emit_composite_state);
		assert(chunk);
		nbox -= chunk;

		/* Claim the vertex floats for this chunk before writing them. */
		out = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += chunk * op->floats_per_rect;

		op->emit_boxes(op, box, chunk, out);
		box += chunk;

		if (!nbox)
			break;
	}
}
 
/*
 * Variant of gen6_render_composite_boxes() that brackets the reservation
 * with the vertex lock. The lock is held while reserving and claiming
 * vertex space; it is dropped (after sna_vertex_acquire__locked) around the
 * emit_boxes call and re-taken before sna_vertex_release__locked.
 * nbox must be non-zero.
 */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	for (;;) {
		float *out;
		int chunk;

		chunk = gen6_get_rectangles(sna, op, nbox,
					    gen6_emit_composite_state);
		assert(chunk);
		nbox -= chunk;

		out = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += chunk * op->floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Vertex data is written with the lock released. */
		op->emit_boxes(op, box, chunk, out);
		box += chunk;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);

		if (!nbox)
			break;
	}
	sna_vertex_unlock(&sna->render);
}
#endif
 
#ifndef MAX
/* Larger of a and b. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
/*
 * Fill the static stream with one blend state per (source factor,
 * destination factor) pair and return the stream offset of the table.
 *
 * Entries are laid out source-major: the entry for (src, dst) sits at index
 * src * GEN6_BLENDFACTOR_COUNT + dst, each GEN6_BLEND_STATE_PADDED_SIZE
 * bytes apart. Blending is enabled for every pair except src == ONE with
 * dst == ZERO.
 */
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *table;
	int s, d;

	table = sna_static_stream_map(stream,
				      GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				      64);

	for (s = 0; s < GEN6_BLENDFACTOR_COUNT; s++) {
		for (d = 0; d < GEN6_BLENDFACTOR_COUNT; d++) {
			char *entry = table +
				(s * GEN6_BLENDFACTOR_COUNT + d) * GEN6_BLEND_STATE_PADDED_SIZE;
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)entry;

			blend->blend0.dest_blend_factor = d;
			blend->blend0.source_blend_factor = s;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			blend->blend0.blend_enable =
				(d != GEN6_BLENDFACTOR_ZERO || s != GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;
		}
	}

	return sna_static_stream_offsetof(stream, table);
}
 
#if 0
/*
 * Allocate and fill a 2D surface state describing one plane of a video
 * frame, and return its byte offset within the batch.
 *
 * src_offset is passed to kgem_add_reloc() as the delta into src_bo;
 * width, height and pitch are stored minus one.
 */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *state;

	/* Take one padded slot (counted in dwords) from the surface area. */
	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	state = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*state));

	state->ss0.surface_type = GEN6_SURFACE_2D;
	state->ss0.surface_format = src_surf_format;

	/* The base address lives in the dword right after ss0. */
	state->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	state->ss2.width = src_width - 1;
	state->ss2.height = src_height - 1;
	state->ss3.pitch = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
 
/*
 * Emit the surface bindings and pipeline state for a video blit.
 *
 * op->priv must point at the struct sna_video_frame being rendered.
 * Binding table layout: [0] destination, [1..n_src] video source surfaces.
 *
 * Planar frames bind six sources: entries 0-1 at offset 0 with the full
 * frame size, entries 2-3 at VBufOffset and entries 4-5 at UBufOffset with
 * half the width and height. Packed frames bind a single source.
 */
static void gen6_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
bool dirty;
int n_src, n;

dirty = gen6_get_batch(sna, op);

/* Byte offsets of each source surface within frame->bo. */
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;

if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
/* Full-size surfaces use pitch[1], half-size ones pitch[0]. */
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

/* Packed formats only fill in (and bind) entry 0. */
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}

binding_table = gen6_composite_get_binding_table(sna, &offset);

binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen6_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}

gen6_emit_state(sna, op, offset | dirty);
}
 
/*
 * Render a video frame into `pixmap`, one rectangle per box of dstRegion.
 *
 * The pixmap is forced onto the GPU first; returns false if that fails and
 * true once all boxes have been emitted and the vertices flushed.
 *
 * Each rectangle is three vertices of three floats (floats_per_vertex = 3,
 * floats_per_rect = 9): one packed destination coordinate written by
 * OUT_VERTEX plus two source texture coordinates.
 */
static bool
gen6_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;

DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));

priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;

memset(&tmp, 0, sizeof(tmp));

tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;

tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;

tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;

/* Unscaled blits can sample nearest; any scaling uses bilinear. */
if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;

tmp.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN6_WM_KERNEL_VIDEO_PLANAR :
GEN6_WM_KERNEL_VIDEO_PACKED,
2);
/* gen6_emit_video_state() reads the frame back from op->priv. */
tmp.priv = frame;

kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen6_emit_video_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);

/* Set up the offset for translating from the given region (in screen
 * coordinates) to the backing pixmap.
 */
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif

/* Linear map from destination coordinates to source coordinates divided
 * by the frame size: coord * scale + offset. */
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;

r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;

gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

/* Corners are emitted as (x2,y2), (x1,y2), (x1,y1). */
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

/* Record the box as GPU damage unless the whole pixmap already is. */
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}

gen4_vertex_flush(sna);
return true;
}
 
/*
 * Prepare `channel` so that `picture` can be sampled as a composite source
 * or mask for the w x h area at (x, y), destined for (dst_x, dst_y).
 *
 * The return value is whatever the selected helper returns. Callers in this
 * file treat -1 as failure, 0 as "substitute a solid 0 channel" and 1 as
 * success needing gen6_composite_channel_convert().
 *
 * Pictures the gen6 pipeline cannot sample directly (non-linear gradients,
 * alpha maps, unsupported repeat/filter, unsupported format, oversized
 * pixmaps) are routed to the fixup/convert/extract helpers.
 */
static int
gen6_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;

DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));

channel->is_solid = false;
channel->card_format = -1;

if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);

/* No drawable: the picture is a source picture (gradient). */
if (picture->pDrawable == NULL) {
int ret;

if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);

DBG(("%s -- fixup, gradient\n", __FUNCTION__));
/* Try the approximation unless precision was requested, then fall
 * back to the generic fixup if that was skipped or returned -1. */
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}

if (picture->alphaMap) {
DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

if (!gen6_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);

if (!gen6_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);

channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;

pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

/* Translate the sample origin into pixmap coordinates. */
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;

channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
/* Fold the translation into the origin; with no transform left the
 * filter is downgraded to nearest. */
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;

channel->pict_format = picture->format;
channel->card_format = gen6_get_card_format(picture->format);
/* (unsigned)-1 marks a format with no card format; convert instead. */
if (channel->card_format == (unsigned)-1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);

if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}

return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
/*
 * Translate a channel's repeat and filter values through gen6_repeat() and
 * gen6_filter(), and resolve the card format from the picture format if it
 * has not been set yet ((unsigned)-1 marks "unset").
 */
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);

	if (channel->card_format == (unsigned)-1) {
		channel->card_format =
			gen6_get_card_format(channel->pict_format);
	}

	/* The channel must have a usable card format by now. */
	assert(channel->card_format != (unsigned)-1);
}
#endif
 
/*
 * Finish a composite operation: if a primitive is still open, flush its
 * vertices and run the component-alpha pass hook.
 */
static void gen6_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);

	/* Nothing pending: no primitive was started. */
	if (!sna->render.vertex_offset)
		return;

	gen4_vertex_flush(sna);
	gen6_magic_ca_pass(sna, op);
}
 
#if 0
/*
 * Fill in the destination part of `op` from the picture `dst`.
 *
 * Records the backing pixmap, format, size and drawable deltas, and sets up
 * a redirect when the pixmap is too large. Returns false when there is no
 * destination bo or the redirect fails.
 *
 * NOTE(review): the assignment to op->dst.bo is commented out below, so the
 * NULL check tests whatever the caller left in op->dst.bo, and `box` is
 * computed but never used. This function sits inside an `#if 0` region in
 * this port.
 */
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h,
bool partial)
{
BoxRec box;

op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.format = dst->format;
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;

/* Use the given area if it is non-empty, else the picture's extents. */
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);

// op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
// PREFER_GPU | FORCE_GPU | RENDER_GPU,
// &box, &op->damage);
if (op->dst.bo == NULL)
return false;

get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);

DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));

assert(op->dst.bo->proxy == NULL);

/* Oversized targets are redirected through a smaller surface. */
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h))
return false;

return true;
}
 
/*
 * Decide whether a prepared composite should be handed to the BLT path.
 *
 * Checks are applied in order: an untiled TLB miss on either bo favours the
 * BLT; a bo already in use by the render ring rules it out; otherwise the
 * ring preference and then the per-bo preference decide.
 */
static bool
prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp)
{
	if (untiled_tlb_miss(tmp->dst.bo))
		return true;
	if (untiled_tlb_miss(tmp->src.bo))
		return true;

	if (kgem_bo_is_render(tmp->dst.bo))
		return false;
	if (kgem_bo_is_render(tmp->src.bo))
		return false;

	if (!prefer_blt_ring(sna, tmp->dst.bo, 0))
		return false;

	if (prefer_blt_bo(sna, tmp->dst.bo))
		return true;

	return prefer_blt_bo(sna, tmp->src.bo);
}
 
/*
 * Set up `tmp` for a Render composite of src (optionally through mask)
 * onto dst using operator `op`.
 *
 * Returns true when `tmp` is ready for use (either the BLT path accepted
 * the operation or the gen6 render state has been emitted), false when the
 * caller must fall back. On failure after the channels were prepared, the
 * cleanup labels release the mask, source and redirected destination bos in
 * that order.
 */
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;

DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));

/* Unmasked operations are offered to the BLT first. */
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;

if (gen6_composite_fallback(sna, src, mask, dst))
return false;

if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);

/* Clear is handled as Src from here on. */
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height,
op > PictOpSrc || dst->pCompositeClip->data))
return false;

/* -1: failure; 0: replace the source with a solid 0; 1: source ready. */
switch (gen6_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
/* Did we just switch rings to prepare the source? */
if (mask == NULL &&
prefer_blt_composite(sna, tmp) &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;

gen6_composite_channel_convert(&tmp->src);
break;
}

tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;

/* Defaults used when there is no mask. */
tmp->mask.bo = NULL;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;

if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;

/* Check if it's component alpha that relies on a source alpha and on
 * the source value. We can only get one of those into the single
 * source value that we get to blend with.
 */
if (gen6_blend_op[op].src_alpha &&
(gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
/* Only Over is supported here: it is switched to OutReverse with
 * need_magic_ca_pass set. */
if (op != PictOpOver)
goto cleanup_src;

tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}

/* Prepare the mask channel unless it can share the source's. */
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen6_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->mask);
break;
}
}

tmp->is_affine &= tmp->mask.is_affine;
}

tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(sna, tmp));

tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
tmp->boxes = gen6_render_composite_boxes__blt;
/* Use the bulk box emitters when emit_boxes is set. */
if (tmp->emit_boxes) {
tmp->boxes = gen6_render_composite_boxes;
tmp->thread_boxes = gen6_render_composite_boxes__thread;
}
tmp->done = gen6_render_composite_done;

/* Make sure all bos fit; submit the batch and retry once if they do not. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
NULL))
goto cleanup_mask;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;

cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
/* dst.bo is only destroyed when a redirect is in place. */
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
/*
 * Emit a single span box with the given opacity: reserve room for one
 * rectangle, then let the spans prim_emit hook write its vertices.
 */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	/* Only one rectangle is requested, so the returned count is unused. */
	(void)gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);

	(*op->prim_emit)(sna, op, box, opacity);
}
 
/*
 * Emit an array of span boxes that all share one opacity.
 *
 * Boxes are handled in chunks of however many rectangles
 * gen6_get_rectangles() can reserve at a time. nbox must be non-zero.
 */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int chunk;

		chunk = gen6_get_rectangles(sna, &op->base, nbox,
					    gen6_emit_composite_state);
		nbox -= chunk;

		do {
			DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box, opacity);
			box++;
		} while (--chunk);
	} while (nbox);
}
 
/*
 * Emit an array of opacity boxes through the bulk emit_boxes hook, with the
 * reservation bracketed by the vertex lock.
 *
 * The lock is held while reserving and claiming vertex space; it is dropped
 * (after sna_vertex_acquire__locked) around the emit_boxes call and re-taken
 * before sna_vertex_release__locked. nbox must be non-zero.
 */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	for (;;) {
		float *out;
		int chunk;

		chunk = gen6_get_rectangles(sna, &op->base, nbox,
					    gen6_emit_composite_state);
		assert(chunk);
		nbox -= chunk;

		out = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += chunk * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* Vertex data is written with the lock released. */
		op->emit_boxes(op, box, chunk, out);
		box += chunk;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);

		if (!nbox)
			break;
	}
	sna_vertex_unlock(&sna->render);
}
 
/*
 * Finish a spans operation: flush any open primitive, drop the source bo
 * if one is attached, and complete the destination redirect.
 */
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	if (sna->render.vertex_offset != 0)
		gen4_vertex_flush(sna);

	if (op->base.src.bo != NULL)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
 
/*
 * Decide whether a spans composite should be attempted on the GPU.
 *
 * Rejects unknown operators, operations that would fall back, and tiled
 * operations whose destination is not on the GPU. Rectilinear spans are
 * then always accepted; other spans are accepted only if the destination's
 * CPU bo is busy, or (absent the in-place hint) its GPU bo is busy.
 */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	struct sna_pixmap *priv;

	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* Rectilinear spans need no further checks. */
	if (flags & COMPOSITE_SPANS_RECTILINEAR)
		return true;

	priv = sna_pixmap_from_drawable(dst->pDrawable);
	assert(priv);

	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
		return true;

	if (flags & COMPOSITE_SPANS_INPLACE_HINT)
		return false;

	return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
/*
 * Set up `tmp` for compositing spans of `src` onto `dst` with operator
 * `op`.
 *
 * Returns true once the target, source channel and gen6 state are ready and
 * the box/boxes/done hooks are installed; false when set-up fails, after
 * releasing the source bo and the redirected destination bo.
 * Operations that need tiling are delegated to sna_tiling_composite_spans().
 */
static bool
gen6_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));

/* Callers are expected to have passed gen6_check_composite_spans(). */
assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}

tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height, true))
return false;

/* -1: failure; 0: replace the source with a solid 0; 1: source ready. */
switch (gen6_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->base.src);
break;
}
/* Spans never bind a mask surface. */
tmp->base.mask.bo = NULL;

tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.need_magic_ca_pass = false;

/* The kernel index is GEN6_WM_KERNEL_OPACITY with its low bit set for
 * non-affine sources. */
tmp->base.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
tmp->base.src.repeat,
SAMPLER_FILTER_NEAREST,
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(sna, tmp));

tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;

/* Make sure both bos fit; submit the batch and retry once if they do not. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen6_emit_composite_state(sna, &tmp->base);
gen6_align_vertex(sna, &tmp->base);
return true;

cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
/* dst.bo is only destroyed when a redirect is in place. */
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
/*
 * Emit the surface bindings and pipeline state for a copy operation.
 *
 * Binding table layout: [0] destination, [1] source.
 *
 * If nothing else was allocated from the surface area after the new table
 * and both entries match the previously emitted table, the new table is
 * released again and the previous one is reused.
 */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/*
	 * Compare the two entries dword by dword. Reading them through a
	 * uint64_t pointer would violate strict aliasing and could be a
	 * misaligned 64-bit load, since the batch is only dword-addressed.
	 */
	if (sna->kgem.surface == offset &&
	    sna->kgem.batch[sna->render_state.gen6.surface_table + 0] == binding_table[0] &&
	    sna->kgem.batch[sna->render_state.gen6.surface_table + 1] == binding_table[1]) {
		/* Identical to the last table: give the slot back and reuse. */
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
 
/*
 * Decide whether a copy between src_bo and dst_bo should use the BLT path.
 *
 * Checks are applied in order: COPY_SYNC rules the BLT out; a non-zero
 * PREFER_RENDER setting decides by its sign; an active BLT ring, a
 * switchable self-copy or an untiled TLB miss favour the BLT; a bo in use
 * by the render ring rules it out; otherwise the ring preference and then
 * the per-bo preference decide.
 */
static inline bool prefer_blt_copy(struct sna *sna,
				   struct kgem_bo *src_bo,
				   struct kgem_bo *dst_bo,
				   unsigned flags)
{
	if (flags & COPY_SYNC)
		return false;

	if (PREFER_RENDER)
		return PREFER_RENDER > 0;

	if (sna->kgem.ring == KGEM_BLT)
		return true;

	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (untiled_tlb_miss(src_bo))
		return true;
	if (untiled_tlb_miss(dst_bo))
		return true;

	if (kgem_bo_is_render(dst_bo))
		return false;
	if (kgem_bo_is_render(src_bo))
		return false;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	if (prefer_blt_bo(sna, src_bo))
		return true;

	return prefer_blt_bo(sna, dst_bo);
}
 
/*
 * Compute the bounding box of `n` boxes into *extents.
 *
 * box[0] is always read, so `box` must point at at least one valid box.
 * The loop is bounded by `i < n`, so a count of zero or less no longer
 * walks past the array the way the former `while (--n)` form did; in that
 * case *extents is simply box[0].
 */
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
	int i;

	*extents = box[0];
	for (i = 1; i < n; i++) {
		if (box[i].x1 < extents->x1)
			extents->x1 = box[i].x1;
		if (box[i].x2 > extents->x2)
			extents->x2 = box[i].x2;

		if (box[i].y1 < extents->y1)
			extents->y1 = box[i].y1;
		if (box[i].y2 > extents->y2)
			extents->y2 = box[i].y2;
	}
}
 
/*
 * Report whether a copy within a single bo reads from an area that
 * intersects the area it writes.
 *
 * Returns false without touching *extents when the bos differ. Otherwise
 * *extents receives the bounding box of the boxes, and the result is
 * whether that box shifted by the source delta intersects the same box
 * shifted by the destination delta.
 */
static inline bool
overlaps(struct sna *sna,
	 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
	 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
	 const BoxRec *box, int n, BoxRec *extents)
{
	bool x_hit, y_hit;

	if (src_bo != dst_bo)
		return false;

	boxes_extents(box, n, extents);

	/* Strict inequalities: ranges that merely touch do not overlap. */
	x_hit = extents->x2 + src_dx > extents->x1 + dst_dx &&
		extents->x1 + src_dx < extents->x2 + dst_dx;
	y_hit = extents->y2 + src_dy > extents->y1 + dst_dy &&
		extents->y1 + src_dy < extents->y2 + dst_dy;

	return x_hit && y_hit;
}
 
/*
 * Copy `n` boxes from the src pixmap/bo to the dst pixmap/bo, applying the
 * given deltas, using the render pipeline where possible.
 *
 * The BLT path is tried first when preferred; alu values other than GXcopy
 * and GXclear, and unsupported source formats, go to the BLT fallback;
 * overlapping self-copies go to the BLT or to
 * sna_render_copy_boxes__overlap(). Oversized destinations are redirected
 * and oversized sources are sampled through a partial pixmap. If set-up
 * fails, the BLT and then the tiling copy are used.
 *
 * Returns the result of whichever path handled the copy.
 *
 * NOTE(review): `tmp` is not zero-initialised; only the fields assigned
 * below are set here (sna_render_composite_redirect_init() presumably sets
 * tmp.redirect - confirm).
 */
static bool
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
BoxRec extents;

DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
src_bo == dst_bo,
overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)));

if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;

/* Only GXcopy and GXclear continue down the render path. The label
 * inside this block is also a jump target for later failures. */
if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;

return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}

/* Self-copy whose source and destination areas intersect. */
if (overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)) {
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
goto fallback_blt;

if (can_switch_to_blt(sna, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;

return sna_render_copy_boxes__overlap(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n, &extents);
}

/* Equal depths share one render format; otherwise each side gets the
 * format matching its own depth. */
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(tmp.src.pict_format))
goto fallback_blt;

tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;

sna_render_composite_redirect_init(&tmp);
/* Oversized destination: redirect to the bounding box of the boxes. */
if (too_large(tmp.dst.width, tmp.dst.height)) {
int i;

extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;

if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}

if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
goto fallback_tiled;

/* Fold the redirect's offset into the destination delta. */
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;

tmp.dst.x = tmp.dst.y = 0;
}

tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
/* Oversized source: sample from a partial view of the bounding box. */
if (too_large(src->drawable.width, src->drawable.height)) {
int i;

extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;

if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}

if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1)) {
DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
goto fallback_tiled_dst;
}

src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
} else {
tmp.src.bo = src_bo;
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
}

tmp.mask.bo = NULL;

/* Two floats per vertex, three vertices per rectangle. */
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = 0;

tmp.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);

/* Make sure both bos fit; submit the batch and retry once if they do not. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
DBG(("%s: too large for a single operation\n",
__FUNCTION__));
goto fallback_tiled_src;
}
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen6_emit_copy_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);

do {
int16_t *v;
int n_this_time;

n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_copy_state);
n -= n_this_time;

/* Vertices are written as pairs of int16 packed into the float
 * slots: 12 int16 values per rectangle (6 floats). */
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {

DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
/* Per vertex: dst x, dst y, src x, src y. Corners are emitted as
 * (x2,y2), (x1,y2), (x1,y1). */
v[0] = box->x2 + dst_dx;
v[2] = box->x2 + src_dx;
v[1] = v[5] = box->y2 + dst_dy;
v[3] = v[7] = box->y2 + src_dy;
v[8] = v[4] = box->x1 + dst_dx;
v[10] = v[6] = box->x1 + src_dx;
v[9] = box->y1 + dst_dy;
v[11] = box->y1 + src_dy;
v += 12; box++;
} while (--n_this_time);
} while (n);

gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
/* A partial source view owns its own bo; release it. */
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;

/* The labels below are only reached before the emit loop, so `box` and
 * `n` still describe the full request.
 * NOTE(review): dst_dx/dst_dy and src_dx/src_dy may already include the
 * redirect or partial-source offsets at this point while the original bos
 * are passed on - confirm this is intended. */
fallback_tiled_src:
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;

return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
/*
 * Queue one rectangle for an already prepared copy operation.
 *
 * Copies a w x h area whose origin is (sx, sy) in the source to (dx, dy)
 * in the destination.  Space for one rectangle is reserved through
 * gen6_get_rectangles() and three vertices are written as packed int16
 * pairs: destination (x, y) followed by source (x, y).
 */
static void
gen6_render_copy_blt(struct sna *sna,
		     const struct sna_copy_op *op,
		     int16_t sx, int16_t sy,
		     int16_t w, int16_t h,
		     int16_t dx, int16_t dy)
{
	int16_t *vtx;

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);

	/* Grab the write position before accounting for the new rectangle. */
	vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Destination coordinates of the three vertices. */
	vtx[0] = dx+w;
	vtx[1] = dy+h;
	vtx[4] = dx;
	vtx[5] = dy+h;
	vtx[8] = dx;
	vtx[9] = dy;

	/* Matching source coordinates. */
	vtx[2] = sx+w;
	vtx[3] = sy+h;
	vtx[6] = sx;
	vtx[7] = sy+h;
	vtx[10] = sx;
	vtx[11] = sy;
}
 
/*
 * Finish a copy operation: flush any vertices that are still pending.
 * The op argument is not used.
 */
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);

	/* A non-zero vertex_offset means a primitive is still open. */
	if (sna->render.vertex_offset != 0)
		gen4_vertex_flush(sna);
}
 
/*
 * Prepare a pixmap-to-pixmap copy operation.
 *
 * On success the callbacks op->blt and op->done are filled in (either by
 * sna_blt_copy() or with the gen6 render callbacks below) and true is
 * returned.  Returns false when neither the BLT nor the render path can
 * be used.
 *
 * sna:            driver state
 * alu:            raster operation; the render path only handles GXcopy
 *                 and GXclear
 * src, src_bo:    source pixmap and its buffer object
 * dst, dst_bo:    destination pixmap and its buffer object
 * op:             copy operation to initialise
 */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
struct sna_copy_op *op)
{
DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
__FUNCTION__, alu,
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));

/* First choice: the BLT, when preferred and the depths are compatible. */
if (prefer_blt_copy(sna, src_bo, dst_bo, 0) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy(sna, alu,
src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op))
return true;

/*
 * The render path is skipped for any other alu, for a copy within a
 * single bo, and for pixmaps that too_large() rejects.
 */
if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
too_large(src->drawable.width, src->drawable.height) ||
too_large(dst->drawable.width, dst->drawable.height)) {
/* Also the target of the "goto fallback" statements further down. */
fallback:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;

return sna_blt_copy(sna, alu, src_bo, dst_bo,
dst->drawable.bitsPerPixel,
op);
}

/* Same depth: one format for both sides; otherwise look each one up. */
if (dst->drawable.depth == src->drawable.depth) {
op->base.dst.format = sna_render_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = op->base.dst.format;
} else {
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(op->base.src.pict_format))
goto fallback;

/* NOTE(review): dst.x/dst.y and need_magic_ca_pass are not set here, unlike gen6_render_fill() - confirm callers do not rely on them. */
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
op->base.dst.height = dst->drawable.height;
op->base.dst.bo = dst_bo;

op->base.src.bo = src_bo;
op->base.src.card_format =
gen6_get_card_format(op->base.src.pict_format);
op->base.src.width = src->drawable.width;
op->base.src.height = src->drawable.height;

op->base.mask.bo = NULL;

op->base.floats_per_vertex = 2;
op->base.floats_per_rect = 6;

op->base.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

/*
 * If the buffers fail kgem_check_bo(), submit the current batch and
 * check again; a second failure goes to the BLT fallback.
 */
kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
goto fallback;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen6_emit_copy_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);

op->blt = gen6_render_copy_blt;
op->done = gen6_render_copy_done;
return true;
}
 
/*
 * Emit the state needed for a solid fill.
 *
 * A two-entry binding table is built: entry 0 is the destination surface,
 * entry 1 is the source bo bound as a 1x1 B8G8R8A8 surface.  The table
 * offset, OR'ed with the flag returned by gen6_get_batch(), is handed to
 * gen6_emit_state().
 */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *table;
	uint16_t table_offset;
	bool batch_dirty;

	batch_dirty = gen6_get_batch(sna, op);

	table = gen6_composite_get_binding_table(sna, &table_offset);

	table[0] = gen6_bind_bo(sna,
				op->dst.bo, op->dst.width, op->dst.height,
				gen6_get_dest_format(op->dst.format),
				true);
	table[1] = gen6_bind_bo(sna,
				op->src.bo, 1, 1,
				GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
				false);

	/*
	 * If the new table is still at kgem.surface and both entries match
	 * the table recorded in render_state (compared as one 64-bit
	 * value), advance kgem.surface by one padded surface state and
	 * reuse the recorded table offset instead.
	 */
	if (sna->kgem.surface == table_offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		table_offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, table_offset | batch_dirty);
}
 
/*
 * Decide whether a fill into bo should be attempted on the BLT first.
 *
 * A non-zero PREFER_RENDER overrides everything: the answer is then true
 * only when it is negative.  Otherwise the checks run in this order:
 * a bo for which kgem_bo_is_render() holds says no, untiled_tlb_miss()
 * says yes, and finally both prefer_blt_ring() and prefer_blt_bo() must
 * agree.
 */
static inline bool prefer_blt_fill(struct sna *sna,
				   struct kgem_bo *bo)
{
	if (PREFER_RENDER)
		return PREFER_RENDER < 0;

	if (kgem_bo_is_render(bo))
		return false;

	return untiled_tlb_miss(bo) ||
	       (prefer_blt_ring(sna, bo, 0) && prefer_blt_bo(sna, bo));
}
 
/*
 * Fill n boxes of dst with a solid colour using Render operator op.
 *
 * sna:          driver state
 * op:           Render operator; must index into gen6_blend_op
 * format:       destination picture format
 * color:        fill colour
 * dst, dst_bo:  destination pixmap and its buffer object
 * box, n:       boxes to fill (box[0] is read unconditionally by the
 *               debug trace and the emit loop runs at least once, so
 *               callers presumably pass n >= 1 - TODO confirm)
 *
 * Returns true if the fill was performed (by the BLT, the tiling helper
 * or the render pipeline), false if the request cannot be handled here.
 */
static bool
gen6_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;

DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));

if (op >= ARRAY_SIZE(gen6_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}

/*
 * Try the BLT when it is preferred, or when the render pipeline cannot
 * write this format at all.  Only Clear and Src map onto a BLT alu.
 */
if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
uint8_t alu = GXinvalid;

if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}

if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;

/* The BLT did not do it and render cannot write this format. */
if (!gen6_check_dst_format(format))
return false;
}

/* Render path: Clear becomes Src with a zero pixel; the solid source is always a8r8g8b8. */
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;

DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));

tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;

/*
 * A destination that too_large() rejects is redirected over the
 * extents of the boxes; if that fails the tiling helper takes over.
 * NOTE(review): op may already have been rewritten from Clear to Src
 * while color is passed through unchanged - confirm the tiling helper
 * still clears in that case.
 */
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;

boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}

/* NOTE(review): the result of sna_render_get_solid() is used without a NULL check. */
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;

tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;

tmp.u.gen6.flags = FILL_FLAGS(op, format);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

/* The second check only exists inside assert(), so it vanishes with NDEBUG. */
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}

gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);

/* Emit the boxes in as many chunks as gen6_get_rectangles() hands out. */
do {
int n_this_time;
int16_t *v;

n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_fill_state);
n -= n_this_time;

v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));

/* Three vertices per box: (x2,y2), (x1,y2), (x1,y1), each followed by a 0/1 source coordinate pair. */
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);

gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
 
/*
 * Queue one w x h rectangle at (x, y) for a prepared fill operation.
 *
 * Three vertices are written as packed int16 values; every vertex is a
 * destination (x, y) pair followed by a 0/1 source coordinate pair.
 */
static void
gen6_render_op_fill_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *vtx;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	/* Take the write position first, then account for the rectangle. */
	vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* vertex 0: (x+w, y+h), source (1, 1) */
	vtx[0] = x+w;
	vtx[1] = y+h;
	vtx[2] = 1;
	vtx[3] = 1;

	/* vertex 1: (x, y+h), source (0, 1) */
	vtx[4] = x;
	vtx[5] = y+h;
	vtx[6] = 0;
	vtx[7] = 1;

	/* vertex 2: (x, y), source (0, 0) */
	vtx[8] = x;
	vtx[9] = y;
	vtx[10] = 0;
	vtx[11] = 0;
}
 
/*
 * Queue a single box for a prepared fill operation.
 *
 * Same vertex layout as gen6_render_op_fill_blt(), with the corners
 * taken straight from the box.
 */
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *vtx;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* vertex 0: (x2, y2), source (1, 1) */
	vtx[0] = box->x2;
	vtx[1] = box->y2;
	vtx[2] = 1;
	vtx[3] = 1;

	/* vertex 1: (x1, y2), source (0, 1) */
	vtx[4] = box->x1;
	vtx[5] = box->y2;
	vtx[6] = 0;
	vtx[7] = 1;

	/* vertex 2: (x1, y1), source (0, 0) */
	vtx[8] = box->x1;
	vtx[9] = box->y1;
	vtx[10] = 0;
	vtx[11] = 0;
}
 
/*
 * Queue nbox boxes for a prepared fill operation.
 *
 * Boxes are emitted in chunks of whatever size gen6_get_rectangles()
 * grants.  Both loops are do/while, so at least one box is always
 * processed and the debug trace reads box[0] unconditionally.
 */
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int16_t *vtx;
		int chunk;

		chunk = gen6_get_rectangles(sna, &op->base, nbox,
					    gen6_emit_fill_state);
		nbox -= chunk;

		vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * chunk;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			/* vertex 0: (x2, y2), source (1, 1) */
			vtx[0] = box->x2;
			vtx[1] = box->y2;
			vtx[2] = 1;
			vtx[3] = 1;

			/* vertex 1: (x1, y2), source (0, 1) */
			vtx[4] = box->x1;
			vtx[5] = box->y2;
			vtx[6] = 0;
			vtx[7] = 1;

			/* vertex 2: (x1, y1), source (0, 0) */
			vtx[8] = box->x1;
			vtx[9] = box->y1;
			vtx[10] = 0;
			vtx[11] = 0;

			vtx += 12;
			box++;
		} while (--chunk);
	} while (nbox);
}
 
/*
 * Finish a fill operation: flush pending vertices and drop the reference
 * on the solid source bo held in op->base.src.bo.
 */
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);

	if (sna->render.vertex_offset != 0)
		gen4_vertex_flush(sna);

	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
 
/*
 * Prepare a solid fill operation on dst.
 *
 * The BLT is tried first when prefer_blt_fill() says so, and it is the
 * only option when alu is neither GXcopy nor GXclear or when too_large()
 * rejects the pixmap.  Otherwise the render pipeline is set up with a
 * solid-colour source and op receives the gen6 fill callbacks.
 *
 * Returns true when op is ready for use; on the BLT-only path the result
 * of sna_blt_fill() is returned.
 */
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color,
		 struct sna_fill_op *op)
{
	bool can_render;

	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	if (prefer_blt_fill(sna, dst_bo) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	can_render = (alu == GXcopy || alu == GXclear) &&
		     !too_large(dst->drawable.width, dst->drawable.height);
	if (!can_render)
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	/* A clear is a fill with zero. */
	if (alu == GXclear)
		color = 0;

	/* Destination description. */
	op->base.dst.pixmap = dst;
	op->base.dst.width = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = 0;
	op->base.dst.y = 0;

	/* Solid source; released again by gen6_render_op_fill_done(). */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	/* Submit the current batch if the destination fails the check. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt = gen6_render_op_fill_blt;
	op->box = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->done = gen6_render_op_fill_done;
	return true;
}
 
/*
 * Fill the single rectangle (x1, y1)-(x2, y2) through the BLT.
 * Returns whatever sna_blt_fill_boxes() reports.
 */
static bool
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
			     uint32_t color,
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
			     uint8_t alu)
{
	BoxRec rect = {
		.x1 = x1,
		.y1 = y1,
		.x2 = x2,
		.y2 = y2,
	};

	return sna_blt_fill_boxes(sna, alu,
				  bo, dst->drawable.bitsPerPixel,
				  color, &rect, 1);
}
 
/*
 * Fill one rectangle (x1, y1)-(x2, y2) of dst with a solid colour.
 *
 * The BLT is used when preferred, and is the only option when alu is
 * neither GXcopy nor GXclear or when too_large() rejects the pixmap.
 * Otherwise a one-rectangle render fill is emitted and flushed.
 *
 * Returns true on success.  Returns false when the destination still
 * fails kgem_check_bo() after a submit, or when the BLT-only path fails.
 */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *vtx;
	bool can_render;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	can_render = (alu == GXcopy || alu == GXclear) &&
		     !too_large(dst->drawable.width, dst->drawable.height);
	if (!can_render)
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	/* Destination description. */
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = 0;
	tmp.dst.y = 0;

	/* Solid source; released before every return below. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Submit once if the check fails; give up if it fails again. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* vertex 0: (x2, y2), source (1, 1) */
	vtx[0] = x2;
	vtx[1] = y2;
	vtx[2] = 1;
	vtx[3] = 1;

	/* vertex 1: (x1, y2), source (0, 1) */
	vtx[4] = x1;
	vtx[5] = y2;
	vtx[6] = 0;
	vtx[7] = 1;

	/* vertex 2: (x1, y1), source (0, 0) */
	vtx[8] = x1;
	vtx[9] = y1;
	vtx[10] = 0;
	vtx[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
 
/*
 * Clear the whole pixmap through the BLT: one GXclear box covering
 * (0, 0) to (width, height) with pixel value 0.
 */
static bool
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	BoxRec whole = {
		.x1 = 0,
		.y1 = 0,
		.x2 = dst->drawable.width,
		.y2 = dst->drawable.height,
	};

	return sna_blt_fill_boxes(sna, GXclear,
				  bo, dst->drawable.bitsPerPixel,
				  0, &whole, 1);
}
 
/*
 * Clear the whole of dst to zero.
 *
 * The BLT is used when the current ring is already KGEM_BLT, and is the
 * only option when too_large() rejects the pixmap.  Otherwise a single
 * full-size rectangle is rendered from a solid zero source and flushed.
 *
 * Returns true on success.  Returns false when the destination still
 * fails kgem_check_bo() after a submit, or when the BLT-only path fails.
 */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *vtx;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	/* Destination description. */
	tmp.dst.pixmap = dst;
	tmp.dst.width = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = 0;
	tmp.dst.y = 0;

	/* Solid zero source; released before every return below. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Submit once if the check fails; give up if it fails again. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	vtx = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* vertex 0: (width, height), source (1, 1) */
	vtx[0] = dst->drawable.width;
	vtx[1] = dst->drawable.height;
	vtx[2] = 1;
	vtx[3] = 1;

	/* vertex 1: (0, height), source (0, 1) */
	vtx[4] = 0;
	vtx[5] = dst->drawable.height;
	vtx[6] = 0;
	vtx[7] = 1;

	/* vertex 2: (0, 0), source (0, 0) */
	vtx[8] = 0;
	vtx[9] = 0;
	vtx[10] = 0;
	vtx[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
#endif
 
/*
 * Render flush hook: close the vertex buffer and check that no vertex
 * buffer id or open primitive offset is left behind.
 */
static void gen6_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	/* After closing, nothing may still be bound or open. */
	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}
 
/*
 * kgem context-switch hook: submit the current batch if it contains
 * anything, then record new_mode as the active ring.
 */
static void
gen6_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	if (kgem->nbatch != 0) {
		DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
		_kgem_submit(kgem);
	}

	kgem->ring = new_mode;
}
 
/*
 * kgem retire hook.
 *
 * Resets kgem->ring to kgem->mode when a ring is selected and either
 * semaphores are available or no retire is pending.  Then, if the batch
 * is empty and the vertex buffer object exists and is idle, rewinds the
 * vertex counters so the buffer is reused from the start.
 */
static void
gen6_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
		kgem->ring = kgem->mode;

	if (kgem->nbatch != 0)
		return;
	if (!sna->render.vbo)
		return;
	if (kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
/*
 * kgem expire hook: when a vertex buffer object exists but holds no
 * vertices, release it and fall back to the vertex_data array embedded
 * in the render state.
 */
static void
gen6_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Nothing to discard without a vbo, or while it still holds data. */
	if (!sna->render.vbo || sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
	kgem_bo_destroy(kgem, sna->render.vbo);
	assert(!sna->render.active);

	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
sna->render_state.gen6.first_state_packet = true;
sna->render_state.gen6.ve_id = 3 << 2;
sna->render_state.gen6.last_primitive = -1;
 
sna->render_state.gen6.num_sf_outputs = 0;
sna->render_state.gen6.samplers = -1;
sna->render_state.gen6.blend = -1;
sna->render_state.gen6.kernel = -1;
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
/*
 * Tear down the gen6 backend: drop the reference on general_bo, the bo
 * produced by gen6_render_setup().
 */
static void gen6_render_fini(struct sna *sna)
{
	struct kgem_bo *general = sna->render_state.gen6.general_bo;

	kgem_bo_destroy(&sna->kgem, general);
}
 
/*
 * True when either of bits 4-5 (mask 0x30) of the PCI device id is set;
 * gen6_render_setup() uses this to select gt2_info over gt1_info.
 */
static bool is_gt2(struct sna *sna)
{
	if (sna->PciInfo->device_id & 0x30)
		return true;

	return false;
}
 
/*
 * True when the low nibble of the PCI device id equals 0x6.
 */
static bool is_mobile(struct sna *sna)
{
	if ((sna->PciInfo->device_id & 0xf) == 0x6)
		return true;

	return false;
}
 
/*
 * Build the static state for the gen6 backend and upload it as one bo.
 *
 * Into a single static stream go, in this order: the null and scratch
 * entries, the WM kernels, the sampler state table and the blend state.
 * The resulting offsets are recorded in sna->render_state.gen6 and the
 * finished stream becomes state->general_bo.
 *
 * Returns true when the stream could be turned into a bo.
 */
static bool gen6_render_setup(struct sna *sna)
{
struct gen6_render_state *state = &sna->render_state.gen6;
struct sna_static_stream general;
struct gen6_sampler_state *ss;
int i, j, k, l, m;

/* gt1_info unless is_gt2() reports a match on the device id. */
state->info = &gt1_info;
if (is_gt2(sna))
state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */

sna_static_stream_init(&general);

/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
 * dumps, you know it points to zero.
 */
null_create(&general);
scratch_create(&general);

/*
 * One entry per kernel.  A non-zero size means the table holds a
 * ready-made binary, which is stored in slot [1]; otherwise the kernel
 * is compiled for each enabled dispatch width (8, 16, 32 -> slots 0, 1, 2).
 */
for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
if (USE_8_PIXEL_DISPATCH) {
state->wm_kernel[m][0] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 8);
}

if (USE_16_PIXEL_DISPATCH) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}

if (USE_32_PIXEL_DISPATCH) {
state->wm_kernel[m][2] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 32);
}
}
/*
 * Nothing produced an offset: compile the 16-wide variant.
 * NOTE(review): slots that were not written above are read here, so
 * this assumes the state was zeroed beforehand - TODO confirm.
 */
if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
}

/*
 * Sampler table: a copy pair, a fill pair, then one (source, mask) pair
 * for every filter/extend combination of both.  Its offset is stored in
 * the field named wm_state.
 * NOTE(review): the mapping returned here is used without a NULL check.
 */
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
(2 +
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT),
32);
state->wm_state = sna_static_stream_offsetof(&general, ss);
sampler_copy_init(ss); ss += 2;
sampler_fill_init(ss); ss += 2;
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
sampler_state_init(ss++, i, j);
sampler_state_init(ss++, k, l);
}
}
}
}

state->cc_blend = gen6_composite_create_blend_state(&general);

state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
/*
 * Install the gen6 render backend.
 *
 * Returns the incoming backend name unchanged when gen6_render_setup()
 * fails; otherwise hooks up the kgem and render callbacks and returns
 * the name from the selected gen6 info structure.
 *
 * In this build the composite/copy/fill entry points are compiled out
 * by the "#if 0" block; only blit_tex, flush, reset and fini are
 * installed on the render side.
 */
const char *gen6_render_init(struct sna *sna, const char *backend)
{
if (!gen6_render_setup(sna))
return backend;

sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
sna->kgem.expire = gen6_render_expire;

/* Disabled: the full set of render entry points. */
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen6_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif

#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen6_check_composite_spans;
sna->render.composite_spans = gen6_render_composite_spans;
if (is_mobile(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen6_render_video;

#if !NO_COPY_BOXES
sna->render.copy_boxes = gen6_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen6_render_copy;
#endif

#if !NO_FILL_BOXES
sna->render.fill_boxes = gen6_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen6_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen6_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen6_render_clear;
#endif
#endif

/* gen6_blit_tex is defined after this function, so a prior declaration must exist. */
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
sna->render.blit_tex = gen6_blit_tex;

sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
sna->render.fini = gen6_render_fini;

sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen6.info->name;
}
 
/*
 * Prepare a masked texture blit from src to dst.
 *
 * The composite operation in tmp is filled in for a PictOpSrc blend of
 * an x8r8g8b8 source through an a8 mask onto an a8r8g8b8 destination,
 * using the GEN6_WM_KERNEL_MASK kernel.  On success the composite state
 * has been emitted and tmp->blt / tmp->done are ready to be called.
 *
 * sna:            driver state
 * op:             ignored; the operator is always PictOpSrc
 * scale:          when true the source coordinates are normalised by
 *                 width/height instead of the source pixmap size
 * src, src_bo:    source pixmap and buffer object
 * mask, mask_bo:  mask pixmap and buffer object (mask is dereferenced
 *                 unconditionally)
 * dst, dst_bo:    destination pixmap and buffer object
 * src_x .. dst_y: unused here; src.offset and mask.offset are left
 *                 untouched
 * width, height:  size of the blit; also selects the source filter
 * tmp:            composite operation to initialise
 *
 * Returns true when the operation is ready, false when the buffers do
 * not fit into a single batch even after submitting the current one.
 * Previously the second check was missing and state was emitted anyway.
 */
static bool
gen6_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_a8r8g8b8;

	tmp->src.repeat = SAMPLER_EXTEND_NONE;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen6_get_card_format(tmp->src.pict_format);
	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;

	/* Only filter when the source size differs from the blit size. */
	if ( (tmp->src.width == width) &&
	     (tmp->src.height == height) )
		tmp->src.filter = SAMPLER_FILTER_NEAREST;
	else
		tmp->src.filter = SAMPLER_FILTER_BILINEAR;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen6_get_card_format(tmp->mask.pict_format);
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	/* Coordinate normalisation factors for the sampler. */
	if( scale )
	{
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	}
	else
	{
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	/* The kernel is fixed to the masked variant rather than chosen per op. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       GEN6_WM_KERNEL_MASK,
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt = gen6_render_composite_blt;
	tmp->done = gen6_render_composite_done;

	/*
	 * If the buffers fail the check, submit the current batch and check
	 * again, as the copy and fill paths do.  A second failure means the
	 * operation cannot be emitted as one batch, so report it instead of
	 * emitting state regardless.
	 */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL)) {
			DBG(("%s: too large for a single operation\n",
			     __FUNCTION__));
			return false;
		}
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, tmp);
	gen6_align_vertex(sna, tmp);
	return true;
}
/drivers/video/Intel-2D/sna/gen6_render.h
0,0 → 1,1563
#ifndef GEN6_RENDER_H
#define GEN6_RENDER_H
 
/*
 * Mask covering bits [high:low] inclusive of a 32-bit value; requires
 * 31 >= high >= low >= 0.  Built from an unsigned constant so that masks
 * reaching bit 31, including the full 32-bit mask, are well defined.
 * The result has type unsigned int.
 */
#define GEN6_MASK(high, low) ((0xffffffffu >> (31 - ((high) - (low)))) << (low))
 
/*
 * Build a command header: the constant 3 in bits 29 and up, then
 * Pipeline at bit 27, Opcode at bit 24 and Subopcode at bit 16.
 */
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16
# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8
# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 20)
#define VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
/* VF_CTL register offset and bit fields. */
#define GEN6_VF_CTL 0x7500

/* Bit 31 is built from an unsigned 1: a signed 1 << 31 overflows int. */
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
/* VS_CTL register offset and bit fields. */
#define GEN6_VS_CTL 0x7600
/* Bit 31 is built from an unsigned 1: a signed 1 << 31 overflows int. */
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN6_3DSTATE_PIPELINED_POINTERS */
#define GEN6_GS_DISABLE 0
#define GEN6_GS_ENABLE 1
#define GEN6_CLIP_DISABLE 0
#define GEN6_CLIP_ENABLE 1
 
/* for GEN6_PIPE_CONTROL */
#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
#define GEN6_VF_CTL 0x7500
 
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
#define GEN6_VS_CTL 0x7600
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
/*
 * GEN6_TD_*: values 0x8000-0x8030 in steps of 4, plus flag bits and field
 * positions for names prefixed GEN6_TD_CTL_ and GEN6_TD_CTL2_.
 * NOTE(review): the same names are defined with the same values earlier in
 * this source; a repeated #define must keep an identical replacement list,
 * so change both copies together.
 */
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
/*
 * GEN6_EU_*: values 0x8800-0x8840 and two field positions (bits 16 and 8)
 * for names prefixed GEN6_EU_CTL_.  The ATT, ATT_DATA and ATT_CLR entries
 * come in _0/_1 pairs 4 bytes apart.
 * NOTE(review): these names are also defined, with the same values, earlier
 * in this source; keep the copies identical.
 */
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
/* 3D state:
 *
 * _3DOP_* are four class codes (0x0-0x3).  The _3DSTATE_*, _3DCONTROL and
 * _3DPRIMITIVE values that follow reuse small numbers - 0x00, 0x01, 0x08 and
 * 0x09 each appear more than once - so a value on its own does not identify
 * a single name.
 * NOTE(review): presumably each value is a sub-opcode that is only meaningful
 * together with one of the _3DOP_* codes - confirm.
 */
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3

#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
/*
 * Primitive type codes, contiguous from 0x01 to 0x15 (0x00 is not assigned a
 * name here), followed by the two vertex-buffer access mode values.
 */
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15

#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
/*
 * Anisotropy ratio codes: the ratio named in the macro (2, 4, ... 16) is
 * encoded as ratio / 2 - 1, giving 0-7.
 */
#define GEN6_ANISORATIO_2 0
#define GEN6_ANISORATIO_4 1
#define GEN6_ANISORATIO_6 2
#define GEN6_ANISORATIO_8 3
#define GEN6_ANISORATIO_10 4
#define GEN6_ANISORATIO_12 5
#define GEN6_ANISORATIO_14 6
#define GEN6_ANISORATIO_16 7
 
/*
 * Blend factor codes.  Each INV_* code is the matching base code with bit 4
 * (0x10) set, and ZERO (0x11) pairs with ONE (0x1) in the same way.
 * SRC_ALPHA_SATURATE (0x6) has no INV_ counterpart, so 0x16 is not defined.
 */
#define GEN6_BLENDFACTOR_ONE 0x1
#define GEN6_BLENDFACTOR_SRC_COLOR 0x2
#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN6_BLENDFACTOR_DST_ALPHA 0x4
#define GEN6_BLENDFACTOR_DST_COLOR 0x5
#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN6_BLENDFACTOR_CONST_COLOR 0x7
#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN6_BLENDFACTOR_ZERO 0x11
#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A

/* Blend function codes, 0-4. */
#define GEN6_BLENDFUNCTION_ADD 0
#define GEN6_BLENDFUNCTION_SUBTRACT 1
#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN6_BLENDFUNCTION_MIN 3
#define GEN6_BLENDFUNCTION_MAX 4
 
/*
 * Small selector enumerations.  Each family below starts at 0 and counts up
 * without gaps; the family is identified by the macro prefix.
 */
#define GEN6_ALPHATEST_FORMAT_UNORM8 0
#define GEN6_ALPHATEST_FORMAT_FLOAT32 1

#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN6_CHROMAKEY_REPLACE_BLACK 1

#define GEN6_CLIP_API_OGL 0
#define GEN6_CLIP_API_DX 1

/* Five clip modes, 0-4. */
#define GEN6_CLIPMODE_NORMAL 0
#define GEN6_CLIPMODE_CLIP_ALL 1
#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN6_CLIPMODE_REJECT_ALL 3
#define GEN6_CLIPMODE_ACCEPT_ALL 4

#define GEN6_CLIP_NDCSPACE 0
#define GEN6_CLIP_SCREENSPACE 1
 
/*
 * Comparison function codes, 0-7.  The GEN6_PREFILTER_* family defined later
 * uses the same names in the same order with the same values.
 */
#define GEN6_COMPAREFUNCTION_ALWAYS 0
#define GEN6_COMPAREFUNCTION_NEVER 1
#define GEN6_COMPAREFUNCTION_LESS 2
#define GEN6_COMPAREFUNCTION_EQUAL 3
#define GEN6_COMPAREFUNCTION_LEQUAL 4
#define GEN6_COMPAREFUNCTION_GREATER 5
#define GEN6_COMPAREFUNCTION_NOTEQUAL 6
#define GEN6_COMPAREFUNCTION_GEQUAL 7
 
/* Coverage codes 0-3 for the pixel counts named HALF, 1, 2 and 4. */
#define GEN6_COVERAGE_PIXELS_HALF 0
#define GEN6_COVERAGE_PIXELS_1 1
#define GEN6_COVERAGE_PIXELS_2 2
#define GEN6_COVERAGE_PIXELS_4 3

/*
 * Cull mode codes 0-3.  Note the order: BOTH is 0 and NONE is 1.  The
 * GEN6_3DSTATE_SF_CULL_* values use the same numbering, shifted to bit 29.
 */
#define GEN6_CULLMODE_BOTH 0
#define GEN6_CULLMODE_NONE 1
#define GEN6_CULLMODE_FRONT 2
#define GEN6_CULLMODE_BACK 3

#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1

/* Depth format codes; values 3 and 4 are not given names here. */
#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN6_DEPTHFORMAT_D32_FLOAT 1
#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN6_DEPTHFORMAT_D16_UNORM 5

#define GEN6_FLOATING_POINT_IEEE_754 0
#define GEN6_FLOATING_POINT_NON_IEEE_754 1

#define GEN6_FRONTWINDING_CW 0
#define GEN6_FRONTWINDING_CCW 1
 
/* Index size codes: BYTE, WORD, DWORD map to 0, 1, 2. */
#define GEN6_INDEX_BYTE 0
#define GEN6_INDEX_WORD 1
#define GEN6_INDEX_DWORD 2

/*
 * Logic op codes, 0-15.  Read with the usual meaning of the names, each value
 * is the 4-bit truth table of the operation: bit (2 * src + dst) holds the
 * result for that source/destination bit pair.  For example COPY is 12
 * (0b1100, result = src), NOOP is 10 (0b1010, result = dst) and XOR is 6
 * (0b0110).
 */
#define GEN6_LOGICOPFUNCTION_CLEAR 0
#define GEN6_LOGICOPFUNCTION_NOR 1
#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN6_LOGICOPFUNCTION_INVERT 5
#define GEN6_LOGICOPFUNCTION_XOR 6
#define GEN6_LOGICOPFUNCTION_NAND 7
#define GEN6_LOGICOPFUNCTION_AND 8
#define GEN6_LOGICOPFUNCTION_EQUIV 9
#define GEN6_LOGICOPFUNCTION_NOOP 10
#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN6_LOGICOPFUNCTION_COPY 12
#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN6_LOGICOPFUNCTION_OR 14
#define GEN6_LOGICOPFUNCTION_SET 15
 
/* Map filter codes, 0x0-0x2. */
#define GEN6_MAPFILTER_NEAREST 0x0
#define GEN6_MAPFILTER_LINEAR 0x1
#define GEN6_MAPFILTER_ANISOTROPIC 0x2

/* Mip filter codes; LINEAR is 3, value 2 is not given a name here. */
#define GEN6_MIPFILTER_NONE 0
#define GEN6_MIPFILTER_NEAREST 1
#define GEN6_MIPFILTER_LINEAR 3

#define GEN6_POLYGON_FRONT_FACING 0
#define GEN6_POLYGON_BACK_FACING 1

/*
 * Prefilter comparison codes, 0x0-0x7, in the same order and with the same
 * values as the GEN6_COMPAREFUNCTION_* family.
 */
#define GEN6_PREFILTER_ALWAYS 0x0
#define GEN6_PREFILTER_NEVER 0x1
#define GEN6_PREFILTER_LESS 0x2
#define GEN6_PREFILTER_EQUAL 0x3
#define GEN6_PREFILTER_LEQUAL 0x4
#define GEN6_PREFILTER_GREATER 0x5
#define GEN6_PREFILTER_NOTEQUAL 0x6
#define GEN6_PREFILTER_GEQUAL 0x7

/* The code equals the vertex number in the name. */
#define GEN6_PROVOKING_VERTEX_0 0
#define GEN6_PROVOKING_VERTEX_1 1
#define GEN6_PROVOKING_VERTEX_2 2

#define GEN6_RASTRULE_UPPER_LEFT 0
#define GEN6_RASTRULE_UPPER_RIGHT 1

#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
/*
 * Stencil operation codes, 0-7.  The saturating forms (INCRSAT, DECRSAT) are
 * 3 and 4; the plain INCR and DECR are 5 and 6.
 */
#define GEN6_STENCILOP_KEEP 0
#define GEN6_STENCILOP_ZERO 1
#define GEN6_STENCILOP_REPLACE 2
#define GEN6_STENCILOP_INCRSAT 3
#define GEN6_STENCILOP_DECRSAT 4
#define GEN6_STENCILOP_INCR 5
#define GEN6_STENCILOP_DECR 6
#define GEN6_STENCILOP_INVERT 7

#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
/*
 * Surface format codes, 0x000-0x19F, written as three hex digits and listed
 * in ascending order.  The numbering is not contiguous: the list restarts at
 * 0x040, 0x080, 0x0C0, 0x100, 0x140 and 0x181, and individual values inside
 * those runs (for example 0x0C6, 0x0D4, 0x0D5, 0x110, 0x118, 0x184, 0x185 and
 * 0x19B) have no name here.  Do not derive a code by adding to a neighbouring
 * one.
 */
#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN6_SURFACEFORMAT_R16_SINT 0x10C
#define GEN6_SURFACEFORMAT_R16_UINT 0x10D
#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN6_SURFACEFORMAT_I16_UNORM 0x111
#define GEN6_SURFACEFORMAT_L16_UNORM 0x112
#define GEN6_SURFACEFORMAT_A16_UNORM 0x113
#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN6_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN6_SURFACEFORMAT_R8_UNORM 0x140
#define GEN6_SURFACEFORMAT_R8_SNORM 0x141
#define GEN6_SURFACEFORMAT_R8_SINT 0x142
#define GEN6_SURFACEFORMAT_R8_UINT 0x143
#define GEN6_SURFACEFORMAT_A8_UNORM 0x144
#define GEN6_SURFACEFORMAT_I8_UNORM 0x145
#define GEN6_SURFACEFORMAT_L8_UNORM 0x146
#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN6_SURFACEFORMAT_R1_UINT 0x181
#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN6_SURFACEFORMAT_MONO8 0x18E
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN6_SURFACEFORMAT_FXT1 0x192
#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN6_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
/*
 * Further selector enumerations; the family is identified by the macro
 * prefix and every family starts at 0.
 */
#define GEN6_SURFACERETURNFORMAT_FLOAT32 0
#define GEN6_SURFACERETURNFORMAT_S1 1

/* Surface type codes; NULL is 7, values 5 and 6 are not given names here. */
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7

#define GEN6_BORDER_COLOR_MODE_DEFAULT 0
#define GEN6_BORDER_COLOR_MODE_LEGACY 1

/* Texture coordinate mode codes, 0-5. */
#define GEN6_TEXCOORDMODE_WRAP 0
#define GEN6_TEXCOORDMODE_MIRROR 1
#define GEN6_TEXCOORDMODE_CLAMP 2
#define GEN6_TEXCOORDMODE_CUBE 3
#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5

#define GEN6_THREAD_PRIORITY_NORMAL 0
#define GEN6_THREAD_PRIORITY_HIGH 1

#define GEN6_TILEWALK_XMAJOR 0
#define GEN6_TILEWALK_YMAJOR 1

/* Note the order: the 8-bit setting is 0 and the 4-bit setting is 1. */
#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1

#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1

/* Vertex-fetch component control codes, 0-7. */
#define GEN6_VFCOMPONENT_NOSTORE 0
#define GEN6_VFCOMPONENT_STORE_SRC 1
#define GEN6_VFCOMPONENT_STORE_0 2
#define GEN6_VFCOMPONENT_STORE_1_FLT 3
#define GEN6_VFCOMPONENT_STORE_1_INT 4
#define GEN6_VFCOMPONENT_STORE_VID 5
#define GEN6_VFCOMPONENT_STORE_IID 6
#define GEN6_VFCOMPONENT_STORE_PID 7
 
 
 
/* Execution Unit (EU) defines
 *
 * Small enumerations; the family is identified by the macro prefix and every
 * family starts at 0.
 */

#define GEN6_ALIGN_1 0
#define GEN6_ALIGN_16 1

#define GEN6_ADDRESS_DIRECT 0
#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1

/* Channel indices: X, Y, Z, W map to 0-3. */
#define GEN6_CHANNEL_X 0
#define GEN6_CHANNEL_Y 1
#define GEN6_CHANNEL_Z 2
#define GEN6_CHANNEL_W 3

#define GEN6_COMPRESSION_NONE 0
#define GEN6_COMPRESSION_2NDHALF 1
#define GEN6_COMPRESSION_COMPRESSED 2

/*
 * Condition codes, 0-8.  EQ and NEQ are second names for Z (1) and NZ (2),
 * not separate codes.
 */
#define GEN6_CONDITIONAL_NONE 0
#define GEN6_CONDITIONAL_Z 1
#define GEN6_CONDITIONAL_NZ 2
#define GEN6_CONDITIONAL_EQ 1 /* Z */
#define GEN6_CONDITIONAL_NEQ 2 /* NZ */
#define GEN6_CONDITIONAL_G 3
#define GEN6_CONDITIONAL_GE 4
#define GEN6_CONDITIONAL_L 5
#define GEN6_CONDITIONAL_LE 6
#define GEN6_CONDITIONAL_C 7
#define GEN6_CONDITIONAL_O 8

#define GEN6_DEBUG_NONE 0
#define GEN6_DEBUG_BREAKPOINT 1

#define GEN6_DEPENDENCY_NORMAL 0
#define GEN6_DEPENDENCY_NOTCLEARED 1
#define GEN6_DEPENDENCY_NOTCHECKED 2
#define GEN6_DEPENDENCY_DISABLE 3

/* The code is log2 of the width in the name: 1, 2, 4 ... 32 map to 0-5. */
#define GEN6_EXECUTE_1 0
#define GEN6_EXECUTE_2 1
#define GEN6_EXECUTE_4 2
#define GEN6_EXECUTE_8 3
#define GEN6_EXECUTE_16 4
#define GEN6_EXECUTE_32 5

/* Strides 0, 1, 2 and 4 map to codes 0-3 (stride 4 is code 3, not 4). */
#define GEN6_HORIZONTAL_STRIDE_0 0
#define GEN6_HORIZONTAL_STRIDE_1 1
#define GEN6_HORIZONTAL_STRIDE_2 2
#define GEN6_HORIZONTAL_STRIDE_4 3

#define GEN6_INSTRUCTION_NORMAL 0
#define GEN6_INSTRUCTION_SATURATE 1

/* Note the polarity: ENABLE is 0 and DISABLE is 1. */
#define GEN6_MASK_ENABLE 0
#define GEN6_MASK_DISABLE 1
 
/*
 * Instruction opcode numbers, in decimal and ascending order.  The numbering
 * has gaps: the names cluster at 1-12, 16, 32-49, 64-74, 80-81 and 84-89,
 * and NOP is 126.  Values such as 0, 3, 33 and 43 have no name here.
 */
#define GEN6_OPCODE_MOV 1
#define GEN6_OPCODE_SEL 2
#define GEN6_OPCODE_NOT 4
#define GEN6_OPCODE_AND 5
#define GEN6_OPCODE_OR 6
#define GEN6_OPCODE_XOR 7
#define GEN6_OPCODE_SHR 8
#define GEN6_OPCODE_SHL 9
#define GEN6_OPCODE_RSR 10
#define GEN6_OPCODE_RSL 11
#define GEN6_OPCODE_ASR 12
#define GEN6_OPCODE_CMP 16
#define GEN6_OPCODE_JMPI 32
#define GEN6_OPCODE_IF 34
#define GEN6_OPCODE_IFF 35
#define GEN6_OPCODE_ELSE 36
#define GEN6_OPCODE_ENDIF 37
#define GEN6_OPCODE_DO 38
#define GEN6_OPCODE_WHILE 39
#define GEN6_OPCODE_BREAK 40
#define GEN6_OPCODE_CONTINUE 41
#define GEN6_OPCODE_HALT 42
#define GEN6_OPCODE_MSAVE 44
#define GEN6_OPCODE_MRESTORE 45
#define GEN6_OPCODE_PUSH 46
#define GEN6_OPCODE_POP 47
#define GEN6_OPCODE_WAIT 48
#define GEN6_OPCODE_SEND 49
#define GEN6_OPCODE_ADD 64
#define GEN6_OPCODE_MUL 65
#define GEN6_OPCODE_AVG 66
#define GEN6_OPCODE_FRC 67
#define GEN6_OPCODE_RNDU 68
#define GEN6_OPCODE_RNDD 69
#define GEN6_OPCODE_RNDE 70
#define GEN6_OPCODE_RNDZ 71
#define GEN6_OPCODE_MAC 72
#define GEN6_OPCODE_MACH 73
#define GEN6_OPCODE_LZD 74
#define GEN6_OPCODE_SAD2 80
#define GEN6_OPCODE_SADA2 81
#define GEN6_OPCODE_DP4 84
#define GEN6_OPCODE_DPH 85
#define GEN6_OPCODE_DP3 86
#define GEN6_OPCODE_DP2 87
#define GEN6_OPCODE_DPA2 88
#define GEN6_OPCODE_LINE 89
#define GEN6_OPCODE_NOP 126
 
/* Predicate control. The ALIGN1_* and ALIGN16_* names reuse the same
 * numeric range (2..7), so a value is only meaningful together with the
 * alignment mode it is used with.
 */
#define GEN6_PREDICATE_NONE 0
#define GEN6_PREDICATE_NORMAL 1
#define GEN6_PREDICATE_ALIGN1_ANYV 2
#define GEN6_PREDICATE_ALIGN1_ALLV 3
#define GEN6_PREDICATE_ALIGN1_ANY2H 4
#define GEN6_PREDICATE_ALIGN1_ALL2H 5
#define GEN6_PREDICATE_ALIGN1_ANY4H 6
#define GEN6_PREDICATE_ALIGN1_ALL4H 7
#define GEN6_PREDICATE_ALIGN1_ANY8H 8
#define GEN6_PREDICATE_ALIGN1_ALL8H 9
#define GEN6_PREDICATE_ALIGN1_ANY16H 10
#define GEN6_PREDICATE_ALIGN1_ALL16H 11
#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN6_PREDICATE_ALIGN16_ANY4H 6
#define GEN6_PREDICATE_ALIGN16_ALL4H 7

/* Register file selectors. */
#define GEN6_ARCHITECTURE_REGISTER_FILE 0
#define GEN6_GENERAL_REGISTER_FILE 1
#define GEN6_MESSAGE_REGISTER_FILE 2
#define GEN6_IMMEDIATE_VALUE 3

/* Register data types. VF shares value 5 with B, and V shares value 6
 * with HF; see the per-line notes for the immediate-only variants.
 */
#define GEN6_REGISTER_TYPE_UD 0
#define GEN6_REGISTER_TYPE_D 1
#define GEN6_REGISTER_TYPE_UW 2
#define GEN6_REGISTER_TYPE_W 3
#define GEN6_REGISTER_TYPE_UB 4
#define GEN6_REGISTER_TYPE_B 5
#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN6_REGISTER_TYPE_HF 6
#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN6_REGISTER_TYPE_F 7

/* Architecture register file (ARF) selectors; every value is a multiple
 * of 0x10.
 */
#define GEN6_ARF_NULL 0x00
#define GEN6_ARF_ADDRESS 0x10
#define GEN6_ARF_ACCUMULATOR 0x20
#define GEN6_ARF_FLAG 0x30
#define GEN6_ARF_MASK 0x40
#define GEN6_ARF_MASK_STACK 0x50
#define GEN6_ARF_MASK_STACK_DEPTH 0x60
#define GEN6_ARF_STATE 0x70
#define GEN6_ARF_CONTROL 0x80
#define GEN6_ARF_NOTIFICATION_COUNT 0x90
#define GEN6_ARF_IP 0xA0

/* Mask register indices. */
#define GEN6_AMASK 0
#define GEN6_IMASK 1
#define GEN6_LMASK 2
#define GEN6_CMASK 3
 
 
 
/* Thread control selectors. */
#define GEN6_THREAD_NORMAL 0
#define GEN6_THREAD_ATOMIC 1
#define GEN6_THREAD_SWITCH 2

/* Vertical stride: 0 encodes 0, then powers of two 1..256 encode as 1..9;
 * 0xF is the separate "one dimensional" selector.
 */
#define GEN6_VERTICAL_STRIDE_0 0
#define GEN6_VERTICAL_STRIDE_1 1
#define GEN6_VERTICAL_STRIDE_2 2
#define GEN6_VERTICAL_STRIDE_4 3
#define GEN6_VERTICAL_STRIDE_8 4
#define GEN6_VERTICAL_STRIDE_16 5
#define GEN6_VERTICAL_STRIDE_32 6
#define GEN6_VERTICAL_STRIDE_64 7
#define GEN6_VERTICAL_STRIDE_128 8
#define GEN6_VERTICAL_STRIDE_256 9
#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF

/* Region width: the value is log2 of the width named in the macro. */
#define GEN6_WIDTH_1 0
#define GEN6_WIDTH_2 1
#define GEN6_WIDTH_4 2
#define GEN6_WIDTH_8 3
#define GEN6_WIDTH_16 4

/* Stateless buffer boundary: 1K..2M, doubling per step, encoded as 0..11. */
#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11

/* Polygon facing selectors. */
#define GEN6_POLYGON_FACING_FRONT 0
#define GEN6_POLYGON_FACING_BACK 1

/* Message target identifiers. */
#define GEN6_MESSAGE_TARGET_NULL 0
#define GEN6_MESSAGE_TARGET_MATH 1
#define GEN6_MESSAGE_TARGET_SAMPLER 2
#define GEN6_MESSAGE_TARGET_GATEWAY 3
#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN6_MESSAGE_TARGET_URB 6
#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7
 
/* Sampler return formats (value 1 has no name here). */
#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3

/* Sampler message types. Many names share a numeric value, so the value
 * alone does not identify the message; it presumably has to be combined
 * with the SIMD mode named in the macro -- TODO confirm against users.
 */
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3

/* Dataport OWORD block sizes. */
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4

/* Dataport OWORD dual-block sizes (value 1 has no name here). */
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2

/* Dataport scattered DWORD block sizes. */
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3

/* Dataport read target caches. */
#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2

/* Render target write message variants. */
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4

/* Math message controls: signedness, precision, saturation, data layout. */
#define GEN6_MATH_INTEGER_UNSIGNED 0
#define GEN6_MATH_INTEGER_SIGNED 1

#define GEN6_MATH_PRECISION_FULL 0
#define GEN6_MATH_PRECISION_PARTIAL 1

#define GEN6_MATH_SATURATE_NONE 0
#define GEN6_MATH_SATURATE_SATURATE 1

#define GEN6_MATH_DATA_VECTOR 0
#define GEN6_MATH_DATA_SCALAR 1

/* URB message opcode; only WRITE is named here. */
#define GEN6_URB_OPCODE_WRITE 0

/* URB swizzle control selectors. */
#define GEN6_URB_SWIZZLE_NONE 0
#define GEN6_URB_SWIZZLE_INTERLEAVE 1
#define GEN6_URB_SWIZZLE_TRANSPOSE 2

/* Scratch space size: 1K..2M, doubling per step, encoded as 0..11. */
#define GEN6_SCRATCH_SPACE_SIZE_1K 0
#define GEN6_SCRATCH_SPACE_SIZE_2K 1
#define GEN6_SCRATCH_SPACE_SIZE_4K 2
#define GEN6_SCRATCH_SPACE_SIZE_8K 3
#define GEN6_SCRATCH_SPACE_SIZE_16K 4
#define GEN6_SCRATCH_SPACE_SIZE_32K 5
#define GEN6_SCRATCH_SPACE_SIZE_64K 6
#define GEN6_SCRATCH_SPACE_SIZE_128K 7
#define GEN6_SCRATCH_SPACE_SIZE_256K 8
#define GEN6_SCRATCH_SPACE_SIZE_512K 9
#define GEN6_SCRATCH_SPACE_SIZE_1M 10
#define GEN6_SCRATCH_SPACE_SIZE_2M 11
 
/* The hardware supports two different modes for border color. The
 * default (OpenGL) mode uses floating-point color channels, while the
 * legacy mode uses 4 bytes.
 *
 * More significantly, the legacy mode respects the components of the
 * border color for channels not present in the source, (whereas the
 * default mode will ignore the border color's alpha channel and use
 * alpha==1 for an RGB source, for example).
 *
 * The legacy mode matches the semantics specified by the Render
 * extension.
 */

/* Border color for the default mode: four float channels. The channel
 * order is not visible from this declaration.
 */
struct gen6_sampler_default_border_color {
float color[4];
};

/* Border color for the legacy mode: four 8-bit channels. */
struct gen6_sampler_legacy_border_color {
uint8_t color[4];
};
 
/* Sampler state, declared as four 32-bit words (ss0..ss3).
 *
 * The bit-field widths inside each word add up to exactly 32, so member
 * order and widths must not be changed. The actual bit positions depend
 * on the compiler's bit-field allocation order.
 */
struct gen6_sampler_state {
/* ss0: filtering, LOD bias and enable controls */
struct {
uint32_t shadow_function:3;
uint32_t lod_bias:11;
uint32_t min_filter:3;
uint32_t mag_filter:3;
uint32_t mip_filter:2;
uint32_t base_level:5;
uint32_t pad:1;
uint32_t lod_preclamp:1;
uint32_t border_color_mode:1;
uint32_t pad0:1;
uint32_t disable:1;
} ss0;

/* ss1: wrap modes for the three coordinates and the LOD clamp range */
struct {
uint32_t r_wrap_mode:3;
uint32_t t_wrap_mode:3;
uint32_t s_wrap_mode:3;
uint32_t pad:3;
uint32_t max_lod:10;
uint32_t min_lod:10;
} ss1;

/* ss2: a full 32-bit border color value; presumably an offset or
 * pointer to one of the border color structures -- TODO confirm.
 */
struct {
uint32_t border_color;
} ss2;

/* ss3: coordinate normalisation, anisotropy, chroma key and
 * monochrome filter controls
 */
struct {
uint32_t non_normalized_coord:1;
uint32_t pad:12;
uint32_t address_round:6;
uint32_t max_aniso:3;
uint32_t chroma_key_mode:1;
uint32_t chroma_key_index:2;
uint32_t chroma_key_enable:1;
uint32_t monochrome_filter_width:3;
uint32_t monochrome_filter_height:3;
} ss3;
};
 
/* Blend state, declared as two 32-bit words.
 *
 * The bit-field widths in blend0 and in blend1 each add up to 32; the
 * member order is part of the layout and must be preserved.
 */
struct gen6_blend_state {
/* blend0: colour and independent-alpha ("ia_") blend factors,
 * functions and enables
 */
struct {
uint32_t dest_blend_factor:5;
uint32_t source_blend_factor:5;
uint32_t pad3:1;
uint32_t blend_func:3;
uint32_t pad2:1;
uint32_t ia_dest_blend_factor:5;
uint32_t ia_source_blend_factor:5;
uint32_t pad1:1;
uint32_t ia_blend_func:3;
uint32_t pad0:1;
uint32_t ia_blend_enable:1;
uint32_t blend_enable:1;
} blend0;

/* blend1: clamping, dithering, alpha test, logic op, per-channel
 * write disables and alpha-to-coverage controls
 */
struct {
uint32_t post_blend_clamp_enable:1;
uint32_t pre_blend_clamp_enable:1;
uint32_t clamp_range:2;
uint32_t pad0:4;
uint32_t x_dither_offset:2;
uint32_t y_dither_offset:2;
uint32_t dither_enable:1;
uint32_t alpha_test_func:3;
uint32_t alpha_test_enable:1;
uint32_t pad1:1;
uint32_t logic_op_func:4;
uint32_t logic_op_enable:1;
uint32_t pad2:1;
uint32_t write_disable_b:1;
uint32_t write_disable_g:1;
uint32_t write_disable_r:1;
uint32_t write_disable_a:1;
uint32_t pad3:1;
uint32_t alpha_to_coverage_dither:1;
uint32_t alpha_to_one:1;
uint32_t alpha_to_coverage:1;
} blend1;
};
 
/* Color calculator state: one bit-field word, one alpha-reference word
 * and four float blend constants.
 */
struct gen6_color_calc_state {
/* cc0: alpha test format, rounding control and the two stencil
 * reference values (widths add up to 32)
 */
struct {
uint32_t alpha_test_format:1;
uint32_t pad0:14;
uint32_t round_disable:1;
uint32_t bf_stencil_ref:8;
uint32_t stencil_ref:8;
} cc0;

/* cc1: alpha reference, viewed either as a float or as an 8-bit
 * integer field. Which view applies is presumably selected by
 * cc0.alpha_test_format -- TODO confirm.
 */
union {
float alpha_ref_f;
struct {
uint32_t ui:8;
uint32_t pad0:24;
} alpha_ref_fi;
} cc1;

/* constant colour, one float per channel */
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
/* Depth/stencil state, declared as three 32-bit words.
 *
 * Members prefixed "bf_" are the second ("bf") set of stencil controls
 * next to the unprefixed set. The bit-field widths in each word add up
 * to 32.
 */
struct gen6_depth_stencil_state {
/* ds0: stencil operations, functions and enables for both sets */
struct {
uint32_t pad0:3;
uint32_t bf_stencil_pass_depth_pass_op:3;
uint32_t bf_stencil_pass_depth_fail_op:3;
uint32_t bf_stencil_fail_op:3;
uint32_t bf_stencil_func:3;
uint32_t bf_stencil_enable:1;
uint32_t pad1:2;
uint32_t stencil_write_enable:1;
uint32_t stencil_pass_depth_pass_op:3;
uint32_t stencil_pass_depth_fail_op:3;
uint32_t stencil_fail_op:3;
uint32_t stencil_func:3;
uint32_t stencil_enable:1;
} ds0;

/* ds1: 8-bit stencil write and test masks for both sets */
struct {
uint32_t bf_stencil_write_mask:8;
uint32_t bf_stencil_test_mask:8;
uint32_t stencil_write_mask:8;
uint32_t stencil_test_mask:8;
} ds1;

/* ds2: depth test and depth write controls */
struct {
uint32_t pad0:26;
uint32_t depth_write_enable:1;
uint32_t depth_test_func:3;
uint32_t pad1:1;
uint32_t depth_test_enable:1;
} ds2;
};
 
/* Surface state, declared as six 32-bit words (ss0..ss5).
 *
 * The bit-field widths in each word add up to 32; member order and
 * widths are part of the layout and must be preserved.
 */
struct gen6_surface_state {
/* ss0: cube face enables, cache/layout controls, per-channel write
 * disables, surface format and surface type
 */
struct {
uint32_t cube_pos_z:1;
uint32_t cube_neg_z:1;
uint32_t cube_pos_y:1;
uint32_t cube_neg_y:1;
uint32_t cube_pos_x:1;
uint32_t cube_neg_x:1;
uint32_t pad:3;
uint32_t render_cache_read_mode:1;
uint32_t mipmap_layout_mode:1;
uint32_t vert_line_stride_ofs:1;
uint32_t vert_line_stride:1;
uint32_t color_blend:1;
uint32_t writedisable_blue:1;
uint32_t writedisable_green:1;
uint32_t writedisable_red:1;
uint32_t writedisable_alpha:1;
uint32_t surface_format:9;
uint32_t data_return_format:1;
uint32_t pad0:1;
uint32_t surface_type:3;
} ss0;

/* ss1: 32-bit surface base address */
struct {
uint32_t base_addr;
} ss1;

/* ss2: rotation, mip count and 13-bit width/height fields */
struct {
uint32_t render_target_rotation:2;
uint32_t mip_count:4;
uint32_t width:13;
uint32_t height:13;
} ss2;

/* ss3: tiling controls, pitch and depth */
struct {
uint32_t tile_walk:1;
uint32_t tiled_surface:1;
uint32_t pad:1;
uint32_t pitch:18;
uint32_t depth:11;
} ss3;

/* ss4: minimum array element and minimum LOD */
struct {
uint32_t pad:19;
uint32_t min_array_elt:9;
uint32_t min_lod:4;
} ss4;

/* ss5: x/y offsets */
struct {
uint32_t pad:20;
uint32_t y_offset:4;
uint32_t pad2:1;
uint32_t x_offset:7;
} ss5;
};
 
/* Shift/mask constants for building surface state dwords by hand, as an
 * alternative to the bit-fields of struct gen6_surface_state.
 *
 * NOTE(review): GEN6_SURFACE_FORMAT_MASK is built with INTEL_MASK() while
 * every other mask here uses GEN6_MASK(); neither macro is defined in the
 * visible part of this header -- confirm both exist before using them.
 */

/* Surface state DW0 */
#define GEN6_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN6_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN6_SURFACE_FORMAT_SHIFT 18
#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)

/* Surface type field and its values (5 and 6 have no name here). */
#define GEN6_SURFACE_TYPE_SHIFT 29
#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29)
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7

/* Surface state DW2 */
#define GEN6_SURFACE_HEIGHT_SHIFT 19
#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19)
#define GEN6_SURFACE_WIDTH_SHIFT 6
#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6)
#define GEN6_SURFACE_LOD_SHIFT 2
#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2)

/* Surface state DW3 */
#define GEN6_SURFACE_DEPTH_SHIFT 21
#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21)
#define GEN6_SURFACE_PITCH_SHIFT 3
#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3)
#define GEN6_SURFACE_TILED (1 << 1)
#define GEN6_SURFACE_TILED_Y (1 << 0)

/* Surface state DW4 */
#define GEN6_SURFACE_MIN_LOD_SHIFT 28
#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28)

/* Surface state DW5 */
#define GEN6_SURFACE_X_OFFSET_SHIFT 25
#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25)
#define GEN6_SURFACE_Y_OFFSET_SHIFT 20
#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20)
 
/* Viewport entry holding the depth range as a pair of floats. */
struct gen6_cc_viewport {
float min_depth;
float max_depth;
};
 
/* Texture filters known to the sampler tables. FILTER_COUNT is the number
 * of real filters and must stay last.
 */
typedef enum {
	SAMPLER_FILTER_NEAREST  = 0,
	SAMPLER_FILTER_BILINEAR = 1,
	FILTER_COUNT            = 2
} sampler_filter_t;
 
/* Texture extend (wrap) modes known to the sampler tables. EXTEND_COUNT is
 * the number of real modes and must stay last.
 */
typedef enum {
	SAMPLER_EXTEND_NONE    = 0,
	SAMPLER_EXTEND_REPEAT  = 1,
	SAMPLER_EXTEND_PAD     = 2,
	SAMPLER_EXTEND_REFLECT = 3,
	EXTEND_COUNT           = 4
} sampler_extend_t;
 
#endif
/drivers/video/Intel-2D/sna/gen7_render.c
0,0 → 1,2326
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@intel.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen7_render.h"
#include "gen4_source.h"
#include "gen4_vertex.h"
 
/* Build-time switches. Their users are outside the visible part of this
 * file; presumably a non-zero NO_* value disables the matching
 * accelerated path for debugging -- TODO confirm against the users.
 */
#define ALWAYS_FLUSH 0

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0

/* Pixel shader dispatch widths to use; at least one must be non-zero,
 * which the check below enforces at compile time.
 */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Largest width or height accepted by too_large(). */
#define GEN7_MAX_SIZE 16384

/* XXX Todo
 *
 * STR (software tiled rendering) mode. No, really.
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
 */

/* True when x is a multiple of y; only correct when y is a power of two. */
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
 
/* Per-GPU-variant limits used when programming the pipeline.
 *
 * Some tables in this file initialise .urb positionally, so the order of
 * the urb members must not change.
 */
struct gt_info {
const char *name; /* human-readable variant name */
uint32_t max_vs_threads;
uint32_t max_gs_threads;
/* stored pre-shifted (the tables apply *_PS_MAX_THREADS_SHIFT), so
 * this is a register field value rather than a plain count
 */
uint32_t max_wm_threads;
struct {
int size;
int max_vs_entries;
int max_gs_entries;
int push_ps_size; /* in 1KBs */
} urb;
int gt; /* GT level of the variant: 0, 1, 2 or 3 in the tables below */
};
 
/* Ivybridge limits for the gt == 0 entry. */
static const struct gt_info ivb_gt_info = {
	.name = "Ivybridge (gen7)",
	.max_vs_threads = 16,
	.max_gs_threads = 16,
	.max_wm_threads = (16 - 1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = {
		.size = 128,
		.max_vs_entries = 64,
		.max_gs_entries = 64,
		.push_ps_size = 8,
	},
	.gt = 0,
};
 
/* Ivybridge GT1 limits. */
static const struct gt_info ivb_gt1_info = {
	.name = "Ivybridge (gen7, gt1)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48 - 1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = {
		.size = 128,
		.max_vs_entries = 512,
		.max_gs_entries = 192,
		.push_ps_size = 8,
	},
	.gt = 1,
};
 
/* Ivybridge GT2 limits. */
static const struct gt_info ivb_gt2_info = {
	.name = "Ivybridge (gen7, gt2)",
	.max_vs_threads = 128,
	.max_gs_threads = 128,
	.max_wm_threads = (172 - 1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = {
		.size = 256,
		.max_vs_entries = 704,
		.max_gs_entries = 320,
		.push_ps_size = 8,
	},
	.gt = 2,
};
 
/* Baytrail limits.
 *
 * The table previously initialised .urb twice; in C the later designated
 * initialiser overrides the earlier one, so only { 128, 512, 192, 8 } ever
 * took effect. The dead first initialiser has been dropped, which keeps the
 * resulting object identical and silences -Woverride-init.
 */
static const struct gt_info byt_gt_info = {
	.name = "Baytrail (gen7)",
	.max_vs_threads = 36,
	.max_gs_threads = 36,
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
	.urb = { 128, 512, 192, 8 },
	.gt = 1,
};
 
/* Haswell limits for the gt == 0 entry. max_wm_threads also carries the
 * sample-mask bit in the same value.
 */
static const struct gt_info hsw_gt_info = {
	.name = "Haswell (gen7.5)",
	.max_vs_threads = 8,
	.max_gs_threads = 8,
	.max_wm_threads = ((8 - 1) << HSW_PS_MAX_THREADS_SHIFT) |
			  (1 << HSW_PS_SAMPLE_MASK_SHIFT),
	.urb = {
		.size = 128,
		.max_vs_entries = 64,
		.max_gs_entries = 64,
		.push_ps_size = 8,
	},
	.gt = 0,
};
 
/* Haswell GT1 limits. max_wm_threads also carries the sample-mask bit. */
static const struct gt_info hsw_gt1_info = {
	.name = "Haswell (gen7.5, gt1)",
	.max_vs_threads = 70,
	.max_gs_threads = 70,
	.max_wm_threads = ((102 - 1) << HSW_PS_MAX_THREADS_SHIFT) |
			  (1 << HSW_PS_SAMPLE_MASK_SHIFT),
	.urb = {
		.size = 128,
		.max_vs_entries = 640,
		.max_gs_entries = 256,
		.push_ps_size = 8,
	},
	.gt = 1,
};
 
/* Haswell GT2 limits. max_wm_threads also carries the sample-mask bit. */
static const struct gt_info hsw_gt2_info = {
	.name = "Haswell (gen7.5, gt2)",
	.max_vs_threads = 140,
	.max_gs_threads = 140,
	.max_wm_threads = ((140 - 1) << HSW_PS_MAX_THREADS_SHIFT) |
			  (1 << HSW_PS_SAMPLE_MASK_SHIFT),
	.urb = {
		.size = 256,
		.max_vs_entries = 1664,
		.max_gs_entries = 640,
		.push_ps_size = 8,
	},
	.gt = 2,
};
 
/* Haswell GT3 limits. max_wm_threads also carries the sample-mask bit. */
static const struct gt_info hsw_gt3_info = {
	.name = "Haswell (gen7.5, gt3)",
	.max_vs_threads = 280,
	.max_gs_threads = 280,
	.max_wm_threads = ((280 - 1) << HSW_PS_MAX_THREADS_SHIFT) |
			  (1 << HSW_PS_SAMPLE_MASK_SHIFT),
	.urb = {
		.size = 512,
		.max_vs_entries = 3328,
		.max_gs_entries = 1280,
		.push_ps_size = 16,
	},
	.gt = 3,
};
 
/* True when the device generation code is octal 070. */
static inline bool is_ivb(struct sna *sna)
{
	return 070 == sna->kgem.gen;
}
 
/* True when the device generation code is octal 071. */
static inline bool is_byt(struct sna *sna)
{
	return 071 == sna->kgem.gen;
}
 
/* True when the device generation code is octal 075. */
static inline bool is_hsw(struct sna *sna)
{
	return 075 == sna->kgem.gen;
}
 
/* Pixel shader binary for packed video, as rows of four dwords.
 *
 * The program is the concatenation of the included fragments, so the
 * include order is significant. Fragment names suggest: affine source
 * coordinates, ARGB sample, YUV to RGB conversion, write.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
 
/* Pixel shader binary for planar video, as rows of four dwords.
 *
 * Same fragment sequence as ps_kernel_packed except that the sampling
 * fragment is the planar one; the include order is significant.
 */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
 
#define KERNEL(kernel_enum, kernel, num_surfaces) \
[GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
#define NOKERNEL(kernel_enum, func, num_surfaces) \
[GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
int num_surfaces;
} wm_kernels[] = {
NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
 
NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
 
NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
 
NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
 
NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
 
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
 
/* Blend factors for one compositing operator. */
struct blendinfo {
	bool src_alpha;		/* the operator uses the source alpha */
	uint32_t src_blend;	/* source blend factor */
	uint32_t dst_blend;	/* destination blend factor */
};

/* Indexed by compositing operator; one row per operator named on the right. */
static const struct blendinfo gen7_blend_op[] = {
	{ false, GEN7_BLENDFACTOR_ZERO,          GEN7_BLENDFACTOR_ZERO },          /* Clear */
	{ false, GEN7_BLENDFACTOR_ONE,           GEN7_BLENDFACTOR_ZERO },          /* Src */
	{ false, GEN7_BLENDFACTOR_ZERO,          GEN7_BLENDFACTOR_ONE },           /* Dst */
	{ true,  GEN7_BLENDFACTOR_ONE,           GEN7_BLENDFACTOR_INV_SRC_ALPHA }, /* Over */
	{ false, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE },           /* OverReverse */
	{ false, GEN7_BLENDFACTOR_DST_ALPHA,     GEN7_BLENDFACTOR_ZERO },          /* In */
	{ true,  GEN7_BLENDFACTOR_ZERO,          GEN7_BLENDFACTOR_SRC_ALPHA },     /* InReverse */
	{ false, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO },          /* Out */
	{ true,  GEN7_BLENDFACTOR_ZERO,          GEN7_BLENDFACTOR_INV_SRC_ALPHA }, /* OutReverse */
	{ true,  GEN7_BLENDFACTOR_DST_ALPHA,     GEN7_BLENDFACTOR_INV_SRC_ALPHA }, /* Atop */
	{ true,  GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA },     /* AtopReverse */
	{ true,  GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA }, /* Xor */
	{ false, GEN7_BLENDFACTOR_ONE,           GEN7_BLENDFACTOR_ONE },           /* Add */
};
 
/**
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
 *
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Size of one blend state entry, rounded up to a multiple of 64 bytes. */
#define GEN7_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen7_blend_state), 64)

/* Byte offset of the blend state for (source factor s, destination factor d),
 * with bit 15 set whenever d is not GEN7_BLENDFACTOR_ZERO (this is the bit
 * GEN7_READS_DST() extracts). Both arguments are fully parenthesised so that
 * expression arguments expand safely.
 */
#define BLEND_OFFSET(s, d) \
	(((d) != GEN7_BLENDFACTOR_ZERO) << 15 | \
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))

#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)

/* Byte offset of the sampler pair for (source filter, source extend, mask
 * filter, mask extend). Each combination occupies two sampler states, and
 * the "+ 2" skips the two pairs at COPY_SAMPLER and FILL_SAMPLER.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Packed per-operation flags word built by GEN7_SET_FLAGS(S, B, K, V):
 *   upper 16 bits: sampler offset S (mask 0xfff0) | kernel id K (low 4 bits)
 *   lower 16 bits: blend offset B (mask 0x7ff0, bit 15 = reads destination)
 *                  | vertex id V (low 4 bits)
 * The accessors below undo that packing.
 */
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))

/* Shorthands for emitting into the batch and vertex buffers; they expect a
 * variable named sna in the calling scope.
 */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
/* True when either dimension exceeds GEN7_MAX_SIZE. */
static inline bool too_large(int width, int height)
{
	if (width > GEN7_MAX_SIZE)
		return true;

	return height > GEN7_MAX_SIZE;
}
 
/* Return the blend state offset (see BLEND_OFFSET) to use for an operation.
 *
 * The per-operator table lookup is disabled here: the factors are fixed to
 * ONE / INV_SRC_ALPHA whatever op is, and the adjustments for a missing
 * destination alpha and for component alpha are compiled out. As a result
 * op, has_component_alpha and dst_format only appear in the debug message.
 */
static uint32_t gen7_get_blend(int op,
			       bool has_component_alpha,
			       uint32_t dst_format)
{
	uint32_t src = GEN7_BLENDFACTOR_ONE;
	uint32_t dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA;

#if 0
	/* If there's no dst alpha channel, adjust the blend op so that
	 * we'll treat it always as 1.
	 */
	if (PICT_FORMAT_A(dst_format) == 0) {
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
			src = GEN7_BLENDFACTOR_ONE;
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
			src = GEN7_BLENDFACTOR_ZERO;
	}

	/* If the source alpha is being used, then we should only be in a
	 * case where the source blend factor is 0, and the source blend
	 * value is the mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
	}
#endif

	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
	     op, dst_format, PICT_FORMAT_A(dst_format),
	     src, dst, (int)BLEND_OFFSET(src, dst)));

	return BLEND_OFFSET(src, dst);
}
 
/* Map a picture format to the surface format used when sampling from it.
 * Unknown formats yield (uint32_t)-1.
 */
static uint32_t gen7_get_card_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:	return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:	return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:	return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:	return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:	return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:	return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:	return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:	return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:	return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:		return GEN7_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:	return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
	default:		return -1;
	}
}
 
/* Map a picture format to the surface format used when rendering to it.
 *
 * Formats with an unused ("x") channel map to the same surface format as
 * their alpha counterpart. Unknown formats yield (uint32_t)-1.
 */
static uint32_t gen7_get_dest_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;

	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;

	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;

	case PICT_r5g6b5:
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;

	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;

	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;

	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;

	default:
		return -1;
	}
}
 
/* Pick the WM kernel id for a composite operation.
 *
 * The base kernel depends on whether a mask is present and, for a
 * component-alpha mask, on whether the operator uses the source alpha
 * (gen7_blend_op[op].src_alpha). The result is base + 1 when the transform
 * is not affine, otherwise base. op is only used to index gen7_blend_op
 * when has_mask and is_ca are both set.
 */
static int
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
	int base = GEN7_WM_KERNEL_NOMASK;

	if (has_mask) {
		if (!is_ca)
			base = GEN7_WM_KERNEL_MASK;
		else if (gen7_blend_op[op].src_alpha)
			base = GEN7_WM_KERNEL_MASKSA;
		else
			base = GEN7_WM_KERNEL_MASKCA;
	}

	return base + !is_affine;
}
 
static void
gen7_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
 
/* num of VS entries must be divisible by 8 if size < 9 */
OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
(2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
 
OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
(1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
}
 
/* Emit the 10-dword STATE_BASE_ADDRESS packet.
 *
 * The surface base is relocated with a NULL bo; the dynamic and instruction
 * bases are relocated against general_bo. The general and indirect bases
 * are written as zero, and the four upper bounds are left disabled.
 */
static void
gen7_emit_state_base_address(struct sna *sna)
{
	uint32_t mocs;

	/* extra bits OR-ed into the relocation delta: 5 << 8 on Haswell,
	 * 3 << 8 otherwise
	 */
	if (is_hsw(sna))
		mocs = 5 << 8;
	else
		mocs = 3 << 8;

	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));

	/* general */
	OUT_BATCH(0);

	/* surface */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 NULL,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 BASE_ADDRESS_MODIFY));

	/* dynamic */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));

	/* indirect */
	OUT_BATCH(0);

	/* instruction */
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
				 sna->kgem.nbatch,
				 sna->render_state.gen7.general_bo,
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
				 mocs | BASE_ADDRESS_MODIFY));

	/* upper bounds, disable */
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
	OUT_BATCH(0);
	OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
/* Emit a 6-dword 3DSTATE_VS packet whose payload is all zeros (no VS
 * kernel, pass-through).
 *
 * The related 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTERS_VS and
 * 3DSTATE_SAMPLER_STATE_POINTERS_VS packets are deliberately not emitted.
 */
static void
gen7_disable_vs(struct sna *sna)
{
	int i;

	/* For future reference:
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
	 * to be emitted just prior to changing VS state, i.e. 3DSTATE_VS,
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
	 *
	 * Here we are saved by the full flush incurred when emitting
	 * the batchbuffer.
	 */
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
	for (i = 0; i < 5; i++)
		OUT_BATCH(0);
}
 
/* Emit a 7-dword 3DSTATE_HS packet whose payload is all zeros (no HS
 * kernel, pass-through).
 *
 * The related 3DSTATE_CONSTANT_HS, 3DSTATE_BINDING_TABLE_POINTERS_HS and
 * 3DSTATE_SAMPLER_STATE_POINTERS_HS packets are deliberately not emitted.
 */
static void
gen7_disable_hs(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
	for (i = 0; i < 6; i++)
		OUT_BATCH(0);
}
 
/* Emit a 4-dword 3DSTATE_TE packet whose payload is all zeros. */
static void
gen7_disable_te(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
	for (i = 0; i < 3; i++)
		OUT_BATCH(0);
}
 
/* Emit a 6-dword 3DSTATE_DS packet whose payload is all zeros.
 *
 * The related 3DSTATE_CONSTANT_DS, 3DSTATE_BINDING_TABLE_POINTERS_DS and
 * 3DSTATE_SAMPLER_STATE_POINTERS_DS packets are deliberately not emitted.
 */
static void
gen7_disable_ds(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
	for (i = 0; i < 5; i++)
		OUT_BATCH(0);
}
 
/* Emit a 7-dword 3DSTATE_GS packet whose payload is all zeros (no GS
 * kernel, pass-through).
 *
 * The related 3DSTATE_CONSTANT_GS, 3DSTATE_BINDING_TABLE_POINTERS_GS and
 * 3DSTATE_SAMPLER_STATE_POINTERS_GS packets are deliberately not emitted.
 */
static void
gen7_disable_gs(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
	for (i = 0; i < 6; i++)
		OUT_BATCH(0);
}
 
/* Emit a 3-dword 3DSTATE_STREAMOUT packet whose payload is all zeros. */
static void
gen7_disable_streamout(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
	for (i = 0; i < 2; i++)
		OUT_BATCH(0);
}
 
/* Emit the fixed 7-dword 3DSTATE_SF packet: culling is set to
 * GEN7_3DSTATE_SF_CULL_NONE and the trifan provoking-vertex field to 2;
 * every other dword is zero.
 */
static void
gen7_emit_sf_invariant(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
	OUT_BATCH(0);
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
	for (i = 0; i < 3; i++)
		OUT_BATCH(0);
}
 
/* Emit the fixed colour-calculator setup: only the CC viewport state
 * pointer is written, and it is cleared to 0.
 *
 * 3DSTATE_CC_STATE_POINTERS and 3DSTATE_DEPTH_STENCIL_STATE_POINTERS are
 * deliberately not emitted (unused, no change).
 */
static void
gen7_emit_cc_invariant(struct sna *sna)
{
	/* XXX clear to be safe */
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
	OUT_BATCH(0);
}
 
/* Emit a 4-dword 3DSTATE_CLIP packet with an all-zero payload
 * (pass-through), then clear the SF/CLIP viewport state pointer.
 */
static void
gen7_disable_clip(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
	for (i = 0; i < 3; i++)
		OUT_BATCH(0);

	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
	OUT_BATCH(0);
}
 
static void
gen7_emit_wm_invariant(struct sna *sna)
{
OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
OUT_BATCH(0);
 
#if 0
/* XXX length bias of 7 in old spec? */
OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
#endif
}
 
/* Emit a 7-dword 3DSTATE_DEPTH_BUFFER packet describing a NULL surface
 * with the D32_FLOAT format; the remaining dwords are zero, which disables
 * depth, stencil and hiz.
 *
 * 3DSTATE_CLEAR_PARAMS is deliberately not emitted.
 */
static void
gen7_emit_null_depth_buffer(struct sna *sna)
{
	int i;

	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);

	/* disable depth, stencil and hiz */
	for (i = 0; i < 5; i++)
		OUT_BATCH(0);
}
 
static void
gen7_emit_invariant(struct sna *sna)
{
OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
OUT_BATCH(1);
 
gen7_emit_urb(sna);
 
gen7_emit_state_base_address(sna);
 
gen7_disable_vs(sna);
gen7_disable_hs(sna);
gen7_disable_te(sna);
gen7_disable_ds(sna);
gen7_disable_gs(sna);
gen7_disable_clip(sna);
gen7_emit_sf_invariant(sna);
gen7_emit_wm_invariant(sna);
gen7_emit_cc_invariant(sna);
gen7_disable_streamout(sna);
gen7_emit_null_depth_buffer(sna);
 
sna->render_state.gen7.needs_invariant = false;
}
 
/* Point the hardware at the blend state located blend_offset bytes past
 * render->cc_blend. Nothing is emitted when that offset is already the
 * current one; otherwise the offset is cached after the packet is written.
 */
static void
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (state->blend != blend_offset) {
		DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));

		/* XXX can have up to 8 blend states preloaded, selectable
		 * via Render Target Index. What other side-effects of
		 * Render Target Index?
		 */

		assert (is_aligned(state->cc_blend + blend_offset, 64));
		OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
		/* bit 0 is set in addition to the 64-byte aligned address */
		OUT_BATCH((state->cc_blend + blend_offset) | 1);

		state->blend = blend_offset;
	}
}
 
/*
 * Select the pixel-shader sampler state at offset `state` from
 * render_state.gen7.wm_state.  Skipped when that sampler state is already
 * current.
 */
static void
gen7_emit_sampler(struct sna *sna, uint32_t state)
{
	if (sna->render_state.gen7.samplers != state) {
		/* cache first, then emit the pointer packet */
		sna->render_state.gen7.samplers = state;

		DBG(("%s: sampler = %x\n", __FUNCTION__, state));

		assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
		OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
		OUT_BATCH(sna->render_state.gen7.wm_state + state);
	}
}
 
/*
 * Emit 3DSTATE_SBE for either one (source only) or two (source + mask)
 * setup outputs.  Nothing is emitted when the output count is unchanged.
 *
 * The URB entry read length and read offset are both 1.  The debug trace
 * previously printed a hard-coded read_offset of 0 that disagreed with
 * the dword actually emitted; both now derive from the same constants.
 */
static void
gen7_emit_sf(struct sna *sna, bool has_mask)
{
	const int read_length = 1;
	const int read_offset = 1;
	int num_sf_outputs = has_mask ? 2 : 1;
	int dw;

	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
		return;

	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
	     __FUNCTION__, num_sf_outputs, read_length, read_offset));

	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;

	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
		  read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
		  read_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
	/* dw2..dw13 are all zero */
	for (dw = 2; dw < 14; dw++) {
		OUT_BATCH(0);
	}
}
 
/*
 * Emit 3DSTATE_PS for the pixel-shader kernel with index `kernel`.
 *
 * wm_kernel[kernel] holds three kernel offsets (8-, 16- and 32-wide per
 * the debug trace); a zero entry means that variant is absent.  The first
 * non-zero entry is used as the primary kernel pointer and the matching
 * dispatch-enable bits are set.  Skipped when `kernel` is already current.
 */
static void
gen7_emit_wm(struct sna *sna, int kernel)
{
	const uint32_t *kernels;
	uint32_t primary;
	uint32_t dispatch;
	int start_grf;

	if (sna->render_state.gen7.kernel == kernel)
		return;

	sna->render_state.gen7.kernel = kernel;
	kernels = sna->render_state.gen7.wm_kernel[kernel];

	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
	     __FUNCTION__,
	     wm_kernels[kernel].name,
	     wm_kernels[kernel].num_surfaces,
	     kernels[0], kernels[1], kernels[2]));

	/* primary kernel pointer: first variant that exists */
	primary = kernels[0];
	if (primary == 0)
		primary = kernels[1];
	if (primary == 0)
		primary = kernels[2];

	/* thread count plus one dispatch-enable bit per available variant */
	dispatch = sna->render_state.gen7.info->max_wm_threads;
	if (kernels[0])
		dispatch |= GEN7_PS_8_DISPATCH_ENABLE;
	if (kernels[1])
		dispatch |= GEN7_PS_16_DISPATCH_ENABLE;
	if (kernels[2])
		dispatch |= GEN7_PS_32_DISPATCH_ENABLE;
	dispatch |= GEN7_PS_ATTRIBUTE_ENABLE;

	/* start GRF for slot 0 depends on which variant sits in it */
	if (kernels[0])
		start_grf = 4;
	else if (kernels[1])
		start_grf = 6;
	else
		start_grf = 8;

	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
	OUT_BATCH(primary);
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
	OUT_BATCH(0); /* scratch address */
	OUT_BATCH(dispatch);
	OUT_BATCH(start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
	OUT_BATCH(kernels[2]);
	OUT_BATCH(kernels[1]);
}
 
/*
 * Point the pixel shader at the binding table located `offset` dwords
 * into the batch (the hardware is given the byte offset, offset*4).
 *
 * Returns true when a new pointer was emitted, false when `offset` was
 * already the current table.
 */
static bool
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
{
	const bool changed = sna->render_state.gen7.surface_table != offset;

	if (changed) {
		/* Binding table pointers */
		assert(is_aligned(4*offset, 32));
		OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
		OUT_BATCH(offset*4);

		sna->render_state.gen7.surface_table = offset;
	}

	return changed;
}
 
static bool
gen7_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (sna->render_state.gen7.drawrect_limit == limit &&
sna->render_state.gen7.drawrect_offset == offset)
return true;
 
sna->render_state.gen7.drawrect_offset = offset;
sna->render_state.gen7.drawrect_limit = limit;
 
OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
return false;
}
 
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS for the vertex layout selected by
 * GEN7_VERTEX(op->u.gen7.flags).
 *
 * The layout id is decoded as: low two bits (id & 3) select the format of
 * texture coordinate 0, and the remaining bits (id >> 2) select the format
 * of texture coordinate 1; a non-zero upper part means a mask channel is
 * present.  Nothing is emitted when the id matches the cached ve_id.
 */
static void
gen7_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen7_render_state *render = &sna->render_state.gen7;
uint32_t src_format, dw;
int id = GEN7_VERTEX(op->u.gen7.flags);
bool has_mask;

DBG(("%s: setup id=%d\n", __FUNCTION__, id));

/* Same layout as last time: the hardware state is already correct. */
if (render->ve_id == id)
return;
render->ve_id = id;

/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
has_mask = (id >> 2) != 0;
/* 3 elements (pad, position, texcoord 0) plus one more with a mask,
 * two dwords each, plus the header dword, minus the length bias of 2. */
OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
((2 * (3 + has_mask)) + 1 - 2));

/* Element 0: padding, every component stored as 0. */
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
0 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);

/* x,y */
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
0 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);

/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
/* The fourth component is always 1.0; the first three depend on the format. */
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
/* id & 3 can only be 0..3, so this is never reached; if asserts are
 * compiled out control falls through into case 0. */
assert(0);
case 0:
/* packed pair of 16-bit values instead of floats */
src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
/* one float */
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
/* two floats */
src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
/* three floats */
src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
}
/* Texture coordinate 0 starts 4 bytes into the vertex, after the position. */
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
src_format << GEN7_VE0_FORMAT_SHIFT |
4 << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(dw);

/* u1, v1, w1 */
if (has_mask) {
/* Skip the 4-byte position plus texture coordinate 0.  A first-channel
 * code of 0 is counted as one float's worth of bytes. */
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
default:
/* Unexpected code: asserts, and otherwise deliberately falls
 * through to the two-float layout below. */
assert(0);
case 2:
src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
src_format << GEN7_VE0_FORMAT_SHIFT |
offset << GEN7_VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
/*
 * Emit a PIPE_CONTROL carrying the WC_FLUSH, TC_FLUSH and CS_STALL bits.
 */
inline static void
gen7_emit_pipe_invalidate(struct sna *sna)
{
	const uint32_t control = GEN7_PIPE_CONTROL_WC_FLUSH |
				 GEN7_PIPE_CONTROL_TC_FLUSH |
				 GEN7_PIPE_CONTROL_CS_STALL;

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(control);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
 
/*
 * Emit a PIPE_CONTROL with WC_FLUSH.  When need_stall is set the
 * CS_STALL and STALL_AT_SCOREBOARD bits are added to the same packet.
 */
inline static void
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
{
	unsigned control = GEN7_PIPE_CONTROL_WC_FLUSH;

	if (need_stall) {
		control |= (GEN7_PIPE_CONTROL_CS_STALL |
			    GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
	}

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(control);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
 
/*
 * Emit a PIPE_CONTROL carrying only the CS_STALL and
 * STALL_AT_SCOREBOARD bits (no flush bits).
 */
inline static void
gen7_emit_pipe_stall(struct sna *sna)
{
	const uint32_t control = GEN7_PIPE_CONTROL_CS_STALL |
				 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD;

	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
	OUT_BATCH(control);
	OUT_BATCH(0);
	OUT_BATCH(0);
}
 
static void
gen7_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall;
 
assert(op->dst.bo->exec);
 
gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
gen7_emit_vertex_elements(sna, op);
 
need_stall = gen7_emit_binding_table(sna, wm_binding_table);
need_stall &= gen7_emit_drawing_rectangle(sna, op);
 
if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen7_emit_pipe_invalidate(sna);
kgem_clear_dirty(&sna->kgem);
assert(op->dst.bo->exec);
kgem_bo_mark_dirty(op->dst.bo);
sna->render_state.gen7.emit_flush = false;
need_stall = false;
}
if (sna->render_state.gen7.emit_flush) {
gen7_emit_pipe_flush(sna, need_stall);
need_stall = false;
}
if (need_stall)
gen7_emit_pipe_stall(sna);
 
sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
}
 
/*
 * Second pass for component-alpha compositing.  When the operation asks
 * for it (op->need_magic_ca_pass), stall, switch the blend and kernel to
 * the PictOpAdd component-alpha variants, and replay the vertices between
 * vertex_start and vertex_index with a fresh 3DPRIMITIVE.
 *
 * Returns true when the extra pass was emitted, false when not needed.
 */
static bool gen7_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	struct gen7_render_state *state = &sna->render_state.gen7;

	if (op->need_magic_ca_pass) {
		DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
		     sna->render.vertex_start, sna->render.vertex_index));

		gen7_emit_pipe_stall(sna);

		gen7_emit_cc(sna,
			     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
						       op->dst.format)));
		gen7_emit_wm(sna,
			     gen7_choose_composite_kernel(PictOpAdd,
							  true, true,
							  op->is_affine));

		OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
		OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
		/* vertex count, then first vertex */
		OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
		OUT_BATCH(sna->render.vertex_start);
		OUT_BATCH(1);	/* single instance */
		OUT_BATCH(0);	/* start instance location */
		OUT_BATCH(0);	/* index buffer offset, ignored */

		state->last_primitive = sna->kgem.nbatch;
		return true;
	}

	return false;
}
 
/*
 * Reserve a 64-byte, 64-byte-aligned region in the static stream.  The
 * returned mapping is not needed here, only the reservation.
 */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	(void)sna_static_stream_map(stream, 64, 64);
}
 
/*
 * Fill in one sampler state for the given filter and extend mode.
 *
 * filter: BILINEAR selects linear min/mag filtering; anything else
 *         (including NEAREST) selects nearest.
 * extend: REPEAT -> wrap, PAD -> clamp, REFLECT -> mirror; anything else
 *         (including NONE) -> clamp to border.
 * The same wrap mode is applied to the r, s and t coordinates.
 */
static void
sampler_state_init(struct gen7_sampler_state *sampler_state,
		   sampler_filter_t filter,
		   sampler_extend_t extend)
{
	unsigned map_filter = GEN7_MAPFILTER_NEAREST;
	unsigned wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;

	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */

	/* We use the legacy mode to get the semantics specified by
	 * the Render extension. */
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;

	if (filter == SAMPLER_FILTER_BILINEAR)
		map_filter = GEN7_MAPFILTER_LINEAR;
	sampler_state->ss0.min_filter = map_filter;
	sampler_state->ss0.mag_filter = map_filter;

	switch (extend) {
	case SAMPLER_EXTEND_REPEAT:
		wrap_mode = GEN7_TEXCOORDMODE_WRAP;
		break;
	case SAMPLER_EXTEND_PAD:
		wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
		break;
	case SAMPLER_EXTEND_REFLECT:
		wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
		break;
	case SAMPLER_EXTEND_NONE:
	default:
		/* keep the clamp-to-border default */
		break;
	}
	sampler_state->ss3.r_wrap_mode = wrap_mode;
	sampler_state->ss3.s_wrap_mode = wrap_mode;
	sampler_state->ss3.t_wrap_mode = wrap_mode;
}
 
/*
 * Initialise the sampler pair used for copies: both entries use nearest
 * filtering with no extend; only the first uses non-normalised
 * coordinates.
 */
static void
sampler_copy_init(struct gen7_sampler_state *ss)
{
	struct gen7_sampler_state *first = &ss[0];
	struct gen7_sampler_state *second = &ss[1];

	sampler_state_init(first, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	first->ss3.non_normalized_coord = 1;

	sampler_state_init(second, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
/*
 * Initialise the sampler pair used for fills: the first entry repeats
 * with nearest filtering and non-normalised coordinates; the second is a
 * plain nearest/no-extend sampler.
 */
static void
sampler_fill_init(struct gen7_sampler_state *ss)
{
	struct gen7_sampler_state *first = &ss[0];
	struct gen7_sampler_state *second = &ss[1];

	sampler_state_init(first, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	first->ss3.non_normalized_coord = 1;

	sampler_state_init(second, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
/*
 * Translate an I915_TILING_* value into the surface-state tiling bits.
 * An unrecognised tiling asserts and is otherwise treated like
 * I915_TILING_NONE (no bits set).
 */
static uint32_t
gen7_tiling_bits(uint32_t tiling)
{
	if (tiling == I915_TILING_X)
		return GEN7_SURFACE_TILED;

	if (tiling == I915_TILING_Y)
		return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;

	if (tiling != I915_TILING_NONE)
		assert(0);

	return 0;
}
 
/**
 * Build (or reuse) a surface state for `bo` in the current batch and
 * return its byte offset.
 *
 * A binding cached on the bo under the key (format, is_dst, is_scanout)
 * is reused; a destination is re-marked dirty in that case.  Otherwise a
 * new surface state is carved from the top of the batch, filled in,
 * relocated against `bo`, and cached under the same key.
 */
static uint32_t
gen7_bind_bo(struct sna *sna,
	     struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint32_t dw0;
	int offset;
	uint32_t is_scanout = is_dst && bo->scanout;
	uint32_t key = format | is_dst << 30 | is_scanout << 31;

	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, key);
	if (offset) {
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Surface states grow downwards from kgem.surface. */
	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	offset = sna->kgem.surface;
	ss = sna->kgem.batch + offset;

	dw0 = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
	       gen7_tiling_bits(bo->tiling) |
	       format << GEN7_SURFACE_FORMAT_SHIFT);
	if (bo->tiling == I915_TILING_Y)
		dw0 |= GEN7_SURFACE_VALIGN_4;
	if (is_dst) {
		dw0 |= GEN7_SURFACE_RC_READ_WRITE;
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	} else {
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	}

	ss[0] = dw0;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = 0;
	/* dword 5 stays zero for scanout and io buffers; otherwise it
	 * carries a value that differs between Haswell and other parts. */
	if (is_scanout || bo->io)
		ss[5] = 0;
	else if (is_hsw(sna))
		ss[5] = 5 << 16;
	else
		ss[5] = 3 << 16;
	ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	kgem_bo_set_binding(bo, key, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
 
/*
 * Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout of `op`.  The buffer
 * address dword is left as 0 and its batch position is appended to
 * render.vertex_reloc so it can be patched later.  The layout's bit is
 * then set in render.vb_id.
 */
static void gen7_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN7_VERTEX(op->u.gen7.flags);

	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
		  GEN7_VB0_VERTEXDATA |
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);

	/* record where the address placeholder lives */
	sna->render.vertex_reloc[sna->render.nvertex_reloc] = sna->kgem.nbatch;
	sna->render.nvertex_reloc++;
	OUT_BATCH(0);

	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	sna->render.vb_id |= 1 << id;
}
 
static void gen7_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
 
sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
}
 
/*
 * Make sure a vertex buffer packet (if this layout has none yet) and a
 * 3DPRIMITIVE are present in the batch.
 *
 * Returns true on success, including the case where the vertex wait
 * reports true and a primitive is already open.  Returns false when the
 * batch lacks room for the required dwords.
 */
static bool gen7_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
	int ndwords;
	bool need_vb;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	need_vb = (sna->render.vb_id & id) == 0;

	/* space for the primitive (and CA pass), plus the vertex buffer */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if (need_vb)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if (need_vb)
		gen7_emit_vertex_buffer(sna, op);

	gen7_emit_primitive(sna);
	return true;
}
 
/*
 * Slow path of gen7_get_rectangles() when the vertex buffer is full.
 *
 * Returns the number of floats available after finishing the current
 * vertex buffer, or 0 when the batch has no room for the flush (the
 * caller then submits the batch).  If the vertex wait reports true and
 * more than one rectangle's worth of space is available, that space is
 * returned directly.
 */
static int gen7_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6) ||
	    !kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen7_magic_ca_pass(sna, op)) {
			/* the CA pass changed blend and kernel: restore them */
			gen7_emit_pipe_stall(sna);
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
		}
	}

	return gen4_vertex_finish(sna);
}
 
/*
 * Reserve vertex space for up to `want` rectangles.
 *
 * Returns the number of rectangles actually reserved (at least 1, at most
 * `want`) and advances render.vertex_index by three vertices per
 * rectangle.  If the vertex buffer or the batch is full, the pending
 * vertices are flushed and, if necessary, the batch is submitted and
 * `emit_state` is called to re-emit the operation's state before retrying.
 */
inline static int gen7_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
{
int rem;

assert(want);

start:
rem = vertex_space(sna);
/* Not even one rectangle fits: try to obtain a fresh vertex buffer. */
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen7_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}

/* No primitive is open: emit one, then re-evaluate the available space. */
if (unlikely(sna->render.vertex_offset == 0)) {
if (!gen7_rectangle_begin(sna, op))
goto flush;
else
goto start;
}

assert(rem <= vertex_space(sna));
assert(op->floats_per_rect <= rem);
/* Clamp the request to what fits in the remaining space. */
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;

assert(want > 0);
/* three vertices per rectangle */
sna->render.vertex_index += 3*want;
return want;

flush:
/* Close the open primitive (with its CA pass), submit the batch,
 * re-emit the state into the new batch and start over. */
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen7_magic_ca_pass(sna, op);
}
sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
/*
 * Carve a zeroed binding table (the size of one surface state) from the
 * top of the batch.  Stores the table's dword offset in *offset and
 * returns a pointer to its first entry.
 */
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
							  uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	table = sna->kgem.batch + sna->kgem.surface;

	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(table, 0, sizeof(struct gen7_surface_state));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
 
/*
 * Put the batch into render mode for `op` and guarantee room for 150
 * dwords plus 4 surfaces, submitting the current batch first if that
 * does not fit.  Emits the invariant state if it is still pending.
 */
static void
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	struct kgem *kgem = &sna->kgem;

	kgem_set_mode(kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		_kgem_submit(kgem);
		_kgem_set_mode(kgem, KGEM_RENDER);
	}

	assert(sna->kgem.mode == KGEM_RENDER);
	assert(sna->kgem.ring == KGEM_RENDER);

	if (sna->render_state.gen7.needs_invariant) {
		gen7_emit_invariant(sna);
	}
}
 
/*
 * Emit all state for a composite operation: bind destination, source and
 * (optionally) mask into a fresh binding table, then emit the pipeline
 * state that refers to it.
 *
 * If binding the surfaces allocated no new surface state (kgem.surface
 * still equals the new table's offset) and the new table's entries match
 * those of the table currently programmed, the new table is released and
 * the previous one is reused.
 *
 * The table comparison is done entry by entry on uint32_t values.  The
 * batch is uint32_t storage, so comparing the first two entries through a
 * uint64_t lvalue (as was done previously) is an aliasing violation.
 */
static void gen7_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;

	gen7_get_batch(sna, op);

	binding_table = gen7_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen7_bind_bo(sna,
			    op->dst.bo, op->dst.width, op->dst.height,
			    gen7_get_dest_format(op->dst.format),
			    true);
	binding_table[1] =
		gen7_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);
	if (op->mask.bo) {
		binding_table[2] =
			gen7_bind_bo(sna,
				     op->mask.bo,
				     op->mask.width,
				     op->mask.height,
				     op->mask.card_format,
				     false);
	}

	/* Only look at the previous table when no surface state was added. */
	if (sna->kgem.surface == offset) {
		const uint32_t *previous =
			sna->kgem.batch + sna->render_state.gen7.surface_table;

		if (previous[0] == binding_table[0] &&
		    previous[1] == binding_table[1] &&
		    (op->mask.bo == NULL ||
		     previous[2] == binding_table[2])) {
			/* identical: give the new table back, reuse the old */
			sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
			offset = sna->render_state.gen7.surface_table;
		}
	}

	gen7_emit_state(sna, op, offset);
}
 
/*
 * Re-align the vertex buffer when the vertex size changes between
 * operations.  vertex_index is rounded up to the next whole vertex of the
 * new size and vertex_used is moved to match.  If fewer than two
 * rectangles' worth of floats remain, the current vertex buffer is
 * finished first.
 */
static void
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	/* same vertex size as before: already aligned */
	if (op->floats_per_vertex == sna->render_state.gen7.floats_per_vertex)
		return;

	if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
		gen4_vertex_finish(sna);

	DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
	     sna->render_state.gen7.floats_per_vertex,
	     op->floats_per_vertex,
	     sna->render.vertex_index,
	     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));

	sna->render.vertex_index =
		(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
	sna->render.vertex_used =
		sna->render.vertex_index * op->floats_per_vertex;
	sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
}
 
/*
 * Composite a single rectangle: reserve vertex space for one rectangle
 * (re-emitting composite state if the batch had to be submitted) and let
 * the operation's prim_emit hook write the vertices.
 */
fastcall static void
gen7_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* the count returned is not needed when asking for exactly one */
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);

	op->prim_emit(sna, op, r);
}
/*
 * Build a table holding one blend state for every (source factor,
 * destination factor) pair, laid out source-major with each entry
 * GEN7_BLEND_STATE_PADDED_SIZE bytes apart.  Blending is enabled for
 * every pair except (src = ONE, dst = ZERO).
 *
 * Returns the offset of the table within the static stream.
 */
static uint32_t
gen7_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
				     64);

	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
		for (dst = 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
			char *entry = base +
				(src * GEN7_BLENDFACTOR_COUNT + dst) *
				GEN7_BLEND_STATE_PADDED_SIZE;
			struct gen7_blend_state *blend =
				(struct gen7_blend_state *)entry;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
			blend->blend0.blend_enable =
				(dst != GEN7_BLENDFACTOR_ZERO ||
				 src != GEN7_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
 
#if 0
/*
 * Build a sampler surface state for one plane of a video frame.  The
 * surface starts `offset` bytes into `bo` (passed as the relocation
 * delta) and must be untiled.  Returns the surface state's byte offset
 * in the batch.  Unlike gen7_bind_bo() no binding is cached on the bo.
 */
static uint32_t gen7_bind_video_source(struct sna *sna,
				       struct kgem_bo *bo,
				       uint32_t offset,
				       int width,
				       int height,
				       int pitch,
				       uint32_t format)
{
	uint32_t *ss, bind;

	sna->kgem.surface -=
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
	bind = sna->kgem.surface;

	assert(bo->tiling == I915_TILING_NONE);

	ss = sna->kgem.batch + bind;
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
		 format << GEN7_SURFACE_FORMAT_SHIFT);
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       offset);
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
	ss[4] = ss[5] = ss[6] = 0;
	ss[7] = 0;
	if (is_hsw(sna))
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
	     bind, bo->handle, ss[1],
	     format, width, height, pitch, offset));

	return bind * sizeof(uint32_t);
}
 
/*
 * Emit all state for a video blit (currently compiled out by the
 * enclosing #if 0).
 *
 * Binds the destination plus either six source surfaces (planar formats:
 * entries 0-1 at offset 0, 2-3 at VBufOffset, 4-5 at UBufOffset) or a
 * single packed YCrCb surface, then emits the pipeline state.
 */
static void gen7_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
int n_src, n;

gen7_get_batch(sna, op);

/* Byte offsets of each source surface within the frame bo. */
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;

if (is_planar_fourcc(frame->id)) {
/* Planar: surfaces 0-1 are full size with pitch[1]; surfaces 2-5 are
 * half width and half height with pitch[0]. */
src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
/* Packed: a single surface; the format depends on the fourcc. */
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;

src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}

binding_table = gen7_composite_get_binding_table(sna, &offset);

/* Entry 0 is the destination; the sources follow from entry 1. */
binding_table[0] =
gen7_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen7_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen7_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}

gen7_emit_state(sna, op, offset);
}
 
/*
 * Render a video frame into `pixmap`, one textured rectangle per box of
 * dstRegion (currently compiled out by the enclosing #if 0).
 *
 * Returns false if the pixmap cannot be moved to the GPU, true otherwise.
 * Nearest filtering is used when source and destination sizes match,
 * bilinear otherwise.
 */
static bool
gen7_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
int src_width = frame->src.x2 - frame->src.x1;
int src_height = frame->src.y2 - frame->src.y1;
float src_offset_x, src_offset_y;
float src_scale_x, src_scale_y;
int nbox, pix_xoff, pix_yoff;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;

DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__,
src_width, src_height, dst_width, dst_height,
(long)REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));

priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;

memset(&tmp, 0, sizeof(tmp));

tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;

tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;

/* Each vertex is 3 floats (the packed x,y pair plus two texture
 * coordinates, as written by the OUT_VERTEX calls below); 3 vertices
 * per rectangle. */
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;

if (src_width == dst_width && src_height == dst_height)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;

tmp.u.gen7.flags =
GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN7_WM_KERNEL_VIDEO_PLANAR :
GEN7_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;

/* Make sure both buffers fit in the current batch, submitting it first
 * if they do not. */
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}

gen7_emit_video_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);

/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif

/* NOTE(review): the arguments are passed as src size, dst size, frame
 * size, but the format labels them src, frame, dst -- the "frame=" and
 * "dst=" labels look swapped; confirm before re-enabling this code. */
DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
__FUNCTION__,
frame->src.x1, frame->src.y1,
src_width, src_height,
dst_width, dst_height,
frame->width, frame->height));

/* Linear map from destination pixel coordinates to texture coordinates
 * expressed as a fraction of the frame size. */
src_scale_x = (float)src_width / dst_width / frame->width;
src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

src_scale_y = (float)src_height / dst_height / frame->height;
src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;

DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
__FUNCTION__,
src_scale_x, src_scale_y,
src_offset_x, src_offset_y));

box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;

DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
__FUNCTION__,
box->x1, box->y1,
box->x2, box->y2,
pix_xoff, pix_yoff,
box->x1 * src_scale_x + src_offset_x,
box->y1 * src_scale_y + src_offset_y,
box->x2 * src_scale_x + src_offset_x,
box->y2 * src_scale_y + src_offset_y));

/* Destination box translated into pixmap coordinates. */
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;

gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

/* Three corners: bottom-right, bottom-left, top-left. */
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);

/* Record the write in the pixmap's damage tracking unless the GPU copy
 * is already marked fully damaged. */
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}

gen4_vertex_flush(sna);
return true;
}
#endif
 
/*
 * Finish a composite operation: if a primitive is still open, flush its
 * vertices and run the component-alpha fixup pass when the operation
 * requires one.
 */
static void gen7_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	/* nothing pending */
	if (!sna->render.vertex_offset)
		return;

	gen4_vertex_flush(sna);
	gen7_magic_ca_pass(sna, op);
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#if 0
/*
 * Fill `n` boxes of `dst` with a solid colour using Render operator `op`
 * (currently compiled out by the enclosing #if 0).
 *
 * The BLT engine is tried first when it is preferred or when the render
 * path cannot handle the destination format; otherwise the boxes are
 * drawn as rectangles sampling a solid-colour source.  Returns false when
 * neither path can handle the request.
 */
static bool
gen7_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;

DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));

if (op >= ARRAY_SIZE(gen7_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}

if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
uint8_t alu = GXinvalid;

/* Only operators up to PictOpSrc are mapped onto a BLT raster op. */
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}

if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;

/* BLT did not handle it and the render path cannot use this format. */
if (!gen7_check_dst_format(format))
return false;
}

/* Clear is rendered as Src with a zero pixel; any other operator uses
 * the colour converted to a8r8g8b8. */
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;

DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));

tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;

/* Oversized destinations are redirected to the extents of the boxes;
 * if that fails the fill is handed to the tiling fallback. */
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;

boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
n > 1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}

tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;

tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;

tmp.u.gen7.flags = FILL_FLAGS(op, format);

kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}

gen7_emit_fill_state(sna, &tmp);
gen7_align_vertex(sna, &tmp);

do {
int n_this_time;
int16_t *v;

/* May reserve fewer rectangles than requested; loop until all are done. */
n_this_time = gen7_get_rectangles(sna, &tmp, n,
gen7_emit_fill_state);
n -= n_this_time;

/* Vertices are written as 16-bit values: 6 floats per rectangle is
 * 12 int16_t, i.e. three vertices of four int16_t each. */
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));

/* vertex 0: (x2, y2, 1, 1); vertex 1: (x1, y2, 0, 1);
 * vertex 2: (x1, y1, 0, 0) */
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);

gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
#endif
 
/*
 * Render "flush" hook (installed as sna->render.flush by gen7_render_init).
 *
 * Hands the current vertex state to gen4_vertex_close() and then checks
 * that neither a vertex-buffer id nor a vertex offset is left set.
 */
static void
gen7_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	/* Both bookkeeping fields are expected to be cleared at this point. */
	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}
 
/*
 * kgem "context_switch" hook.
 *
 * When the batch already contains commands (nbatch is non-zero) it is
 * submitted before the switch; afterwards the ring is set to new_mode.
 */
static void
gen7_render_context_switch(struct kgem *kgem, int new_mode)
{
	const bool batch_pending = kgem->nbatch != 0;

	if (batch_pending) {
		DBG(("%s: switch rings %d -> %d\n",
		     __FUNCTION__, kgem->mode, new_mode));
		_kgem_submit(kgem);
	}

	kgem->ring = new_mode;
}
 
/*
 * kgem "retire" hook.
 *
 * 1. If a ring is selected and either semaphores are available or no
 *    retire is pending, the ring is set back to the current mode.
 * 2. If the batch is empty and a vbo exists that is no longer busy, the
 *    vertex counters are rewound so the vbo is reused from its start.
 */
static void
gen7_render_retire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
		kgem->ring = kgem->mode;

	/* The checks below keep the original short-circuit order. */
	if (kgem->nbatch != 0)
		return;
	if (!sna->render.vbo)
		return;
	if (kgem_bo_is_busy(sna->render.vbo))
		return;

	DBG(("%s: resetting idle vbo\n", __FUNCTION__));
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
/*
 * kgem "expire" hook.
 *
 * If a vbo exists and holds no used vertices, it is destroyed and vertex
 * storage falls back to the embedded vertex_data array.
 */
static void
gen7_render_expire(struct kgem *kgem)
{
	struct sna *sna = container_of(kgem, struct sna, kgem);

	/* Nothing to do without a vbo, or while vertices are still in use. */
	if (!sna->render.vbo || sna->render.vertex_used)
		return;

	DBG(("%s: discarding vbo\n", __FUNCTION__));
	kgem_bo_destroy(kgem, sna->render.vbo);
	sna->render.vbo = NULL;

	/* Point vertex storage back at the built-in array. */
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}
 
static void gen7_render_reset(struct sna *sna)
{
sna->render_state.gen7.emit_flush = false;
sna->render_state.gen7.needs_invariant = true;
sna->render_state.gen7.ve_id = 3 << 2;
sna->render_state.gen7.last_primitive = -1;
 
sna->render_state.gen7.num_sf_outputs = 0;
sna->render_state.gen7.samplers = -1;
sna->render_state.gen7.blend = -1;
sna->render_state.gen7.kernel = -1;
sna->render_state.gen7.drawrect_offset = -1;
sna->render_state.gen7.drawrect_limit = -1;
sna->render_state.gen7.surface_table = -1;
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen7_render_fini(struct sna *sna)
{
kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
 
/*
 * Returns true when bit 0x20 of the PCI device id is set.
 *
 * Asserts that the device generation is 075 (octal); in this file it is
 * only called from the is_hsw() branch of gen7_render_setup().
 */
static bool
is_gt3(struct sna *sna)
{
	assert(sna->kgem.gen == 075);

	return (sna->PciInfo->device_id & 0x20) != 0;
}
 
/*
 * Returns true when the PCI device id has any of the GT2 bits set.
 * The bit mask is 0x30 when is_hsw() holds and 0x20 otherwise.
 */
static bool
is_gt2(struct sna *sna)
{
	const int gt2_bits = is_hsw(sna) ? 0x30 : 0x20;

	return (sna->PciInfo->device_id & gt2_bits) != 0;
}
 
/* Returns true when the low nibble of the PCI device id equals 0x6. */
static bool
is_mobile(struct sna *sna)
{
	const int low_nibble = sna->PciInfo->device_id & 0xf;

	return low_nibble == 0x6;
}
 
static bool gen7_render_setup(struct sna *sna)
{
struct gen7_render_state *state = &sna->render_state.gen7;
struct sna_static_stream general;
struct gen7_sampler_state *ss;
int i, j, k, l, m;
 
if (is_ivb(sna)) {
state->info = &ivb_gt_info;
if (sna->PciInfo->device_id & 0xf) {
state->info = &ivb_gt1_info;
if (is_gt2(sna))
state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
}
} else if (is_byt(sna)) {
state->info = &byt_gt_info;
} else if (is_hsw(sna)) {
state->info = &hsw_gt_info;
if (sna->PciInfo->device_id & 0xf) {
if (is_gt3(sna))
state->info = &hsw_gt3_info;
else if (is_gt2(sna))
state->info = &hsw_gt2_info;
else
state->info = &hsw_gt1_info;
}
} else
return false;
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
 
for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
if (USE_8_PIXEL_DISPATCH) {
state->wm_kernel[m][0] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 8);
}
 
if (USE_16_PIXEL_DISPATCH) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
 
if (USE_32_PIXEL_DISPATCH) {
state->wm_kernel[m][2] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 32);
}
}
assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
}
 
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
(2 +
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT),
32);
state->wm_state = sna_static_stream_offsetof(&general, ss);
sampler_copy_init(ss); ss += 2;
sampler_fill_init(ss); ss += 2;
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
sampler_state_init(ss++, i, j);
sampler_state_init(ss++, k, l);
}
}
}
}
 
state->cc_blend = gen7_composite_create_blend_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
const char *gen7_render_init(struct sna *sna, const char *backend)
{
if (!gen7_render_setup(sna))
return backend;
 
sna->kgem.context_switch = gen7_render_context_switch;
sna->kgem.retire = gen7_render_retire;
sna->kgem.expire = gen7_render_expire;
 
#if 0
#if !NO_COMPOSITE
sna->render.composite = gen7_render_composite;
sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
sna->render.check_composite_spans = gen7_check_composite_spans;
sna->render.composite_spans = gen7_render_composite_spans;
if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
sna->render.video = gen7_render_video;
 
#if !NO_COPY_BOXES
sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
sna->render.copy = gen7_render_copy;
#endif
 
#if !NO_FILL_BOXES
sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
sna->render.clear = gen7_render_clear;
#endif
#endif
 
sna->render.blit_tex = gen7_blit_tex;
sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
 
sna->render.flush = gen7_render_flush;
sna->render.reset = gen7_render_reset;
sna->render.fini = gen7_render_fini;
 
sna->render.max_3d_size = GEN7_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return sna->render_state.gen7.info->name;
}
 
 
/*
 * Prepares @tmp for a masked texture blit from @src to @dst.
 *
 * The operation is always set up as PictOpSrc with an a8r8g8b8
 * destination, an x8r8g8b8 source and an a8 mask, all sampled with
 * nearest filtering and no repeat.  When @scale is true the source
 * coordinates are normalised by @width/@height, otherwise by the source
 * pixmap size.  The WM kernel is fixed to GEN7_WM_KERNEL_MASK.
 *
 * @op and the src/msk/dst coordinate arguments are not referenced here.
 * @mask is dereferenced unconditionally, so it must not be NULL.
 *
 * On return the composite state has been emitted and the blt/done
 * callbacks of @tmp are set.  Always returns true.
 */
static bool
gen7_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask,struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	tmp->op = PictOpSrc;

	/* Destination description. */
	tmp->dst.pixmap = dst;
	tmp->dst.bo = dst_bo;
	tmp->dst.width = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_a8r8g8b8;

	/* Source channel. */
	tmp->src.bo = src_bo;
	tmp->src.width = src->drawable.width;
	tmp->src.height = src->drawable.height;
	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;
	tmp->src.is_affine = true;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	/* Mask channel. */
	tmp->mask.bo = mask_bo;
	tmp->mask.width = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);

	/* Texture coordinate normalisation factors. */
	if (!scale) {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	} else {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	/*
	 * The kernel is hard-wired to GEN7_WM_KERNEL_MASK instead of being
	 * chosen with gen7_choose_composite_kernel().
	 */
	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       GEN7_WM_KERNEL_MASK,
			       gen4_choose_composite_emitter(sna, tmp));

	/* Only the blt and done callbacks are provided; box is left unset. */
	tmp->blt = gen7_render_composite_blt;
	tmp->done = gen7_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		/* The bos do not fit: submit, then re-select render mode. */
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_emit_composite_state(sna, tmp);
	gen7_align_vertex(sna, tmp);
	return true;
}
/drivers/video/Intel-2D/sna/gen7_render.h
0,0 → 1,1366
#ifndef GEN7_RENDER_H
#define GEN7_RENDER_H
 
/*
 * Bit mask covering bits [high:low] inclusive (0 <= low <= high <= 31).
 *
 * Computed in unsigned arithmetic: a signed "1 << n" is undefined once the
 * field reaches bit 31, and a full 32-bit field would shift by the type
 * width.  The width is applied as (high - low) plus one extra shift so no
 * single shift count ever reaches 32.  The result has type unsigned int.
 */
#define INTEL_MASK(high, low) \
	((((1u << ((high) - (low))) << 1) - 1u) << (low))
 
/*
 * Builds a command opcode dword: the constant 3 shifted to bit 29, then
 * Pipeline at bit 27, Opcode at bit 24 and Subopcode at bit 16.
 */
#define GEN7_3D(Pipeline,Opcode,Subopcode) \
	((3 << 29) | ((Pipeline) << 27) | ((Opcode) << 24) | ((Subopcode) << 16))
 
#define GEN7_STATE_BASE_ADDRESS GEN7_3D(0, 1, 1)
#define GEN7_STATE_SIP GEN7_3D(0, 1, 2)
 
#define GEN7_PIPELINE_SELECT GEN7_3D(1, 1, 4)
 
#define GEN7_MEDIA_STATE_POINTERS GEN7_3D(2, 0, 0)
#define GEN7_MEDIA_OBJECT GEN7_3D(2, 1, 0)
 
#define GEN7_3DSTATE_VERTEX_BUFFERS GEN7_3D(3, 0, 8)
#define GEN7_3DSTATE_VERTEX_ELEMENTS GEN7_3D(3, 0, 9)
#define GEN7_3DSTATE_INDEX_BUFFER GEN7_3D(3, 0, 0xa)
#define GEN7_3DSTATE_VF_STATISTICS GEN7_3D(3, 0, 0xb)
 
#define GEN7_3DSTATE_DRAWING_RECTANGLE GEN7_3D(3, 1, 0)
#define GEN7_3DSTATE_CONSTANT_COLOR GEN7_3D(3, 1, 1)
#define GEN7_3DSTATE_SAMPLER_PALETTE_LOAD GEN7_3D(3, 1, 2)
#define GEN7_3DSTATE_CHROMA_KEY GEN7_3D(3, 1, 4)
 
#define GEN7_3DSTATE_POLY_STIPPLE_OFFSET GEN7_3D(3, 1, 6)
#define GEN7_3DSTATE_POLY_STIPPLE_PATTERN GEN7_3D(3, 1, 7)
#define GEN7_3DSTATE_LINE_STIPPLE GEN7_3D(3, 1, 8)
#define GEN7_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN7_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN7_3DSTATE_AA_LINE_PARAMS GEN7_3D(3, 1, 0xa)
#define GEN7_3DSTATE_GS_SVB_INDEX GEN7_3D(3, 1, 0xb)
 
#define GEN7_3DPRIMITIVE GEN7_3D(3, 3, 0)
 
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS GEN7_3D(3, 0, 0x02)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN7_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN7_3DSTATE_URB GEN7_3D(3, 0, 0x05)
/* DW1 */
# define GEN7_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN7_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN7_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN7_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS GEN7_3D(3, 0, 0x0d)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN7_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN7_3DSTATE_CC_STATE_POINTERS GEN7_3D(3, 0, 0x0e)
 
#define GEN7_3DSTATE_VS GEN7_3D(3, 0, 0x10)
 
#define GEN7_3DSTATE_GS GEN7_3D(3, 0, 0x11)
/* DW4 */
# define GEN7_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN7_3DSTATE_CLIP GEN7_3D(3, 0, 0x12)
 
#define GEN7_3DSTATE_SF GEN7_3D(3, 0, 0x13)
/* DW1 */
# define GEN7_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN7_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN7_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN7_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN7_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN7_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN7_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN7_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
#define GEN7_3DSTATE_WM GEN7_3D(3, 0, 0x14)
/* DW1 */
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
# define GEN7_WM_STATISTICS_ENABLE (1u << 31)
# define GEN7_WM_DEPTH_CLEAR (1 << 30)
# define GEN7_WM_DISPATCH_ENABLE (1 << 29)
# define GEN7_WM_DEPTH_RESOLVE (1 << 28)
# define GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE (1 << 27)
# define GEN7_WM_KILL_ENABLE (1 << 25)
# define GEN7_WM_PSCDEPTH_OFF (0 << 23)
# define GEN7_WM_PSCDEPTH_ON (1 << 23)
# define GEN7_WM_PSCDEPTH_ON_GE (2 << 23)
# define GEN7_WM_PSCDEPTH_ON_LE (3 << 23)
# define GEN7_WM_USES_SOURCE_DEPTH (1 << 20)
# define GEN7_WM_USES_SOURCE_W (1 << 19)
# define GEN7_WM_POSITION_ZW_PIXEL (0 << 17)
# define GEN7_WM_POSITION_ZW_CENTROID (2 << 17)
# define GEN7_WM_POSITION_ZW_SAMPLE (3 << 17)
# define GEN7_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 16)
# define GEN7_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 15)
# define GEN7_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 14)
# define GEN7_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 13)
# define GEN7_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 12)
# define GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 11)
# define GEN7_WM_USES_INPUT_COVERAGE_MASK (1 << 10)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 (0 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0 (1 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_2_0 (2 << 8)
# define GEN7_WM_LINE_END_CAP_AA_WIDTH_4_0 (3 << 8)
# define GEN7_WM_LINE_AA_WIDTH_0_5 (0 << 6)
# define GEN7_WM_LINE_AA_WIDTH_1_0 (1 << 6)
# define GEN7_WM_LINE_AA_WIDTH_2_0 (2 << 6)
# define GEN7_WM_LINE_AA_WIDTH_4_0 (3 << 6)
# define GEN7_WM_POLYGON_STIPPLE_ENABLE (1 << 4)
# define GEN7_WM_LINE_STIPPLE_ENABLE (1 << 3)
# define GEN7_WM_POINT_RASTRULE_UPPER_RIGHT (1 << 2)
# define GEN7_WM_MSRAST_OFF_PIXEL (0 << 0)
# define GEN7_WM_MSRAST_OFF_PATTERN (1 << 0)
# define GEN7_WM_MSRAST_ON_PIXEL (2 << 0)
# define GEN7_WM_MSRAST_ON_PATTERN (3 << 0)
/* DW2 */
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
# define GEN7_WM_MSDISPMODE_PERPIXEL (1u << 31)
 
 
#define GEN7_3DSTATE_CONSTANT_VS GEN7_3D(3, 0, 0x15)
#define GEN7_3DSTATE_CONSTANT_GS GEN7_3D(3, 0, 0x16)
#define GEN7_3DSTATE_CONSTANT_PS GEN7_3D(3, 0, 0x17)
 
#define GEN7_3DSTATE_SAMPLE_MASK GEN7_3D(3, 0, 0x18)
 
#define GEN7_3DSTATE_MULTISAMPLE GEN7_3D(3, 1, 0x0d)
/* DW1 */
# define GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
/* for GEN7_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN7_PIPE_CONTROL */
#define GEN7_PIPE_CONTROL GEN7_3D(3, 2, 0)
#define GEN7_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN7_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN7_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN7_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN7_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN7_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN7_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN7_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN7_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN7_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN7_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN7_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN7_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define GEN7_VB0_BUFFER_INDEX_SHIFT 26
#define GEN7_VB0_VERTEXDATA (0 << 20)
#define GEN7_VB0_INSTANCEDATA (1 << 20)
#define GEN7_VB0_BUFFER_PITCH_SHIFT 0
#define GEN7_VB0_ADDRESS_MODIFY_ENABLE (1 << 14)
 
/* VERTEX_ELEMENT_STATE Structure */
#define GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT 26
#define GEN7_VE0_VALID (1 << 25)
#define GEN7_VE0_FORMAT_SHIFT 16
#define GEN7_VE0_OFFSET_SHIFT 0
#define GEN7_VE1_VFCOMPONENT_0_SHIFT 28
#define GEN7_VE1_VFCOMPONENT_1_SHIFT 24
#define GEN7_VE1_VFCOMPONENT_2_SHIFT 20
#define GEN7_VE1_VFCOMPONENT_3_SHIFT 16
#define GEN7_VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN7_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
 
#define GEN7_SVG_CTL 0x7400
 
#define GEN7_SVG_CTL_GS_BA (0 << 8)
#define GEN7_SVG_CTL_SS_BA (1 << 8)
#define GEN7_SVG_CTL_IO_BA (2 << 8)
#define GEN7_SVG_CTL_GS_AUB (3 << 8)
#define GEN7_SVG_CTL_IO_AUB (4 << 8)
#define GEN7_SVG_CTL_SIP (5 << 8)
 
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
#define GEN7_VF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN7_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN7_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN7_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN7_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN7_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN7_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN7_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN7_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_VF_STRG_VAL 0x7504
#define GEN7_VF_STR_VL_OVR 0x7508
#define GEN7_VF_VC_OVR 0x750c
#define GEN7_VF_STR_PSKIP 0x7510
#define GEN7_VF_MAX_PRIM 0x7514
#define GEN7_VF_RDATA 0x7518
 
#define GEN7_VS_CTL 0x7600
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
#define GEN7_VS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN7_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN7_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_VS_STRG_VAL 0x7604
#define GEN7_VS_RDATA 0x7608
 
#define GEN7_SF_CTL 0x7b00
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
#define GEN7_SF_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN7_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN7_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN7_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN7_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_SF_STRG_VAL 0x7b04
#define GEN7_SF_RDATA 0x7b18
 
#define GEN7_WIZ_CTL 0x7c00
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
#define GEN7_WIZ_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN7_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN7_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN7_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN7_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN7_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN7_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN7_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN7_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN7_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_WIZ_STRG_VAL 0x7c04
#define GEN7_WIZ_RDATA 0x7c18
 
#define GEN7_TS_CTL 0x7e00
/* Bit 31: unsigned literal, since "1 << 31" overflows a signed 32-bit int. */
#define GEN7_TS_CTL_SNAPSHOT_COMPLETE (1u << 31)
#define GEN7_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN7_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN7_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN7_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN7_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN7_TS_STRG_VAL 0x7e04
#define GEN7_TS_RDATA 0x7e08
 
#define GEN7_TD_CTL 0x8000
#define GEN7_TD_CTL_MUX_SHIFT 8
#define GEN7_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN7_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN7_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN7_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN7_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN7_TD_CTL2 0x8004
#define GEN7_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN7_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN7_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN7_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN7_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN7_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN7_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN7_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN7_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN7_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN7_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN7_TD_VF_VS_EMSK 0x8008
#define GEN7_TD_GS_EMSK 0x800c
#define GEN7_TD_CLIP_EMSK 0x8010
#define GEN7_TD_SF_EMSK 0x8014
#define GEN7_TD_WIZ_EMSK 0x8018
#define GEN7_TD_0_6_EHTRG_VAL 0x801c
#define GEN7_TD_0_7_EHTRG_VAL 0x8020
#define GEN7_TD_0_6_EHTRG_MSK 0x8024
#define GEN7_TD_0_7_EHTRG_MSK 0x8028
#define GEN7_TD_RDATA 0x802c
#define GEN7_TD_TS_EMSK 0x8030
 
#define GEN7_EU_CTL 0x8800
#define GEN7_EU_CTL_SELECT_SHIFT 16
#define GEN7_EU_CTL_DATA_MUX_SHIFT 8
#define GEN7_EU_ATT_0 0x8810
#define GEN7_EU_ATT_1 0x8814
#define GEN7_EU_ATT_DATA_0 0x8820
#define GEN7_EU_ATT_DATA_1 0x8824
#define GEN7_EU_ATT_CLR_0 0x8830
#define GEN7_EU_ATT_CLR_1 0x8834
#define GEN7_EU_RDATA 0x8840
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN7_ANISORATIO_2 0
#define GEN7_ANISORATIO_4 1
#define GEN7_ANISORATIO_6 2
#define GEN7_ANISORATIO_8 3
#define GEN7_ANISORATIO_10 4
#define GEN7_ANISORATIO_12 5
#define GEN7_ANISORATIO_14 6
#define GEN7_ANISORATIO_16 7
 
#define GEN7_BLENDFACTOR_ONE 0x1
#define GEN7_BLENDFACTOR_SRC_COLOR 0x2
#define GEN7_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN7_BLENDFACTOR_DST_ALPHA 0x4
#define GEN7_BLENDFACTOR_DST_COLOR 0x5
#define GEN7_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN7_BLENDFACTOR_CONST_COLOR 0x7
#define GEN7_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN7_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN7_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN7_BLENDFACTOR_ZERO 0x11
#define GEN7_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN7_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN7_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN7_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN7_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN7_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN7_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN7_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN7_BLENDFUNCTION_ADD 0
#define GEN7_BLENDFUNCTION_SUBTRACT 1
#define GEN7_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN7_BLENDFUNCTION_MIN 3
#define GEN7_BLENDFUNCTION_MAX 4
 
#define GEN7_ALPHATEST_FORMAT_UNORM8 0
#define GEN7_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN7_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN7_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN7_CLIP_API_OGL 0
#define GEN7_CLIP_API_DX 1
 
#define GEN7_CLIPMODE_NORMAL 0
#define GEN7_CLIPMODE_CLIP_ALL 1
#define GEN7_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN7_CLIPMODE_REJECT_ALL 3
#define GEN7_CLIPMODE_ACCEPT_ALL 4
 
#define GEN7_CLIP_NDCSPACE 0
#define GEN7_CLIP_SCREENSPACE 1
 
#define GEN7_COMPAREFUNCTION_ALWAYS 0
#define GEN7_COMPAREFUNCTION_NEVER 1
#define GEN7_COMPAREFUNCTION_LESS 2
#define GEN7_COMPAREFUNCTION_EQUAL 3
#define GEN7_COMPAREFUNCTION_LEQUAL 4
#define GEN7_COMPAREFUNCTION_GREATER 5
#define GEN7_COMPAREFUNCTION_NOTEQUAL 6
#define GEN7_COMPAREFUNCTION_GEQUAL 7
 
#define GEN7_COVERAGE_PIXELS_HALF 0
#define GEN7_COVERAGE_PIXELS_1 1
#define GEN7_COVERAGE_PIXELS_2 2
#define GEN7_COVERAGE_PIXELS_4 3
 
#define GEN7_CULLMODE_BOTH 0
#define GEN7_CULLMODE_NONE 1
#define GEN7_CULLMODE_FRONT 2
#define GEN7_CULLMODE_BACK 3
 
#define GEN7_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN7_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN7_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN7_DEPTHFORMAT_D32_FLOAT 1
#define GEN7_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN7_DEPTHFORMAT_D16_UNORM 5
 
#define GEN7_FLOATING_POINT_IEEE_754 0
#define GEN7_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN7_FRONTWINDING_CW 0
#define GEN7_FRONTWINDING_CCW 1
 
#define GEN7_INDEX_BYTE 0
#define GEN7_INDEX_WORD 1
#define GEN7_INDEX_DWORD 2
 
#define GEN7_LOGICOPFUNCTION_CLEAR 0
#define GEN7_LOGICOPFUNCTION_NOR 1
#define GEN7_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN7_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN7_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN7_LOGICOPFUNCTION_INVERT 5
#define GEN7_LOGICOPFUNCTION_XOR 6
#define GEN7_LOGICOPFUNCTION_NAND 7
#define GEN7_LOGICOPFUNCTION_AND 8
#define GEN7_LOGICOPFUNCTION_EQUIV 9
#define GEN7_LOGICOPFUNCTION_NOOP 10
#define GEN7_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN7_LOGICOPFUNCTION_COPY 12
#define GEN7_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN7_LOGICOPFUNCTION_OR 14
#define GEN7_LOGICOPFUNCTION_SET 15
 
#define GEN7_MAPFILTER_NEAREST 0x0
#define GEN7_MAPFILTER_LINEAR 0x1
#define GEN7_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN7_MIPFILTER_NONE 0
#define GEN7_MIPFILTER_NEAREST 1
#define GEN7_MIPFILTER_LINEAR 3
 
#define GEN7_POLYGON_FRONT_FACING 0
#define GEN7_POLYGON_BACK_FACING 1
 
#define GEN7_PREFILTER_ALWAYS 0x0
#define GEN7_PREFILTER_NEVER 0x1
#define GEN7_PREFILTER_LESS 0x2
#define GEN7_PREFILTER_EQUAL 0x3
#define GEN7_PREFILTER_LEQUAL 0x4
#define GEN7_PREFILTER_GREATER 0x5
#define GEN7_PREFILTER_NOTEQUAL 0x6
#define GEN7_PREFILTER_GEQUAL 0x7
 
#define GEN7_PROVOKING_VERTEX_0 0
#define GEN7_PROVOKING_VERTEX_1 1
#define GEN7_PROVOKING_VERTEX_2 2
 
#define GEN7_RASTRULE_UPPER_LEFT 0
#define GEN7_RASTRULE_UPPER_RIGHT 1
 
#define GEN7_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN7_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN7_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN7_STENCILOP_KEEP 0
#define GEN7_STENCILOP_ZERO 1
#define GEN7_STENCILOP_REPLACE 2
#define GEN7_STENCILOP_INCRSAT 3
#define GEN7_STENCILOP_DECRSAT 4
#define GEN7_STENCILOP_INCR 5
#define GEN7_STENCILOP_DECR 6
#define GEN7_STENCILOP_INVERT 7
 
#define GEN7_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN7_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN7_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN7_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN7_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN7_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN7_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN7_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN7_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN7_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN7_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN7_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN7_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN7_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN7_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN7_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN7_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN7_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN7_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN7_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN7_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN7_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN7_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN7_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN7_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN7_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN7_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN7_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN7_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN7_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN7_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN7_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN7_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN7_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN7_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN7_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN7_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN7_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN7_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN7_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN7_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN7_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN7_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN7_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN7_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN7_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN7_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN7_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN7_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN7_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN7_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN7_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN7_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN7_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN7_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN7_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN7_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN7_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN7_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN7_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN7_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN7_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN7_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN7_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN7_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN7_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN7_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN7_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN7_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN7_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN7_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN7_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN7_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN7_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN7_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN7_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN7_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN7_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN7_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN7_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN7_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN7_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN7_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN7_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN7_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN7_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN7_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN7_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN7_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN7_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN7_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN7_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN7_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN7_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN7_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN7_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN7_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN7_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN7_SURFACEFORMAT_R16_SINT 0x10C
#define GEN7_SURFACEFORMAT_R16_UINT 0x10D
#define GEN7_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN7_SURFACEFORMAT_I16_UNORM 0x111
#define GEN7_SURFACEFORMAT_L16_UNORM 0x112
#define GEN7_SURFACEFORMAT_A16_UNORM 0x113
#define GEN7_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN7_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN7_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN7_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN7_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN7_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN7_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN7_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN7_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN7_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN7_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN7_SURFACEFORMAT_R8_UNORM 0x140
#define GEN7_SURFACEFORMAT_R8_SNORM 0x141
#define GEN7_SURFACEFORMAT_R8_SINT 0x142
#define GEN7_SURFACEFORMAT_R8_UINT 0x143
#define GEN7_SURFACEFORMAT_A8_UNORM 0x144
#define GEN7_SURFACEFORMAT_I8_UNORM 0x145
#define GEN7_SURFACEFORMAT_L8_UNORM 0x146
#define GEN7_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN7_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN7_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN7_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN7_SURFACEFORMAT_R1_UINT 0x181
#define GEN7_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN7_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN7_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN7_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN7_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN7_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN7_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN7_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN7_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN7_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN7_SURFACEFORMAT_MONO8 0x18E
#define GEN7_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN7_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN7_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN7_SURFACEFORMAT_FXT1 0x192
#define GEN7_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN7_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN7_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN7_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN7_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN7_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN7_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN7_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN7_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN7_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN7_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN7_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN7_SURFACERETURNFORMAT_FLOAT32 0
#define GEN7_SURFACERETURNFORMAT_S1 1
 
#define GEN7_SURFACE_1D 0
#define GEN7_SURFACE_2D 1
#define GEN7_SURFACE_3D 2
#define GEN7_SURFACE_CUBE 3
#define GEN7_SURFACE_BUFFER 4
#define GEN7_SURFACE_NULL 7
 
#define GEN7_BORDER_COLOR_MODE_DEFAULT 0
#define GEN7_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN7_TEXCOORDMODE_WRAP 0
#define GEN7_TEXCOORDMODE_MIRROR 1
#define GEN7_TEXCOORDMODE_CLAMP 2
#define GEN7_TEXCOORDMODE_CUBE 3
#define GEN7_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN7_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN7_THREAD_PRIORITY_NORMAL 0
#define GEN7_THREAD_PRIORITY_HIGH 1
 
#define GEN7_TILEWALK_XMAJOR 0
#define GEN7_TILEWALK_YMAJOR 1
 
#define GEN7_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN7_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN7_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN7_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define GEN7_VFCOMPONENT_NOSTORE 0
#define GEN7_VFCOMPONENT_STORE_SRC 1
#define GEN7_VFCOMPONENT_STORE_0 2
#define GEN7_VFCOMPONENT_STORE_1_FLT 3
#define GEN7_VFCOMPONENT_STORE_1_INT 4
#define GEN7_VFCOMPONENT_STORE_VID 5
#define GEN7_VFCOMPONENT_STORE_IID 6
#define GEN7_VFCOMPONENT_STORE_PID 7
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN7_ALIGN_1 0
#define GEN7_ALIGN_16 1
 
#define GEN7_ADDRESS_DIRECT 0
#define GEN7_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN7_CHANNEL_X 0
#define GEN7_CHANNEL_Y 1
#define GEN7_CHANNEL_Z 2
#define GEN7_CHANNEL_W 3
 
#define GEN7_COMPRESSION_NONE 0
#define GEN7_COMPRESSION_2NDHALF 1
#define GEN7_COMPRESSION_COMPRESSED 2
 
/* GEN7_CONDITIONAL_* codes. EQ and NEQ are aliases: they carry the same
 * values as Z (1) and NZ (2) respectively.
 */
#define GEN7_CONDITIONAL_NONE 0
#define GEN7_CONDITIONAL_Z 1
#define GEN7_CONDITIONAL_NZ 2
#define GEN7_CONDITIONAL_EQ 1 /* Z */
#define GEN7_CONDITIONAL_NEQ 2 /* NZ */
#define GEN7_CONDITIONAL_G 3
#define GEN7_CONDITIONAL_GE 4
#define GEN7_CONDITIONAL_L 5
#define GEN7_CONDITIONAL_LE 6
#define GEN7_CONDITIONAL_C 7
#define GEN7_CONDITIONAL_O 8
 
#define GEN7_DEBUG_NONE 0
#define GEN7_DEBUG_BREAKPOINT 1
 
#define GEN7_DEPENDENCY_NORMAL 0
#define GEN7_DEPENDENCY_NOTCLEARED 1
#define GEN7_DEPENDENCY_NOTCHECKED 2
#define GEN7_DEPENDENCY_DISABLE 3
 
#define GEN7_EXECUTE_1 0
#define GEN7_EXECUTE_2 1
#define GEN7_EXECUTE_4 2
#define GEN7_EXECUTE_8 3
#define GEN7_EXECUTE_16 4
#define GEN7_EXECUTE_32 5
 
#define GEN7_HORIZONTAL_STRIDE_0 0
#define GEN7_HORIZONTAL_STRIDE_1 1
#define GEN7_HORIZONTAL_STRIDE_2 2
#define GEN7_HORIZONTAL_STRIDE_4 3
 
#define GEN7_INSTRUCTION_NORMAL 0
#define GEN7_INSTRUCTION_SATURATE 1
 
#define INTEL_MASK_ENABLE 0
#define INTEL_MASK_DISABLE 1
 
#define GEN7_OPCODE_MOV 1
#define GEN7_OPCODE_SEL 2
#define GEN7_OPCODE_NOT 4
#define GEN7_OPCODE_AND 5
#define GEN7_OPCODE_OR 6
#define GEN7_OPCODE_XOR 7
#define GEN7_OPCODE_SHR 8
#define GEN7_OPCODE_SHL 9
#define GEN7_OPCODE_RSR 10
#define GEN7_OPCODE_RSL 11
#define GEN7_OPCODE_ASR 12
#define GEN7_OPCODE_CMP 16
#define GEN7_OPCODE_JMPI 32
#define GEN7_OPCODE_IF 34
#define GEN7_OPCODE_IFF 35
#define GEN7_OPCODE_ELSE 36
#define GEN7_OPCODE_ENDIF 37
#define GEN7_OPCODE_DO 38
#define GEN7_OPCODE_WHILE 39
#define GEN7_OPCODE_BREAK 40
#define GEN7_OPCODE_CONTINUE 41
#define GEN7_OPCODE_HALT 42
#define GEN7_OPCODE_MSAVE 44
#define GEN7_OPCODE_MRESTORE 45
#define GEN7_OPCODE_PUSH 46
#define GEN7_OPCODE_POP 47
#define GEN7_OPCODE_WAIT 48
#define GEN7_OPCODE_SEND 49
#define GEN7_OPCODE_ADD 64
#define GEN7_OPCODE_MUL 65
#define GEN7_OPCODE_AVG 66
#define GEN7_OPCODE_FRC 67
#define GEN7_OPCODE_RNDU 68
#define GEN7_OPCODE_RNDD 69
#define GEN7_OPCODE_RNDE 70
#define GEN7_OPCODE_RNDZ 71
#define GEN7_OPCODE_MAC 72
#define GEN7_OPCODE_MACH 73
#define GEN7_OPCODE_LZD 74
#define GEN7_OPCODE_SAD2 80
#define GEN7_OPCODE_SADA2 81
#define GEN7_OPCODE_DP4 84
#define GEN7_OPCODE_DPH 85
#define GEN7_OPCODE_DP3 86
#define GEN7_OPCODE_DP2 87
#define GEN7_OPCODE_DPA2 88
#define GEN7_OPCODE_LINE 89
#define GEN7_OPCODE_NOP 126
 
#define GEN7_PREDICATE_NONE 0
#define GEN7_PREDICATE_NORMAL 1
#define GEN7_PREDICATE_ALIGN1_ANYV 2
#define GEN7_PREDICATE_ALIGN1_ALLV 3
#define GEN7_PREDICATE_ALIGN1_ANY2H 4
#define GEN7_PREDICATE_ALIGN1_ALL2H 5
#define GEN7_PREDICATE_ALIGN1_ANY4H 6
#define GEN7_PREDICATE_ALIGN1_ALL4H 7
#define GEN7_PREDICATE_ALIGN1_ANY8H 8
#define GEN7_PREDICATE_ALIGN1_ALL8H 9
#define GEN7_PREDICATE_ALIGN1_ANY16H 10
#define GEN7_PREDICATE_ALIGN1_ALL16H 11
#define GEN7_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN7_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN7_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN7_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN7_PREDICATE_ALIGN16_ANY4H 6
#define GEN7_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN7_ARCHITECTURE_REGISTER_FILE 0
#define GEN7_GENERAL_REGISTER_FILE 1
#define GEN7_MESSAGE_REGISTER_FILE 2
#define GEN7_IMMEDIATE_VALUE 3
 
/* GEN7_REGISTER_TYPE_* codes. Values 5 and 6 are each defined twice
 * (B/VF and HF/V); per the inline notes the VF and V readings apply to
 * immediates, so the meaning of 5 and 6 depends on the operand kind.
 */
#define GEN7_REGISTER_TYPE_UD 0
#define GEN7_REGISTER_TYPE_D 1
#define GEN7_REGISTER_TYPE_UW 2
#define GEN7_REGISTER_TYPE_W 3
#define GEN7_REGISTER_TYPE_UB 4
#define GEN7_REGISTER_TYPE_B 5
#define GEN7_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN7_REGISTER_TYPE_HF 6
#define GEN7_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN7_REGISTER_TYPE_F 7
 
#define GEN7_ARF_NULL 0x00
#define GEN7_ARF_ADDRESS 0x10
#define GEN7_ARF_ACCUMULATOR 0x20
#define GEN7_ARF_FLAG 0x30
#define GEN7_ARF_MASK 0x40
#define GEN7_ARF_MASK_STACK 0x50
#define GEN7_ARF_MASK_STACK_DEPTH 0x60
#define GEN7_ARF_STATE 0x70
#define GEN7_ARF_CONTROL 0x80
#define GEN7_ARF_NOTIFICATION_COUNT 0x90
#define GEN7_ARF_IP 0xA0
 
#define GEN7_AMASK 0
#define GEN7_IMASK 1
#define GEN7_LMASK 2
#define GEN7_CMASK 3
 
#define GEN7_THREAD_NORMAL 0
#define GEN7_THREAD_ATOMIC 1
#define GEN7_THREAD_SWITCH 2
 
#define GEN7_VERTICAL_STRIDE_0 0
#define GEN7_VERTICAL_STRIDE_1 1
#define GEN7_VERTICAL_STRIDE_2 2
#define GEN7_VERTICAL_STRIDE_4 3
#define GEN7_VERTICAL_STRIDE_8 4
#define GEN7_VERTICAL_STRIDE_16 5
#define GEN7_VERTICAL_STRIDE_32 6
#define GEN7_VERTICAL_STRIDE_64 7
#define GEN7_VERTICAL_STRIDE_128 8
#define GEN7_VERTICAL_STRIDE_256 9
#define GEN7_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN7_WIDTH_1 0
#define GEN7_WIDTH_2 1
#define GEN7_WIDTH_4 2
#define GEN7_WIDTH_8 3
#define GEN7_WIDTH_16 4
 
#define GEN7_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN7_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN7_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN7_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN7_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN7_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN7_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN7_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN7_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN7_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN7_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN7_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN7_POLYGON_FACING_FRONT 0
#define GEN7_POLYGON_FACING_BACK 1
 
#define GEN7_MESSAGE_TARGET_NULL 0
#define GEN7_MESSAGE_TARGET_MATH 1
#define GEN7_MESSAGE_TARGET_SAMPLER 2
#define GEN7_MESSAGE_TARGET_GATEWAY 3
#define GEN7_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN7_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN7_MESSAGE_TARGET_URB 6
#define GEN7_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN7_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN7_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN7_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN7_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN7_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN7_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN7_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN7_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN7_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN7_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN7_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN7_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN7_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN7_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN7_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN7_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN7_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN7_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN7_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN7_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
#define GEN7_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define GEN7_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2
#define GEN7_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
 
#define GEN7_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN7_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN7_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN7_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define GEN7_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2
#define GEN7_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define GEN7_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define GEN7_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define GEN7_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
#define GEN7_MATH_FUNCTION_INV 1
#define GEN7_MATH_FUNCTION_LOG 2
#define GEN7_MATH_FUNCTION_EXP 3
#define GEN7_MATH_FUNCTION_SQRT 4
#define GEN7_MATH_FUNCTION_RSQ 5
#define GEN7_MATH_FUNCTION_SIN 6 /* was 7 */
#define GEN7_MATH_FUNCTION_COS 7 /* was 8 */
#define GEN7_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define GEN7_MATH_FUNCTION_TAN 9
#define GEN7_MATH_FUNCTION_POW 10
#define GEN7_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define GEN7_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define GEN7_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define GEN7_MATH_INTEGER_UNSIGNED 0
#define GEN7_MATH_INTEGER_SIGNED 1
 
#define GEN7_MATH_PRECISION_FULL 0
#define GEN7_MATH_PRECISION_PARTIAL 1
 
#define GEN7_MATH_SATURATE_NONE 0
#define GEN7_MATH_SATURATE_SATURATE 1
 
#define GEN7_MATH_DATA_VECTOR 0
#define GEN7_MATH_DATA_SCALAR 1
 
#define GEN7_URB_OPCODE_WRITE 0
 
#define GEN7_URB_SWIZZLE_NONE 0
#define GEN7_URB_SWIZZLE_INTERLEAVE 1
#define GEN7_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN7_SCRATCH_SPACE_SIZE_1K 0
#define GEN7_SCRATCH_SPACE_SIZE_2K 1
#define GEN7_SCRATCH_SPACE_SIZE_4K 2
#define GEN7_SCRATCH_SPACE_SIZE_8K 3
#define GEN7_SCRATCH_SPACE_SIZE_16K 4
#define GEN7_SCRATCH_SPACE_SIZE_32K 5
#define GEN7_SCRATCH_SPACE_SIZE_64K 6
#define GEN7_SCRATCH_SPACE_SIZE_128K 7
#define GEN7_SCRATCH_SPACE_SIZE_256K 8
#define GEN7_SCRATCH_SPACE_SIZE_512K 9
#define GEN7_SCRATCH_SPACE_SIZE_1M 10
#define GEN7_SCRATCH_SPACE_SIZE_2M 11
 
/* The hardware supports two different modes for border color. The
 * default (OpenGL) mode uses floating-point color channels, while the
 * legacy mode uses 4 bytes.
 *
 * More significantly, the legacy mode respects the components of the
 * border color for channels not present in the source (whereas the
 * default mode will ignore the border color's alpha channel and use
 * alpha==1 for an RGB source, for example).
 *
 * The legacy mode matches the semantics specified by the Render
 * extension.
 *
 * This structure is the default-mode layout: four float components.
 * NOTE(review): the channel order within color[] is not stated here;
 * confirm against the code that fills it.
 */
struct gen7_sampler_default_border_color {
float color[4];
};
 
/* Border color for the legacy border-color mode: four one-byte components.
 * NOTE(review): the channel order within color[] is not stated here.
 */
struct gen7_sampler_legacy_border_color {
uint8_t color[4];
};
 
/* Blend state expressed as two words of bitfields (blend0, blend1).
 * The field widths in each word add up to exactly 32 bits; padN members
 * only reserve space.
 * NOTE(review): the ia_* fields presumably are the "independent alpha"
 * counterparts of the colour blend fields - confirm against the hardware
 * documentation.
 */
struct gen7_blend_state {
struct {
uint32_t dest_blend_factor:5;
uint32_t source_blend_factor:5;
uint32_t pad3:1;
uint32_t blend_func:3;
uint32_t pad2:1;
uint32_t ia_dest_blend_factor:5;
uint32_t ia_source_blend_factor:5;
uint32_t pad1:1;
uint32_t ia_blend_func:3;
uint32_t pad0:1;
uint32_t ia_blend_enable:1;
uint32_t blend_enable:1;
} blend0;

struct {
uint32_t post_blend_clamp_enable:1;
uint32_t pre_blend_clamp_enable:1;
uint32_t clamp_range:2;
uint32_t pad0:4;
uint32_t x_dither_offset:2;
uint32_t y_dither_offset:2;
uint32_t dither_enable:1;
uint32_t alpha_test_func:3;
uint32_t alpha_test_enable:1;
uint32_t pad1:1;
uint32_t logic_op_func:4;
uint32_t logic_op_enable:1;
uint32_t pad2:1;
uint32_t write_disable_b:1;
uint32_t write_disable_g:1;
uint32_t write_disable_r:1;
uint32_t write_disable_a:1;
uint32_t pad3:1;
uint32_t alpha_to_coverage_dither:1;
uint32_t alpha_to_one:1;
uint32_t alpha_to_coverage:1;
} blend1;
};
 
/* Color-calculator state: one word of bitfields (cc0), an alpha reference
 * word (cc1) and four float constants.
 */
struct gen7_color_calc_state {
struct {
uint32_t alpha_test_format:1;
uint32_t pad0:14;
uint32_t round_disable:1;
uint32_t bf_stencil_ref:8;
uint32_t stencil_ref:8;
} cc0;

/* The alpha reference occupies one 32-bit slot that can be accessed either
 * as a float or as an 8-bit unsigned value in the low bits.
 * NOTE(review): presumably cc0.alpha_test_format selects which view the
 * hardware uses - confirm.
 */
union {
float alpha_ref_f;
struct {
uint32_t ui:8;
uint32_t pad0:24;
} alpha_ref_fi;
} cc1;

float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
/* Depth/stencil state as three words of bitfields (ds0..ds2); the field
 * widths in each word add up to 32 bits.
 * NOTE(review): the bf_ prefix presumably marks the back-face variants of
 * the stencil fields - confirm against the hardware documentation.
 */
struct gen7_depth_stencil_state {
/* Stencil operations, functions and enables. */
struct {
uint32_t pad0:3;
uint32_t bf_stencil_pass_depth_pass_op:3;
uint32_t bf_stencil_pass_depth_fail_op:3;
uint32_t bf_stencil_fail_op:3;
uint32_t bf_stencil_func:3;
uint32_t bf_stencil_enable:1;
uint32_t pad1:2;
uint32_t stencil_write_enable:1;
uint32_t stencil_pass_depth_pass_op:3;
uint32_t stencil_pass_depth_fail_op:3;
uint32_t stencil_fail_op:3;
uint32_t stencil_func:3;
uint32_t stencil_enable:1;
} ds0;

/* Four 8-bit stencil masks. */
struct {
uint32_t bf_stencil_write_mask:8;
uint32_t bf_stencil_test_mask:8;
uint32_t stencil_write_mask:8;
uint32_t stencil_test_mask:8;
} ds1;

/* Depth test controls; the low 26 bits are padding. */
struct {
uint32_t pad0:26;
uint32_t depth_write_enable:1;
uint32_t depth_test_func:3;
uint32_t pad1:1;
uint32_t depth_test_enable:1;
} ds2;
};
 
/* Surface state as eight 32-bit words (ss0..ss7). Every bitfield word sums
 * to 32 bits; ss1 and ss6 are plain unsigned ints.
 */
struct gen7_surface_state {
struct {
unsigned int cube_pos_z:1;
unsigned int cube_neg_z:1;
unsigned int cube_pos_y:1;
unsigned int cube_neg_y:1;
unsigned int cube_pos_x:1;
unsigned int cube_neg_x:1;
unsigned int pad2:2;
unsigned int render_cache_read_write:1;
unsigned int pad1:1;
unsigned int surface_array_spacing:1;
unsigned int vert_line_stride_ofs:1;
unsigned int vert_line_stride:1;
unsigned int tile_walk:1;
unsigned int tiled_surface:1;
unsigned int horizontal_alignment:1;
unsigned int vertical_alignment:2;
unsigned int surface_format:9; /**< GEN7_SURFACEFORMAT_x */
unsigned int pad0:1;
unsigned int is_array:1;
unsigned int surface_type:3; /**< GEN7_SURFACE_1D/2D/3D/CUBE/BUFFER/NULL */
} ss0;

struct {
unsigned int base_addr;
} ss1;

struct {
unsigned int width:14;
unsigned int pad1:2;
unsigned int height:14;
unsigned int pad0:2;
} ss2;

struct {
unsigned int pitch:18;
unsigned int pad:3;
unsigned int depth:11;
} ss3;

struct {
unsigned int multisample_position_palette_index:3;
unsigned int num_multisamples:3;
unsigned int multisampled_surface_storage_format:1;
unsigned int render_target_view_extent:11;
unsigned int min_array_elt:11;
unsigned int rotation:2;
unsigned int pad0:1;
} ss4;

struct {
unsigned int mip_count:4;
unsigned int min_lod:4;
unsigned int pad1:12;
unsigned int y_offset:4;
unsigned int pad0:1;
unsigned int x_offset:7;
} ss5;

struct {
unsigned int pad; /* Multisample Control Surface stuff */
} ss6;

struct {
unsigned int resource_min_lod:12;
unsigned int pad0:16;
unsigned int alpha_clear_color:1;
unsigned int blue_clear_color:1;
unsigned int green_clear_color:1;
unsigned int red_clear_color:1;
} ss7;
};
 
/* Sampler state as four words of bitfields (ss0..ss3); the field widths in
 * each word add up to 32 bits.
 */
struct gen7_sampler_state {
/* Filtering, LOD bias and enable controls. */
struct {
unsigned int aniso_algorithm:1;
unsigned int lod_bias:13;
unsigned int min_filter:3;
unsigned int mag_filter:3;
unsigned int mip_filter:2;
unsigned int base_level:5;
unsigned int pad1:1;
unsigned int lod_preclamp:1;
unsigned int default_color_mode:1;
unsigned int pad0:1;
unsigned int disable:1;
} ss0;

/* LOD clamp range plus shadow/cube controls. */
struct {
unsigned int cube_control_mode:1;
unsigned int shadow_function:3;
unsigned int pad:4;
unsigned int max_lod:12;
unsigned int min_lod:12;
} ss1;

/* The pointer field omits the low 5 bits of the word.
 * NOTE(review): presumably the border-color address is 32-byte aligned and
 * stored shifted right by 5 - confirm with the code that fills this in.
 */
struct {
unsigned int pad:5;
unsigned int default_color_pointer:27;
} ss2;

/* Wrap modes per coordinate, anisotropy and chroma-key controls. */
struct {
unsigned int r_wrap_mode:3;
unsigned int t_wrap_mode:3;
unsigned int s_wrap_mode:3;
unsigned int pad:1;
unsigned int non_normalized_coord:1;
unsigned int trilinear_quality:2;
unsigned int address_round:6;
unsigned int max_aniso:3;
unsigned int chroma_key_mode:1;
unsigned int chroma_key_index:2;
unsigned int chroma_key_enable:1;
unsigned int pad0:6;
} ss3;
};
 
/* Raw-dword equivalents of the struct gen7_surface_state bitfields, for
 * code that assembles surface state words by hand.
 * NOTE(review): the bit positions below line up with the ss0/ss2/ss3
 * bitfield order only if bitfields are allocated from bit 0 upward, which
 * is ABI-dependent - confirm for the target compiler.
 */

/* Surface state DW0 */
#define GEN7_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN7_SURFACE_VALIGN_4 (1 << 16)
#define GEN7_SURFACE_HALIGN_8 (1 << 15)
#define GEN7_SURFACE_TILED (1 << 14)
#define GEN7_SURFACE_TILED_Y (1 << 13)
#define GEN7_SURFACE_FORMAT_SHIFT 18
#define GEN7_SURFACE_TYPE_SHIFT 29

/* Surface state DW2 */
#define GEN7_SURFACE_HEIGHT_SHIFT 16
#define GEN7_SURFACE_WIDTH_SHIFT 0

/* Surface state DW3 */
#define GEN7_SURFACE_DEPTH_SHIFT 21
#define GEN7_SURFACE_PITCH_SHIFT 0
 
/* Channel-select codes accepted by the swizzle macros below. */
#define HSW_SWIZZLE_ZERO 0
#define HSW_SWIZZLE_ONE 1
#define HSW_SWIZZLE_RED 4
#define HSW_SWIZZLE_GREEN 5
#define HSW_SWIZZLE_BLUE 6
#define HSW_SWIZZLE_ALPHA 7
/* Pack four numeric channel selects into one word: alpha at bit 16,
 * blue at bit 19, green at bit 22 and red at bit 25.
 */
#define __HSW_SURFACE_SWIZZLE(r,g,b,a) \
((a) << 16 | (b) << 19 | (g) << 22 | (r) << 25)
/* Convenience form taking bare suffixes: each argument is token-pasted onto
 * HSW_SWIZZLE_, e.g. HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ONE).
 */
#define HSW_SURFACE_SWIZZLE(r,g,b,a) \
__HSW_SURFACE_SWIZZLE(HSW_SWIZZLE_##r, HSW_SWIZZLE_##g, HSW_SWIZZLE_##b, HSW_SWIZZLE_##a)
 
/* _3DSTATE_VERTEX_BUFFERS on GEN7*/
/* DW1 */
#define GEN7_VB0_ADDRESS_MODIFYENABLE (1 << 14)
 
/* _3DPRIMITIVE on GEN7 */
/* DW1 */
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8)
# define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8)
 
#define GEN7_3DSTATE_CLEAR_PARAMS GEN7_3D(3, 0, 0x04)
#define GEN7_3DSTATE_DEPTH_BUFFER GEN7_3D(3, 0, 0x05)
# define GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
/* DW1 */
# define GEN7_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define GEN7_3DSTATE_CONSTANT_HS GEN7_3D(3, 0, 0x19)
#define GEN7_3DSTATE_CONSTANT_DS GEN7_3D(3, 0, 0x1a)
 
#define GEN7_3DSTATE_HS GEN7_3D(3, 0, 0x1b)
#define GEN7_3DSTATE_TE GEN7_3D(3, 0, 0x1c)
#define GEN7_3DSTATE_DS GEN7_3D(3, 0, 0x1d)
#define GEN7_3DSTATE_STREAMOUT GEN7_3D(3, 0, 0x1e)
#define GEN7_3DSTATE_SBE GEN7_3D(3, 0, 0x1f)
 
/* DW1 */
# define GEN7_SBE_SWIZZLE_CONTROL_MODE (1 << 28)
# define GEN7_SBE_NUM_OUTPUTS_SHIFT 22
# define GEN7_SBE_SWIZZLE_ENABLE (1 << 21)
# define GEN7_SBE_POINT_SPRITE_LOWERLEFT (1 << 20)
# define GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT 4
 
/* 3DSTATE_PS command opcode followed by the bit definitions of its dwords.
 *
 * GEN7_PS_SPF_MODE occupies bit 31. It is written with an unsigned literal
 * because shifting a signed 1 into the sign bit is undefined behaviour in C
 * and yields a negative value that sign-extends when widened.
 */
#define GEN7_3DSTATE_PS GEN7_3D(3, 0, 0x20)
/* DW1: kernel pointer */
/* DW2 */
# define GEN7_PS_SPF_MODE (1u << 31)
# define GEN7_PS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN7_PS_SAMPLER_COUNT_SHIFT 27
# define GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN7_PS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN7_PS_FLOATING_POINT_MODE_ALT (1 << 16)
/* DW3: scratch space */
/* DW4 */
/* The max-threads field starts at a different bit on IVB (24) and HSW (23). */
# define IVB_PS_MAX_THREADS_SHIFT 24
# define HSW_PS_MAX_THREADS_SHIFT 23
# define HSW_PS_SAMPLE_MASK_SHIFT 12
# define GEN7_PS_PUSH_CONSTANT_ENABLE (1 << 11)
# define GEN7_PS_ATTRIBUTE_ENABLE (1 << 10)
# define GEN7_PS_OMASK_TO_RENDER_TARGET (1 << 9)
# define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE (1 << 7)
# define GEN7_PS_POSOFFSET_NONE (0 << 3)
# define GEN7_PS_POSOFFSET_CENTROID (2 << 3)
# define GEN7_PS_POSOFFSET_SAMPLE (3 << 3)
# define GEN7_PS_32_DISPATCH_ENABLE (1 << 2)
# define GEN7_PS_16_DISPATCH_ENABLE (1 << 1)
# define GEN7_PS_8_DISPATCH_ENABLE (1 << 0)
/* DW5 */
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_0 16
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_1 8
# define GEN7_PS_DISPATCH_START_GRF_SHIFT_2 0
/* DW6: kernel 1 pointer */
/* DW7: kernel 2 pointer */
 
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL GEN7_3D(3, 0, 0x21)
#define GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC GEN7_3D(3, 0, 0x23)
 
#define GEN7_3DSTATE_BLEND_STATE_POINTERS GEN7_3D(3, 0, 0x24)
#define GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS GEN7_3D(3, 0, 0x25)
 
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS GEN7_3D(3, 0, 0x26)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS GEN7_3D(3, 0, 0x27)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS GEN7_3D(3, 0, 0x28)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS GEN7_3D(3, 0, 0x29)
#define GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS GEN7_3D(3, 0, 0x2a)
 
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS GEN7_3D(3, 0, 0x2b)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS GEN7_3D(3, 0, 0x2e)
#define GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS GEN7_3D(3, 0, 0x2f)
 
#define GEN7_3DSTATE_URB_VS GEN7_3D(3, 0, 0x30)
#define GEN7_3DSTATE_URB_HS GEN7_3D(3, 0, 0x31)
#define GEN7_3DSTATE_URB_DS GEN7_3D(3, 0, 0x32)
#define GEN7_3DSTATE_URB_GS GEN7_3D(3, 0, 0x33)
/* DW1 */
# define GEN7_URB_ENTRY_NUMBER_SHIFT 0
# define GEN7_URB_ENTRY_SIZE_SHIFT 16
# define GEN7_URB_STARTING_ADDRESS_SHIFT 25
 
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS GEN7_3D(3, 1, 0x12)
#define GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS GEN7_3D(3, 1, 0x16)
/* DW1 */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
/* CC viewport: a pair of floats giving the minimum and maximum depth. */
struct gen7_cc_viewport {
float min_depth;
float max_depth;
};
 
/* Sampler filter kinds. FILTER_COUNT is not a filter: as the last
 * enumerator it equals the number of filters (2).
 */
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
/* Sampler extend (edge handling) kinds. EXTEND_COUNT is not a mode: as the
 * last enumerator it equals the number of modes (4).
 */
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
#endif
/drivers/video/Intel-2D/sna/intel.h
0,0 → 1,152
 
 
/**
 * Enumeration of 3D consumers, so that some of them can maintain invariant
 * state. Stored in intel_screen_private::last_3d.
 */
enum last_3d {
LAST_3D_OTHER,
LAST_3D_VIDEO,
LAST_3D_RENDER,
LAST_3D_ROTATION
};
 
 
 
/**
 * Per-screen driver state. Judging by its members it gathers batch-buffer
 * bookkeeping, tiling and acceleration limits, video and render state, and
 * the staging arrays for vertices and surface state.
 * NOTE(review): batch_ptr, vertex_ptr and surface_data are embedded arrays,
 * so the structure is large (tens of kilobytes) - avoid stack instances.
 */
typedef struct intel_screen_private {
int cpp;

/* NOTE(review): presumably the values stored in current_batch - confirm. */
#define RENDER_BATCH I915_EXEC_RENDER
#define BLT_BATCH I915_EXEC_BLT

unsigned int current_batch;

dri_bufmgr *bufmgr;

uint32_t batch_ptr[4096];
/** Byte offset in batch_ptr for the next dword to be emitted. */
unsigned int batch_used;
/** Position in batch_ptr at the start of the current BEGIN_BATCH */
unsigned int batch_emit_start;
/** Number of bytes to be emitted in the current BEGIN_BATCH. */
uint32_t batch_emitting;
dri_bo *batch_bo, *last_batch_bo[2];
/** Whether we're in a section of code that can't tolerate flushing */
Bool in_batch_atomic;
/** Ending batch_used that was verified by intel_start_batch_atomic() */
int batch_atomic_limit;
struct list batch_pixmaps;
drm_intel_bo *wa_scratch_bo;

unsigned int tiling;

/* NOTE(review): presumably bit flags for the tiling member above - confirm. */
#define INTEL_TILING_FB 0x1
#define INTEL_TILING_2D 0x2
#define INTEL_TILING_3D 0x4
#define INTEL_TILING_ALL (~0)

Bool has_relaxed_fencing;

int Chipset;

unsigned int BR[20];

/* Hooks; each receives this structure as its only argument. */
void (*vertex_flush) (struct intel_screen_private *intel);
void (*batch_flush) (struct intel_screen_private *intel);
void (*batch_commit_notify) (struct intel_screen_private *intel);

Bool need_sync;

int accel_pixmap_offset_alignment;
int accel_max_x;
int accel_max_y;
int max_bo_size;
int max_gtt_map_size;
int max_tiling_size;

/* Buffer objects used by the video path. */
struct {
drm_intel_bo *gen4_vs_bo;
drm_intel_bo *gen4_sf_bo;
drm_intel_bo *gen4_wm_packed_bo;
drm_intel_bo *gen4_wm_planar_bo;
drm_intel_bo *gen4_cc_bo;
drm_intel_bo *gen4_cc_vp_bo;
drm_intel_bo *gen4_sampler_bo;
drm_intel_bo *gen4_sip_kernel_bo;
drm_intel_bo *wm_prog_packed_bo;
drm_intel_bo *wm_prog_planar_bo;
drm_intel_bo *gen6_blend_bo;
drm_intel_bo *gen6_depth_stencil_bo;
} video;

/* Render accel state */
float scale_units[2][2];
/** Transform pointers for src/mask, or NULL if identity */
PictTransform *transform[2];

PixmapPtr render_source, render_mask, render_dest;
PicturePtr render_source_picture, render_mask_picture, render_dest_picture;
Bool needs_3d_invariant;
Bool needs_render_state_emit;
Bool needs_render_vertex_emit;

/* i830 render accel state */
uint32_t render_dest_format;
uint32_t cblend, ablend, s8_blendctl;

/* i915 render accel state */
PixmapPtr texture[2];
uint32_t mapstate[6];
uint32_t samplerstate[6];

struct {
int op;
uint32_t dst_format;
} i915_render_state;

struct {
int num_sf_outputs;
int drawrect;
uint32_t blend;
dri_bo *samplers;
dri_bo *kernel;
} gen6_render_state;

uint32_t prim_offset;
/* Primitive emission hook taking source, mask and destination origins plus
 * the width and height.
 */
void (*prim_emit)(struct intel_screen_private *intel,
int srcX, int srcY,
int maskX, int maskY,
int dstX, int dstY,
int w, int h);
int floats_per_vertex;
int last_floats_per_vertex;
uint16_t vertex_offset;
uint16_t vertex_count;
uint16_t vertex_index;
uint16_t vertex_used;
uint32_t vertex_id;
float vertex_ptr[4*1024];
dri_bo *vertex_bo;

uint8_t surface_data[16*1024];
uint16_t surface_used;
uint16_t surface_table;
uint32_t surface_reloc;
dri_bo *surface_bo;

/* 965 render acceleration state */
struct gen4_render_state *gen4_render_state;

Bool use_pageflipping;
Bool use_triple_buffer;
Bool force_fallback;
Bool has_kernel_flush;
Bool needs_flush;

enum last_3d last_3d;

/**
* User option to print acceleration fallback info to the server log.
*/
Bool fallback_debug;
unsigned debug_flush;
Bool has_prime_vmap_flush;
} intel_screen_private;
 
/drivers/video/Intel-2D/sna/kgem.c
0,0 → 1,5420
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
#include <time.h>
#include <errno.h>
#include <fcntl.h>
 
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#endif
 
#ifdef HAVE_STRUCT_SYSINFO_TOTALRAM
#include <sys/sysinfo.h>
#endif
 
#include "sna_cpuid.h"
 
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
/* Compile-time debug switches; a non-zero value disables the named feature.
 * In this revision DBG_NO_CREATE2, DBG_NO_MADV and DBG_NO_HANDLE_LUT are
 * set to 1 and every other switch is 0. DBG_NO_TILING is checked by
 * gem_set_tiling() in this file.
 */
#define DBG_NO_HW 0
#define DBG_NO_TILING 0
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_CREATE2 1
#define DBG_NO_USERPTR 0
#define DBG_NO_UNSYNCHRONIZED_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 1
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 1
#define DBG_NO_WT 0
#define DBG_DUMP 0
 
#define FORCE_MMAP_SYNC 0 /* ((1 << DOMAIN_CPU) | (1 << DOMAIN_GTT)) */
 
#ifndef DEBUG_SYNC
#define DEBUG_SYNC 0
#endif
 
 
/* Idle assertions. The checking variants sit in an `#if 0` branch, so in
 * this build both macros expand to nothing and their arguments are never
 * evaluated. Switch the condition to re-enable the __kgem_busy() checks.
 */
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
#else
#define ASSERT_IDLE(kgem__, handle__)
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__)
#endif
 
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
* offsets be aligned to a cacheline (64 bytes).
*/
#define UPLOAD_ALIGNMENT 128
 
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
#define NUM_PAGES(x) (((x) + PAGE_SIZE-1) / PAGE_SIZE)
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
/* Map pointers carry a type tag in their two low bits:
 *   MAKE_CPU_MAP sets bit 0 (tag 1), MAKE_USER_MAP sets bits 0 and 1 (tag 3).
 * IS_USER_MAP tests bit 1 and __MAP_TYPE extracts the whole two-bit tag.
 * The tag must be masked off before such a pointer is dereferenced.
 */
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
#define LOCAL_I915_PARAM_HAS_WT 27
 
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
/*
 * Locally declared argument block for a userptr request: a user address and
 * size, request flags, and a handle slot.
 * NOTE(review): presumably mirrors the kernel's i915 userptr ioctl argument,
 * with `handle` filled in by the kernel - confirm against the kernel headers.
 *
 * I915_USERPTR_UNSYNCHRONIZED is bit 31 of the 32-bit `flags` word. It uses
 * an unsigned literal because `1 << 31` on a signed int is undefined
 * behaviour and produces a negative value.
 */
struct local_i915_gem_userptr {
	uint64_t user_ptr;
	uint64_t user_size;
	uint32_t flags;
#define I915_USERPTR_READ_ONLY (1<<0)
#define I915_USERPTR_UNSYNCHRONIZED (1u<<31)
	uint32_t handle;
};
 
#define UNCACHED 0
#define SNOOPED 1
#define DISPLAY 2
 
/* Argument block passed to LOCAL_IOCTL_I915_GEM_SET_CACHING by
 * gem_set_caching(): the object handle and the requested caching level.
 * NOTE(review): presumably `caching` takes UNCACHED/SNOOPED/DISPLAY - confirm.
 */
struct local_i915_gem_caching {
uint32_t handle;
uint32_t caching;
};
 
#define LOCAL_IOCTL_I915_GEM_SET_CACHING SRV_I915_GEM_SET_CACHING
 
/* Framebuffer description: dimensions, pitch and tiling.
 * NOTE(review): units (pixels for width/height, bytes for pitch) and the
 * tiling encoding are not visible here - confirm with the code that fills it.
 */
struct local_fbinfo {
int width;
int height;
int pitch;
int tiling;
};
 
/* A buffer built on top of a kgem_bo. `base` is the first member, so a
 * pointer to a kgem_buffer also addresses its embedded kgem_bo.
 * NOTE(review): presumably `mem` is the CPU-visible backing store and `used`
 * the number of bytes consumed so far - confirm with the upload-buffer code.
 */
struct kgem_buffer {
struct kgem_bo base;
void *mem;
uint32_t used;
/* Flags packed into one word; note that `write` is two bits wide. */
uint32_t need_io : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
 
static struct kgem_bo *__kgem_freed_bo;
static struct kgem_request *__kgem_freed_request;
static struct drm_i915_gem_exec_object2 _kgem_dummy_exec;
 
/* Size of a buffer object as reported by __kgem_bo_size(), returned as int. */
static inline int bytes(struct kgem_bo *bo)
{
	const int size = __kgem_bo_size(bo);

	return size;
}
 
#define bucket(B) (B)->size.pages.bucket
#define num_pages(B) (B)->size.pages.count
 
#if defined(DEBUG_MEMORY)
/* Account for one more allocation of `size` bytes in the debug counters. */
static void debug_alloc(struct kgem *kgem, size_t size)
{
	kgem->debug_memory.bo_bytes += size;
	kgem->debug_memory.bo_allocs += 1;
}

/* Same as debug_alloc(), taking the size from the buffer object itself. */
static void debug_alloc__bo(struct kgem *kgem, struct kgem_bo *bo)
{
	const size_t size = bytes(bo);

	debug_alloc(kgem, size);
}
#else
/* Without DEBUG_MEMORY both helpers vanish; arguments are not evaluated. */
#define debug_alloc(k, b)
#define debug_alloc__bo(k, b)
#endif
 
#ifndef NDEBUG
static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_get_tiling tiling;
 
assert(bo);
 
VG_CLEAR(tiling);
tiling.handle = bo->handle;
tiling.tiling_mode = -1;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling);
assert(tiling.tiling_mode == bo->tiling);
}
#else
#define assert_tiling(kgem, bo)
#endif
 
/*
 * Reset the state of the sna that embeds this kgem: invoke its render
 * backend's reset hook, then clear the BLT fill_bo.
 */
static void kgem_sna_reset(struct kgem *kgem)
{
	struct sna *owner;

	owner = container_of(kgem, struct sna, kgem);
	owner->render.reset(owner);
	owner->blt_state.fill_bo = 0;
}
 
/*
 * Invoke the render backend's flush hook on the sna that embeds this kgem.
 */
static void kgem_sna_flush(struct kgem *kgem)
{
	struct sna *owner;

	owner = container_of(kgem, struct sna, kgem);
	owner->render.flush(owner);

	/*
	 * The solid-cache flush is commented out in this file:
	 *	if (sna->render.solid_cache.dirty)
	 *		sna_render_flush_solid(sna);
	 */
}
 
/*
 * Request a tiling mode and stride for a GEM object.
 *
 * @fd:      DRM file descriptor
 * @handle:  GEM object handle
 * @tiling:  requested tiling mode
 * @stride:  requested stride
 *
 * Returns true when the ioctl succeeds. Returns false when it fails, or
 * when tiling is disabled at compile time through DBG_NO_TILING.
 *
 * The request is repeated only while the failure is transient (EINTR or
 * EAGAIN). Retrying on every non-zero result would spin forever on a
 * permanent error and would make the false return value unreachable.
 */
static bool gem_set_tiling(int fd, uint32_t handle, int tiling, int stride)
{
	struct drm_i915_gem_set_tiling set_tiling;
	int ret;

	if (DBG_NO_TILING)
		return false;

	VG_CLEAR(set_tiling);
	do {
		/* Refill the request on every attempt, as the loop always did. */
		set_tiling.handle = handle;
		set_tiling.tiling_mode = tiling;
		set_tiling.stride = stride;

		ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret == 0;
}
 
static bool gem_set_caching(int fd, uint32_t handle, int caching)
{
struct local_i915_gem_caching arg;
 
VG_CLEAR(arg);
arg.handle = handle;
arg.caching = caching;
return drmIoctl(fd, LOCAL_IOCTL_I915_GEM_SET_CACHING, &arg) == 0;
}
 
 
 
 
 
/*
 * Try to retire completed requests, throttling first if that is allowed
 * and needed.
 *
 * @flags: CREATE_NO_RETIRE suppresses everything, CREATE_NO_THROTTLE
 *         suppresses only the throttle-and-retry step.
 *
 * Returns the result of kgem_retire() when a retire pass was made, false
 * when nothing was attempted.
 */
static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
{
	if (flags & CREATE_NO_RETIRE) {
		DBG(("%s: not retiring per-request\n", __FUNCTION__));
		return false;
	}

	if (!kgem->need_retire) {
		DBG(("%s: nothing to retire\n", __FUNCTION__));
		return false;
	}

	/* First attempt without stalling. */
	if (kgem_retire(kgem))
		return true;

	if ((flags & CREATE_NO_THROTTLE) == 0 && kgem->need_throttle) {
		kgem_throttle(kgem);
		return kgem_retire(kgem);
	}

	DBG(("%s: not throttling\n", __FUNCTION__));
	return false;
}
 
/*
 * Obtain a GTT mapping for 'bo'.
 *
 * The MMAP_GTT ioctl is retried after trying to free resources: first by
 * retiring/expiring cached buffers, then (if kgem->need_expire is still
 * set) by cleaning the whole cache.
 *
 * In this port the offset returned by the ioctl is used directly as the
 * pointer; no separate mmap() step is performed.
 *
 * Returns the mapping, or NULL on failure.
 */
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_mmap_gtt mmap_arg;
	void *ptr;

	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
	     bo->handle, bytes(bo)));
	assert(bo->proxy == NULL);
	assert(!bo->snoop);
	assert(kgem_bo_can_map(kgem, bo));

retry_gtt:
	VG_CLEAR(mmap_arg);
	mmap_arg.handle = bo->handle;
	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) {
		/* Release what we can and try again. */
		(void)__kgem_throttle_retire(kgem, 0);
		if (kgem_expire_cache(kgem))
			goto retry_gtt;

		if (kgem->need_expire) {
			kgem_cleanup_cache(kgem);
			goto retry_gtt;
		}

		printf("%s: failed to retrieve GTT offset for handle=%d\n",
		       __FUNCTION__, bo->handle);
		return NULL;
	}

	/* Convert through uintptr_t: an (int) cast would truncate and
	 * sign-extend the offset on targets with 64-bit pointers. */
	ptr = (void *)(uintptr_t)mmap_arg.offset;
	if (ptr == NULL)
		ErrorF("%s: failed to mmap handle=%d, %d bytes, into GTT domain\n",
		       __FUNCTION__, bo->handle, bytes(bo));

	return ptr;
}
 
static int __gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
static int gem_write(int fd, uint32_t handle,
int offset, int length,
const void *src)
{
struct drm_i915_gem_pwrite pwrite;
 
DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__,
handle, offset, length));
 
VG_CLEAR(pwrite);
pwrite.handle = handle;
/* align the transfer to cachelines; fortuitously this is safe! */
if ((offset | length) & 63) {
pwrite.offset = offset & ~63;
pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
} else {
pwrite.offset = offset;
pwrite.size = length;
pwrite.data_ptr = (uintptr_t)src;
}
return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}
 
 
bool __kgem_busy(struct kgem *kgem, int handle)
{
struct drm_i915_gem_busy busy;
 
VG_CLEAR(busy);
busy.handle = handle;
busy.busy = !kgem->wedged;
(void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
DBG(("%s: handle=%d, busy=%d, wedged=%d\n",
__FUNCTION__, handle, busy.busy, kgem->wedged));
 
return busy.busy;
}
 
/*
 * If 'bo' is still attached to a request but the kernel reports it idle,
 * clear its busy state and run a retire pass.
 */
static void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: retiring bo handle=%d (needed flush? %d), rq? %d [busy?=%d]\n",
	     __FUNCTION__, bo->handle, bo->needs_flush, bo->rq != NULL,
	     __kgem_busy(kgem, bo->handle)));
	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->vma));

	if (bo->rq == NULL) {
		/* Not on any request: it must already be idle. */
		assert(!bo->needs_flush);
		ASSERT_IDLE(kgem, bo->handle);
		return;
	}

	if (__kgem_busy(kgem, bo->handle))
		return;

	__kgem_bo_clear_busy(bo);
	kgem_retire(kgem);
}
 
/*
 * Upload 'length' bytes from 'data' to the start of 'bo'.
 *
 * Returns false if the write failed, true otherwise. On success a bo that
 * is not part of the current batch is retired and moved to DOMAIN_NONE,
 * and the bo is marked gtt_dirty.
 */
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
		   const void *data, int length)
{
	assert(bo->refcnt);
	assert(!bo->purged);
	assert(bo->proxy == NULL);
	ASSERT_IDLE(kgem, bo->handle);

	assert(length <= bytes(bo));
	if (gem_write(kgem->fd, bo->handle, 0, length, data) != 0)
		return false;

	DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
	if (!bo->exec) {
		kgem_bo_retire(kgem, bo);
		bo->domain = DOMAIN_NONE;
	}
	bo->gtt_dirty = true;

	return true;
}
 
static uint32_t gem_create(int fd, int num_pages)
{
struct drm_i915_gem_create create;
 
VG_CLEAR(create);
create.handle = 0;
create.size = PAGE_SIZE * num_pages;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
 
return create.handle;
}
 
static bool
kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->exec == NULL);
assert(!bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = 1;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return true;
#endif
}
 
static bool
kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
if (!bo->purged)
return true;
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_DONTNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
return madv.retained;
 
return false;
#endif
}
 
static bool
kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
{
#if DBG_NO_MADV
return true;
#else
struct drm_i915_gem_madvise madv;
 
assert(bo->purged);
 
VG_CLEAR(madv);
madv.handle = bo->handle;
madv.madv = I915_MADV_WILLNEED;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
bo->purged = !madv.retained;
kgem->need_purge |= !madv.retained && bo->domain == DOMAIN_GPU;
return madv.retained;
}
 
return false;
#endif
}
 
static void gem_close(int fd, uint32_t handle)
{
struct drm_gem_close close;
 
VG_CLEAR(close);
close.handle = handle;
(void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}
 
/*
 * Index (0-based) of the most significant set bit of 'word'.
 *
 * On GCC/x86 this is a single BSR instruction; elsewhere a shift loop is
 * used, which yields 0 for both word == 0 and word == 1.
 * NOTE(review): BSR leaves its destination undefined for a zero source
 * (per the Intel SDM), so callers should pass a non-zero value.
 * NOTE(review): 'constant' is presumably a project attribute macro -- it is
 * not defined in the visible part of the file.
 */
constant inline static unsigned long __fls(unsigned long word)
{
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86__) || defined(__x86_64__))
/* 'word' serves as both the input and the output operand */
asm("bsr %1,%0"
: "=r" (word)
: "rm" (word));
return word;
#else
unsigned int v = 0;

/* count how many times the value can be halved before reaching zero */
while (word >>= 1)
v++;

return v;
#endif
}
 
/*
 * Cache bucket for an object of 'num_pages' pages: the index of the most
 * significant set bit of the page count.
 */
constant inline static int cache_bucket(int num_pages)
{
	unsigned long msb = __fls(num_pages);

	return msb;
}
 
/*
 * Initialise a bo structure for the GEM object 'handle' of 'num_pages'
 * pages. The structure is zeroed first; the bo starts with one reference,
 * reusable, in the CPU domain and with empty list links.
 * Returns 'bo'.
 */
static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
				      int handle, int num_pages)
{
	assert(num_pages);
	memset(bo, 0, sizeof(*bo));

	/* identity */
	bo->handle = handle;
	bo->target_handle = -1;
	bo->refcnt = 1;

	/* size and the cache bucket derived from it */
	num_pages(bo) = num_pages;
	bucket(bo) = cache_bucket(num_pages);

	/* initial state */
	bo->domain = DOMAIN_CPU;
	bo->reusable = true;

	list_init(&bo->list);
	list_init(&bo->request);
	list_init(&bo->vma);

	return bo;
}
 
/*
 * Get a bo structure for 'handle', preferring a recycled one from the
 * __kgem_freed_bo list over a fresh malloc().
 * Returns NULL if memory could not be allocated.
 */
static struct kgem_bo *__kgem_bo_alloc(int handle, int num_pages)
{
	struct kgem_bo *bo = __kgem_freed_bo;

	if (bo != NULL) {
		/* pop: the next pointer lives in the first bytes of the bo */
		__kgem_freed_bo = *(struct kgem_bo **)bo;
	} else {
		bo = malloc(sizeof(*bo));
		if (bo == NULL)
			return NULL;
	}

	return __kgem_bo_init(bo, handle, num_pages);
}
 
/*
 * Get a request structure: recycled from __kgem_freed_request if possible,
 * otherwise from malloc(), falling back to the embedded
 * kgem->static_request when allocation fails. Never returns NULL.
 */
static struct kgem_request *__kgem_request_alloc(struct kgem *kgem)
{
	struct kgem_request *rq = __kgem_freed_request;

	if (rq == NULL) {
		rq = malloc(sizeof(*rq));
		if (rq == NULL)
			rq = &kgem->static_request;
	} else {
		/* pop: the next pointer lives in the first bytes of the rq */
		__kgem_freed_request = *(struct kgem_request **)rq;
	}

	rq->ring = 0;
	rq->bo = NULL;
	list_init(&rq->buffers);

	return rq;
}
 
/*
 * Unlink a request and push it onto the __kgem_freed_request list for
 * later reuse by __kgem_request_alloc().
 */
static void __kgem_request_free(struct kgem_request *rq)
{
	struct kgem_request **link = (struct kgem_request **)rq;

	_list_del(&rq->list);

	/* the first bytes of the dead request become the list link */
	*link = __kgem_freed_request;
	__kgem_freed_request = rq;
}
 
/* Inactive-cache list that holds objects of 'num_pages' pages. */
static struct list *inactive(struct kgem *kgem, int num_pages)
{
	int idx;

	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
	idx = cache_bucket(num_pages);
	assert(idx < NUM_CACHE_BUCKETS);

	return &kgem->inactive[idx];
}
 
/* Active-cache list for objects of 'num_pages' pages with 'tiling'. */
static struct list *active(struct kgem *kgem, int num_pages, int tiling)
{
	int idx;

	assert(num_pages < MAX_CACHE_SIZE / PAGE_SIZE);
	idx = cache_bucket(num_pages);
	assert(idx < NUM_CACHE_BUCKETS);

	return &kgem->active[idx][tiling];
}
 
/*
 * Size of the mappable aperture as seen through PCI.
 *
 * Stub: this build performs no PCI probing and always reports 0, which
 * kgem_init() treats as "unknown" and replaces with the full aperture.
 */
static size_t
agp_aperture_size(struct pci_device *dev, unsigned gen)
{
	(void)dev;
	(void)gen;

	return 0;
}
 
/*
 * Ask the operating system for a memory size via the "int 0x40" system
 * call, with eax=18, ebx=20 and ecx pointing at a scratch buffer.
 * NOTE(review): presumably the KolibriOS "get memory info" call returning
 * total RAM in eax -- confirm against the system call reference.
 *
 * Returns the value left in eax, or 0 if that value is -1.
 */
static size_t
total_ram_size(void)
{
/* scratch buffer handed to the call in ecx; its contents are not read here */
uint32_t data[9];
size_t size = 0;

asm volatile("int $0x40"
: "=a" (size)
: "a" (18),"b"(20), "c" (data)
: "memory");

/* -1 is the failure value; report it as "unknown" (0) */
return size != -1 ? size : 0;
}
 
/*
 * Determine a CPU cache size with CPUID leaf 4.
 *
 * "Deterministic Cache Parameters (Function 04h)":
 *    When EAX is initialized to a value of 4, the CPUID instruction
 *    returns deterministic cache information in the EAX, EBX, ECX
 *    and EDX registers.  This function requires ECX be initialized
 *    with an index which indicates which cache to return information
 *    about. The OS is expected to call this function (CPUID.4) with
 *    ECX = 0, 1, 2, until EAX[4:0] == 0, indicating no more caches.
 *    The order in which the caches are returned is not specified
 *    and may change at Intel's discretion.
 *
 * Calculating the Cache Size in bytes:
 *          = (Ways +1) * (Partitions +1) * (Line Size +1) * (Sets +1)
 *
 * Returns the size of the last cache enumerated, or 0 when leaf 4 is not
 * available or no cache is reported.
 */
static unsigned
cpu_cache_size__cpuid4(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int llc_size = 0;
	int index;

	if (__get_cpuid_max(BASIC_CPUID, NULL) < 4)
		return 0;

	for (index = 0; ; index++) {
		unsigned ways, partitions, line_size, sets;

		__cpuid_count(4, index, eax, ebx, ecx, edx);

		/* cache type 0 terminates the enumeration */
		if ((eax & 0x1f) == 0)
			break;

		ways = ((ebx >> 22) & 0x3ff) + 1;
		partitions = ((ebx >> 12) & 0x3ff) + 1;
		line_size = (ebx & 0xfff) + 1;
		sets = ecx + 1;

		/* each later entry overwrites the previous result */
		llc_size = ways * partitions * line_size * sets;
	}

	return llc_size;
}
 
/*
 * Query an i915 GETPARAM value.
 * Returns the value, or -1 on failure. No param uses the sign bit, so it
 * is reserved for errors.
 */
static int gem_param(struct kgem *kgem, int name)
{
	drm_i915_getparam_t gp;
	int value = -1;

	VG_CLEAR(gp);
	gp.value = &value;
	gp.param = name;
	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0)
		return -1;

	VG(VALGRIND_MAKE_MEM_DEFINED(&value, sizeof(value)));
	return value;
}
 
/* execbuffer2 support is taken for granted in this build: always true. */
static bool test_has_execbuffer2(struct kgem *kgem)
{
	(void)kgem;

	return true;
}
 
/* Does the kernel advertise HAS_NO_RELOC (unless disabled for debugging)? */
static bool test_has_no_reloc(struct kgem *kgem)
{
	int supported;

	if (DBG_NO_FAST_RELOC)
		return false;

	supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC);
	return supported > 0;
}
 
/* Does the kernel advertise HAS_HANDLE_LUT (unless disabled for debugging)? */
static bool test_has_handle_lut(struct kgem *kgem)
{
	int supported;

	if (DBG_NO_HANDLE_LUT)
		return false;

	supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT);
	return supported > 0;
}
 
/* Does the kernel advertise HAS_WT (unless disabled for debugging)? */
static bool test_has_wt(struct kgem *kgem)
{
	int supported;

	if (DBG_NO_WT)
		return false;

	supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_WT);
	return supported > 0;
}
 
/*
 * Are inter-ring semaphores enabled?
 *
 * Relies solely on the HAS_SEMAPHORES param; when the query itself fails
 * (-1) there is no alternative detection and the answer is false.
 */
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
	int ret;

	if (DBG_NO_SEMAPHORES)
		return false;

	ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
	if (ret == -1)
		return false;

	return ret > 0;
}
 
/*
 * Issue the THROTTLE ioctl.
 * Returns true only when the ioctl failed with errno == EIO.
 */
static bool __kgem_throttle(struct kgem *kgem)
{
	int err = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);

	return err != 0 && errno == EIO;
}
 
/*
 * Decide whether hardware acceleration can be used on this chipset.
 *
 * Must be called after kgem->gen and kgem->has_blt have been set.
 */
static bool is_hw_supported(struct kgem *kgem,
			    struct pci_device *dev)
{
	if (DBG_NO_HW)
		return false;

	if (!test_has_execbuffer2(kgem))
		return false;

	/* unknown chipset, assume future gen */
	if (kgem->gen == (unsigned)-1)
		return kgem->has_blt;

	/* Although pre-855gm the GMCH is fubar, it works mostly. So
	 * let the user decide through "NoAccel" whether or not to risk
	 * hw acceleration.
	 */

	/* pre-production SNB with dysfunctional BLT */
	if (kgem->gen == 060 && dev->revision < 8)
		return false;

	if (kgem->gen < 060)
		return true;

	/* Only if the kernel supports the BLT ring */
	return kgem->has_blt;
}
 
/*
 * Relaxed fencing is assumed from gen 040 onwards; older generations must
 * have it advertised by the kernel (and not disabled for debugging).
 */
static bool test_has_relaxed_fencing(struct kgem *kgem)
{
	if (kgem->gen >= 040)
		return true;

	if (DBG_NO_RELAXED_FENCING)
		return false;

	return gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_FENCING) > 0;
}
 
/*
 * Does the GPU share the CPU's last-level cache?
 *
 * Uses the HAS_LLC param when it is available at build time and the query
 * succeeds; otherwise falls back to assuming LLC for gen >= 060.
 */
static bool test_has_llc(struct kgem *kgem)
{
	int has_llc = -1;

	if (DBG_NO_LLC)
		return false;

#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
	has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
	if (has_llc != -1)
		return has_llc;

	DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
	return kgem->gen >= 060;
}
 
/*
 * Probe for the SET_CACHING ioctl by creating a throw-away one-page object
 * and trying to mark it UNCACHED.
 */
static bool test_has_caching(struct kgem *kgem)
{
	uint32_t probe;
	bool ok;

	if (DBG_NO_CACHE_LEVEL)
		return false;

	/* Incoherent blt and sampler hangs the GPU */
	if (kgem->gen == 040)
		return false;

	probe = gem_create(kgem->fd, 1);
	if (probe == 0)
		return false;

	ok = gem_set_caching(kgem->fd, probe, UNCACHED);
	gem_close(kgem->fd, probe);

	return ok;
}
 
/*
 * Probe for userptr support by wrapping one page-aligned page of malloc'ed
 * memory in a GEM object. Always false unless built with USE_USERPTR.
 */
static bool test_has_userptr(struct kgem *kgem)
{
#if defined(USE_USERPTR)
	uint32_t handle;
	void *page;

	if (DBG_NO_USERPTR)
		return false;

	/* Incoherent blt and sampler hangs the GPU */
	if (kgem->gen == 040)
		return false;

	if (posix_memalign(&page, PAGE_SIZE, PAGE_SIZE) != 0)
		return false;

	handle = gem_userptr(kgem->fd, page, PAGE_SIZE, false);
	gem_close(kgem->fd, handle);
	free(page);

	return handle != 0;
#else
	return false;
#endif
}
 
/*
 * Probe for the CREATE2 ioctl by creating (and immediately closing) a
 * one-page DISPLAY-cached object. Always false unless built with
 * USE_CREATE2.
 */
static bool test_has_create2(struct kgem *kgem)
{
#if defined(USE_CREATE2)
	struct local_i915_gem_create2 args;

	if (DBG_NO_CREATE2)
		return false;

	memset(&args, 0, sizeof(args));
	args.caching = DISPLAY;
	args.size = PAGE_SIZE;
	if (drmIoctl(kgem->fd, LOCAL_IOCTL_I915_GEM_CREATE2, &args) == 0)
		gem_close(kgem->fd, args.handle);

	/* handle stays 0 (from the memset) unless the kernel filled it in */
	return args.handle != 0;
#else
	return false;
#endif
}
 
/* Does the kernel advertise HAS_SECURE_BATCHES (unless disabled for debugging)? */
static bool test_has_secure_batches(struct kgem *kgem)
{
	int supported;

	if (DBG_NO_SECURE_BATCHES)
		return false;

	supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES);
	return supported > 0;
}
 
/* Does the kernel advertise HAS_PINNED_BATCHES (unless disabled for debugging)? */
static bool test_has_pinned_batches(struct kgem *kgem)
{
	int supported;

	if (DBG_NO_PINNED_BATCHES)
		return false;

	supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES);
	return supported > 0;
}
 
 
/*
 * Pre-allocate the pinned batch buffers: two one-page objects on
 * kgem->pinned_batches[0] and one two-page object on
 * kgem->pinned_batches[1].
 *
 * Returns true if everything was pinned (or the GPU is already wedged and
 * nothing needs to be done). On any failure all batches pinned so far are
 * destroyed, each list is populated with a single *unpinned* bo as a
 * fallback, and false is returned.
 */
static bool kgem_init_pinned_batches(struct kgem *kgem)
{
	int count[2] = { 2, 1 };	/* number of batches per list */
	int size[2] = { 1, 2 };		/* size of each batch, in pages */
	int n, i;

	if (kgem->wedged)
		return true;

	for (n = 0; n < ARRAY_SIZE(count); n++) {
		for (i = 0; i < count[n]; i++) {
			struct drm_i915_gem_pin pin;
			struct kgem_bo *bo;

			VG_CLEAR(pin);

			pin.handle = gem_create(kgem->fd, size[n]);
			if (pin.handle == 0)
				goto err;

			DBG(("%s: new handle=%d, num_pages=%d\n",
			     __FUNCTION__, pin.handle, size[n]));

			bo = __kgem_bo_alloc(pin.handle, size[n]);
			if (bo == NULL) {
				gem_close(kgem->fd, pin.handle);
				goto err;
			}

			pin.alignment = 0;
			if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_PIN, &pin)) {
				gem_close(kgem->fd, pin.handle);
				/* bo is not on any list yet, so the cleanup
				 * below cannot find it: release it here. */
				free(bo);
				goto err;
			}
			bo->presumed_offset = pin.offset;
			debug_alloc__bo(kgem, bo);
			list_add(&bo->list, &kgem->pinned_batches[n]);
		}
	}

	return true;

err:
	/* Drop every batch that was successfully pinned before the failure. */
	for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
		while (!list_is_empty(&kgem->pinned_batches[n])) {
			kgem_bo_destroy(kgem,
					list_first_entry(&kgem->pinned_batches[n],
							 struct kgem_bo, list));
		}
	}

	/* For simplicity populate the lists with a single unpinned bo */
	for (n = 0; n < ARRAY_SIZE(count); n++) {
		struct kgem_bo *bo;
		uint32_t handle;

		handle = gem_create(kgem->fd, size[n]);
		if (handle == 0)
			break;

		bo = __kgem_bo_alloc(handle, size[n]);
		if (bo == NULL) {
			gem_close(kgem->fd, handle);
			break;
		}

		debug_alloc__bo(kgem, bo);
		list_add(&bo->list, &kgem->pinned_batches[n]);
	}
	return false;
}
 
/*
 * Initialise a kgem instance for the DRM device behind 'fd'.
 *
 * @kgem: structure to initialise; it is zeroed first
 * @fd:   DRM file descriptor, stored in kgem->fd
 * @dev:  PCI device, used for the revision check and aperture probing
 * @gen:  GPU generation; compared against octal literals throughout
 *        (020, 030, 040, 060 ...), with (gen >> 3) selecting the major digit
 *
 * In order, the function: sets up all cache lists, probes the kernel
 * features, decides whether the GPU must be treated as wedged, picks the
 * batch size and pinned batches, queries the aperture and derives the
 * various object-size limits from it, and finally computes the fence
 * limit and the base execbuffer flags.
 */
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
unsigned half_gpu_max;
unsigned int i, j;

DBG(("%s: fd=%d, gen=%d\n", __FUNCTION__, fd, gen));

memset(kgem, 0, sizeof(*kgem));

kgem->fd = fd;
kgem->gen = gen;

/* --- list heads for requests and the various bo caches --- */
list_init(&kgem->requests[0]);
list_init(&kgem->requests[1]);
list_init(&kgem->batch_buffers);
list_init(&kgem->active_buffers);
list_init(&kgem->flushing);
list_init(&kgem->large);
list_init(&kgem->large_inactive);
list_init(&kgem->snoop);
list_init(&kgem->scanout);
for (i = 0; i < ARRAY_SIZE(kgem->pinned_batches); i++)
list_init(&kgem->pinned_batches[i]);
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
list_init(&kgem->inactive[i]);
for (i = 0; i < ARRAY_SIZE(kgem->active); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->active[i]); j++)
list_init(&kgem->active[i][j]);
}
for (i = 0; i < ARRAY_SIZE(kgem->vma); i++) {
for (j = 0; j < ARRAY_SIZE(kgem->vma[i].inactive); j++)
list_init(&kgem->vma[i].inactive[j]);
}
/* the vma counters start negative, at minus the respective cache limit */
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;

/* --- kernel feature probing --- */
kgem->has_blt = gem_param(kgem, LOCAL_I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));

kgem->has_relaxed_delta =
gem_param(kgem, LOCAL_I915_PARAM_HAS_RELAXED_DELTA) > 0;
DBG(("%s: has relaxed delta? %d\n", __FUNCTION__,
kgem->has_relaxed_delta));

kgem->has_relaxed_fencing = test_has_relaxed_fencing(kgem);
DBG(("%s: has relaxed fencing? %d\n", __FUNCTION__,
kgem->has_relaxed_fencing));

kgem->has_llc = test_has_llc(kgem);
DBG(("%s: has shared last-level-cache? %d\n", __FUNCTION__,
kgem->has_llc));

kgem->has_wt = test_has_wt(kgem);
DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
kgem->has_wt));

kgem->has_caching = test_has_caching(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_caching));

kgem->has_userptr = test_has_userptr(kgem);
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));

/* the probe result is immediately overridden: create2 is forced off */
kgem->has_create2 = test_has_create2(kgem);
kgem->has_create2 = 0;
DBG(("%s: has create2? %d\n", __FUNCTION__,
kgem->has_create2));

kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));

/* likewise, handle-lut is probed but then forced off */
kgem->has_handle_lut = test_has_handle_lut(kgem);
kgem->has_handle_lut = 0;
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));

/* semaphores are only considered when there is a BLT ring */
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
DBG(("%s: semaphores enabled? %d\n", __FUNCTION__,
kgem->has_semaphores));

kgem->can_blt_cpu = gen >= 030;
DBG(("%s: can blt to cpu? %d\n", __FUNCTION__,
kgem->can_blt_cpu));

kgem->has_secure_batches = test_has_secure_batches(kgem);
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));

kgem->has_pinned_batches = test_has_pinned_batches(kgem);
DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
kgem->has_pinned_batches));

/* --- is acceleration usable at all? --- */
if (!is_hw_supported(kgem, dev)) {
printf("Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
kgem->wedged = 1;
} else if (__kgem_throttle(kgem)) {
printf("Detected a hung GPU, disabling acceleration.\n");
kgem->wedged = 1;
}

/* --- batch size: start from the full array and apply per-gen limits --- */
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
/* 865g cannot handle a batch spanning multiple pages */
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if ((gen >> 3) == 7)
kgem->batch_size = 16*1024;
if (!kgem->has_relaxed_delta && kgem->batch_size > 4*1024)
kgem->batch_size = 4*1024;

/* failing to set up pinned batches only disables acceleration on gen 020 */
if (!kgem_init_pinned_batches(kgem) && gen == 020) {
printf("Unable to reserve memory for GPU, disabling acceleration.\n");
kgem->wedged = 1;
}

DBG(("%s: maximum batch size? %d\n", __FUNCTION__,
kgem->batch_size));

kgem->min_alignment = 4;
if (gen < 040)
kgem->min_alignment = 64;

/* >> 13 divides the byte count by 8192, i.e. half the cache in 4 KiB units */
kgem->half_cpu_cache_pages = cpu_cache_size() >> 13;
DBG(("%s: last-level cache size: %d bytes, threshold in pages: %d\n",
__FUNCTION__, cpu_cache_size(), kgem->half_cpu_cache_pages));

kgem->next_request = __kgem_request_alloc(kgem);

DBG(("%s: cpu bo enabled %d: llc? %d, set-cache-level? %d, userptr? %d\n", __FUNCTION__,
!DBG_NO_CPU && (kgem->has_llc | kgem->has_userptr | kgem->has_caching),
kgem->has_llc, kgem->has_caching, kgem->has_userptr));

/* --- aperture: fall back to 64 MiB if the kernel reports nothing --- */
VG_CLEAR(aperture);
aperture.aper_size = 0;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
if (aperture.aper_size == 0)
aperture.aper_size = 64*1024*1024;

DBG(("%s: aperture size %lld, available now %lld\n",
__FUNCTION__,
(long long)aperture.aper_size,
(long long)aperture.aper_available_size));

/* high/low watermarks are 3/4 and 1/3 of the aperture (bytes for now) */
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/3;
if (gen < 033) {
/* Severe alignment penalties */
kgem->aperture_high /= 2;
kgem->aperture_low /= 2;
}
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
kgem->aperture_low, kgem->aperture_low / (1024*1024),
kgem->aperture_high, kgem->aperture_high / (1024*1024)));

/* an unknown (0) or implausibly large mappable size becomes the full aperture */
kgem->aperture_mappable = agp_aperture_size(dev, gen);
if (kgem->aperture_mappable == 0 ||
kgem->aperture_mappable > aperture.aper_size)
kgem->aperture_mappable = aperture.aper_size;
DBG(("%s: aperture mappable=%d [%d MiB]\n", __FUNCTION__,
kgem->aperture_mappable, kgem->aperture_mappable / (1024*1024)));

/*
 * Upload buffer size: at least 64 KiB, doubled until it reaches
 * mappable/1024, capped at half the CPU cache, then rounded down to a
 * power of two.
 */
kgem->buffer_size = 64 * 1024;
while (kgem->buffer_size < kgem->aperture_mappable >> 10)
kgem->buffer_size *= 2;
if (kgem->buffer_size >> 12 > kgem->half_cpu_cache_pages)
kgem->buffer_size = kgem->half_cpu_cache_pages << 12;
kgem->buffer_size = 1 << __fls(kgem->buffer_size);
DBG(("%s: buffer size=%d [%d KiB]\n", __FUNCTION__,
kgem->buffer_size, kgem->buffer_size / 1024));
assert(kgem->buffer_size);

/* --- object size limits --- */
kgem->max_object_size = 3 * (kgem->aperture_high >> 12) << 10;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc && kgem->max_gpu_size > MAX_CACHE_SIZE)
kgem->max_gpu_size = MAX_CACHE_SIZE;

/* clamp against system memory: objects <= RAM/2, GPU objects <= RAM/4 */
totalram = total_ram_size();
if (totalram == 0) {
DBG(("%s: total ram size unknown, assuming maximum of total aperture\n",
__FUNCTION__));
totalram = kgem->aperture_total;
}
DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;

kgem->max_cpu_size = kgem->max_object_size;

half_gpu_max = kgem->max_gpu_size / 2;
kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
if (kgem->max_copy_tile_size > half_gpu_max)
kgem->max_copy_tile_size = half_gpu_max;

/* upload tile size: clamped from above, but never below 16 pages */
if (kgem->has_llc)
kgem->max_upload_tile_size = kgem->max_copy_tile_size;
else
kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
if (kgem->max_upload_tile_size > half_gpu_max)
kgem->max_upload_tile_size = half_gpu_max;
if (kgem->max_upload_tile_size > kgem->aperture_high/2)
kgem->max_upload_tile_size = kgem->aperture_high/2;
if (kgem->max_upload_tile_size > kgem->aperture_low)
kgem->max_upload_tile_size = kgem->aperture_low;
if (kgem->max_upload_tile_size < 16*PAGE_SIZE)
kgem->max_upload_tile_size = 16*PAGE_SIZE;

/* copy tile size gets the same clamps as the upload tile size */
kgem->large_object_size = MAX_CACHE_SIZE;
if (kgem->large_object_size > half_gpu_max)
kgem->large_object_size = half_gpu_max;
if (kgem->max_copy_tile_size > kgem->aperture_high/2)
kgem->max_copy_tile_size = kgem->aperture_high/2;
if (kgem->max_copy_tile_size > kgem->aperture_low)
kgem->max_copy_tile_size = kgem->aperture_low;
if (kgem->max_copy_tile_size < 16*PAGE_SIZE)
kgem->max_copy_tile_size = 16*PAGE_SIZE;

/* CPU bos need llc, set-caching or userptr; otherwise disable them */
if (kgem->has_llc | kgem->has_caching | kgem->has_userptr) {
if (kgem->large_object_size > kgem->max_cpu_size)
kgem->large_object_size = kgem->max_cpu_size;
} else
kgem->max_cpu_size = 0;
if (DBG_NO_CPU)
kgem->max_cpu_size = 0;

DBG(("%s: maximum object size=%d\n",
__FUNCTION__, kgem->max_object_size));
DBG(("%s: large object thresold=%d\n",
__FUNCTION__, kgem->large_object_size));
DBG(("%s: max object sizes (gpu=%d, cpu=%d, tile upload=%d, copy=%d)\n",
__FUNCTION__,
kgem->max_gpu_size, kgem->max_cpu_size,
kgem->max_upload_tile_size, kgem->max_copy_tile_size));

/* Convert the aperture thresholds to pages */
kgem->aperture_low /= PAGE_SIZE;
kgem->aperture_high /= PAGE_SIZE;

/* two fences are held back; a failed query (-1 - 2 < 0) selects the fallback */
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));

/* --- execbuffer flags implied by the detected features --- */
kgem->batch_flags_base = 0;
if (kgem->has_no_reloc)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
 
/*
 * Hand out the next identifier from a per-kgem counter. The value 0 is
 * never returned: when the counter wraps to 0 it is advanced once more.
 * XXX uniqueness is only approximate, since the counter can wrap.
 */
uint32_t kgem_get_unique_id(struct kgem *kgem)
{
	uint32_t id = ++kgem->unique_id;

	if (id != 0)
		return id;

	return ++kgem->unique_id;
}
 
/*
 * Pitch alignment in bytes required for a surface created with 'flags':
 * 256 for CREATE_PRIME, 64 for CREATE_SCANOUT, otherwise the chipset
 * minimum. CREATE_PRIME takes precedence over CREATE_SCANOUT.
 */
inline static uint32_t kgem_pitch_alignment(struct kgem *kgem, unsigned flags)
{
	if (flags & CREATE_PRIME)
		return 256;

	return (flags & CREATE_SCANOUT) ? 64 : kgem->min_alignment;
}
 
/*
 * Report the dimensions of one tile for the given tiling mode.
 *
 * @tile_width:  out, tile width
 * @tile_height: out, tile height
 * @tile_size:   out, tile size
 *
 * Untiled surfaces (and unknown tiling modes on gen > 030) report 1/1/1.
 * On gen <= 030 any non-zero tiling mode is treated the same.
 */
void kgem_get_tile_size(struct kgem *kgem, int tiling,
			int *tile_width, int *tile_height, int *tile_size)
{
	/* linear layout unless one of the cases below applies */
	int w = 1, h = 1, sz = 1;

	if (kgem->gen <= 030) {
		if (tiling) {
			if (kgem->gen < 030) {
				w = 128;
				h = 16;
				sz = 2048;
			} else {
				w = 512;
				h = 8;
				sz = 4096;
			}
		}
	} else if (tiling == I915_TILING_X) {
		w = 512;
		h = 8;
		sz = 4096;
	} else if (tiling == I915_TILING_Y) {
		w = 128;
		h = 32;
		sz = 4096;
	}

	*tile_width = w;
	*tile_height = h;
	*tile_size = sz;
}
 
/*
 * Compute the allocation size and pitch for a 2D surface.
 *
 * @relaxed_fencing: when true, tiled surfaces on gen < 040 do not need a
 *                   power-of-two fence region
 * @flags:           CREATE_* flags, used for the linear pitch alignment
 * @width, @height:  surface dimensions (asserted <= MAXSHORT)
 * @bpp:             bits per pixel (asserted >= 8)
 * @tiling:          I915_TILING_* mode
 * @pitch:           out, the aligned pitch
 *
 * Returns the size to allocate, or 0 if the pitch exceeds the limits
 * checked below for gen < 040.
 */
uint32_t kgem_surface_size(struct kgem *kgem,
bool relaxed_fencing,
unsigned flags,
uint32_t width,
uint32_t height,
uint32_t bpp,
uint32_t tiling,
uint32_t *pitch)
{
uint32_t tile_width, tile_height;
uint32_t size;

assert(width <= MAXSHORT);
assert(height <= MAXSHORT);
assert(bpp >= 8);

/*
 * Pick the alignment unit. For linear surfaces the unit is two pixels
 * wide (rounded up to the pitch alignment) and two rows high.
 */
if (kgem->gen <= 030) {
if (tiling) {
if (kgem->gen < 030) {
tile_width = 128;
tile_height = 32;
} else {
tile_width = 512;
tile_height = 16;
}
} else {
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
}
} else switch (tiling) {
default:
case I915_TILING_NONE:
tile_width = 2 * bpp >> 3;
tile_width = ALIGN(tile_width,
kgem_pitch_alignment(kgem, flags));
tile_height = 2;
break;

/* XXX align to an even tile row */
case I915_TILING_X:
tile_width = 512;
tile_height = 16;
break;
case I915_TILING_Y:
tile_width = 128;
tile_height = 64;
break;
}

*pitch = ALIGN(width * bpp / 8, tile_width);
height = ALIGN(height, tile_height);
/* gen >= 040: a page-aligned size is all that is needed */
if (kgem->gen >= 040)
return PAGE_ALIGN(*pitch * height);

/* If it is too wide for the blitter, don't even bother. */
if (tiling != I915_TILING_NONE) {
if (*pitch > 8192)
return 0;

/* round the pitch up to tile_width times a power of two */
for (size = tile_width; size < *pitch; size <<= 1)
;
*pitch = size;
} else {
if (*pitch >= 32768)
return 0;
}

size = *pitch * height;
if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);

/* We need to allocate a pot fence region for a tiled buffer. */
/* tile_width is reused here as the candidate region size in bytes */
if (kgem->gen < 030)
tile_width = 512 * 1024;
else
tile_width = 1024 * 1024;
while (tile_width < size)
tile_width *= 2;
return tile_width;
}
 
/*
 * Round 'height' up to the row alignment required by 'tiling'.
 * Linear surfaces (and unknown tiling modes on gen > 030) are not padded.
 */
static uint32_t kgem_aligned_height(struct kgem *kgem,
				    uint32_t height, uint32_t tiling)
{
	uint32_t tile_height = 1;

	/* XXX align to an even tile row */
	if (kgem->gen <= 030) {
		if (tiling)
			tile_height = kgem->gen < 030 ? 32 : 16;
	} else if (tiling == I915_TILING_X) {
		tile_height = 16;
	} else if (tiling == I915_TILING_Y) {
		tile_height = 64;
	}

	return ALIGN(height, tile_height);
}
 
/*
 * Append 'bo' to the exec-object array of the batch under construction.
 *
 * bo->target_handle becomes the slot index when handle-lut is in use,
 * otherwise the GEM handle. The bo's pages are added to the aperture
 * accounting. Returns the new (zeroed and filled-in) exec entry.
 */
static struct drm_i915_gem_exec_object2 *
kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_exec_object2 *exec;

	DBG(("%s: handle=%d, index=%d\n",
	     __FUNCTION__, bo->handle, kgem->nexec));

	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
	bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;

	/* claim the next free slot */
	exec = &kgem->exec[kgem->nexec++];
	memset(exec, 0, sizeof(*exec));
	exec->offset = bo->presumed_offset;
	exec->handle = bo->handle;

	kgem->aperture += num_pages(bo);

	return exec;
}
 
static void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
{
bo->exec = kgem_add_handle(kgem, bo);
bo->rq = MAKE_REQUEST(kgem->next_request, kgem->ring);
 
list_move_tail(&bo->request, &kgem->next_request->buffers);
 
/* XXX is it worth working around gcc here? */
kgem->flush |= bo->flush;
}
 
/*
 * Terminate the batch with MI_BATCH_BUFFER_END, adding an MI_NOOP when
 * needed so that the batch has an even number of dwords.
 * Returns the final batch length in dwords.
 */
static uint32_t kgem_end_batch(struct kgem *kgem)
{
	kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END;

	if ((kgem->nbatch & 1) != 0)
		kgem->batch[kgem->nbatch++] = MI_NOOP;

	return kgem->nbatch;
}
 
/*
 * Resolve the relocations that refer to the batch buffer itself now that
 * 'bo' has been chosen to hold the batch.
 *
 * Such relocations carry target_handle == ~0U as a placeholder. For each
 * one the target handle and presumed offset are filled in from 'bo', and
 * the dword in the batch is rewritten with delta + presumed offset.
 */
static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
{
int n;

assert(kgem->nreloc__self <= 256);
if (kgem->nreloc__self == 0)
return;

/* fast path: indices recorded in reloc__self[] */
for (n = 0; n < kgem->nreloc__self; n++) {
int i = kgem->reloc__self[n];
assert(kgem->reloc[i].target_handle == ~0U);
kgem->reloc[i].target_handle = bo->target_handle;
kgem->reloc[i].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[i].offset/sizeof(kgem->batch[0])] =
kgem->reloc[i].delta + bo->presumed_offset;
}

/*
 * The index table holds at most 256 entries. If it is full, more
 * self-relocations may exist beyond the last recorded one, so scan the
 * remaining relocations for the placeholder handle.
 */
if (n == 256) {
for (n = kgem->reloc__self[255]; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
}
}

}

}
 
/*
 * Free the chain of bindings hanging off bo->binding. The head element is
 * embedded in the bo and is not freed; bo->binding.next is left untouched.
 */
static void kgem_bo_binding_free(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_bo_binding *b, *next;

	for (b = bo->binding.next; b != NULL; b = next) {
		next = b->next;
		free(b);
	}
}
 
/*
 * Drop the CPU or GTT mapping of 'bo': release the mapped memory, clear
 * bo->map and, if the bo was on a vma cache list, unlink it and adjust
 * the matching counter. Must not be used for user maps.
 */
static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
{
	/* capture the mapping type before bo->map is cleared */
	int type = IS_CPU_MAP(bo->map);

	assert(!IS_USER_MAP(bo->map));

	DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
	     __FUNCTION__, type ? "CPU" : "GTT",
	     bo->handle, kgem->vma[type].count));

	VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
	user_free(MAP(bo->map));
	bo->map = NULL;

	if (list_is_empty(&bo->vma))
		return;

	list_del(&bo->vma);
	kgem->vma[type].count--;
}
 
/*
 * Destroy an unreferenced bo: free its bindings, release any mapping,
 * unlink it from its lists and close the GEM handle.
 *
 * The structure itself is recycled through __kgem_freed_bo, except for
 * io bos, which are returned to the heap with free().
 */
static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(bo->refcnt == 0);
	assert(bo->proxy == NULL);
	assert(bo->exec == NULL);
	assert(!bo->snoop || bo->rq == NULL);

#ifdef DEBUG_MEMORY
	kgem->debug_memory.bo_allocs--;
	kgem->debug_memory.bo_bytes -= bytes(bo);
#endif

	kgem_bo_binding_free(kgem, bo);

	if (IS_USER_MAP(bo->map)) {
		assert(bo->rq == NULL);
		assert(!__kgem_busy(kgem, bo->handle));
		assert(MAP(bo->map) != bo || bo->io || bo->flush);
		/* the user memory is freed here only when the bo is
		 * neither an io bo nor marked flush */
		if (!bo->io && !bo->flush) {
			DBG(("%s: freeing snooped base\n", __FUNCTION__));
			assert(bo != MAP(bo->map));
			free(MAP(bo->map));
		}
		bo->map = NULL;
	}
	if (bo->map)
		kgem_bo_release_map(kgem, bo);
	assert(list_is_empty(&bo->vma));
	assert(bo->map == NULL);

	_list_del(&bo->list);
	_list_del(&bo->request);
	gem_close(kgem->fd, bo->handle);

	if (bo->io) {
		free(bo);
		return;
	}

	/* push onto the free list; the link lives in the bo's first bytes */
	*(struct kgem_bo **)bo = __kgem_freed_bo;
	__kgem_freed_bo = bo;
}
 
/*
 * Park an idle, unreferenced bo in the inactive cache.
 *
 * Objects whose bucket is beyond the cache buckets go to large_inactive.
 * Others go to their size bucket; if they carry a mapping it is either
 * forgotten (GTT map of a bo that is no longer mappable) or the bo is
 * also added to the matching inactive vma list.
 */
inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
					    struct kgem_bo *bo)
{
	int type;

	DBG(("%s: moving handle=%d to inactive\n", __FUNCTION__, bo->handle));

	assert(bo->refcnt == 0);
	assert(bo->reusable);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);
	assert(bo->domain != DOMAIN_GPU);
	assert(!bo->proxy);
	assert(!bo->io);
	assert(!bo->scanout);
	assert(!bo->snoop);
	assert(!bo->flush);
	assert(!bo->needs_flush);
	assert(list_is_empty(&bo->vma));
	assert_tiling(kgem, bo);
	ASSERT_IDLE(kgem, bo->handle);

	kgem->need_expire = true;

	if (bucket(bo) >= NUM_CACHE_BUCKETS) {
		list_move(&bo->list, &kgem->large_inactive);
		return;
	}

	assert(bo->flush == false);
	list_move(&bo->list, &kgem->inactive[bucket(bo)]);
	if (bo->map == NULL)
		return;

	type = IS_CPU_MAP(bo->map);
	if (bucket(bo) >= NUM_CACHE_BUCKETS ||
	    (!type && !__kgem_bo_is_mappable(kgem, bo))) {
		/* the mapping is only forgotten here, not unmapped */
		bo->map = NULL;
		return;
	}

	list_add(&bo->vma, &kgem->vma[type].inactive[bucket(bo)]);
	kgem->vma[type].count++;
}
 
/*
 * Turn an io bo into an ordinary bo by copying its kgem_bo part into a
 * freshly allocated structure and freeing the original.
 *
 * Returns the bo to use from now on: the input itself if it was not an io
 * bo, the replacement on success, or the original (marked non-reusable)
 * if the allocation failed.
 */
static struct kgem_bo *kgem_bo_replace_io(struct kgem_bo *bo)
{
	struct kgem_bo *base;

	if (!bo->io)
		return bo;

	assert(!bo->snoop);
	base = malloc(sizeof(*base));
	if (base == NULL) {
		bo->reusable = false;
		return bo;
	}

	DBG(("%s: transferring io handle=%d to bo\n",
	     __FUNCTION__, bo->handle));
	/* transfer the handle to a minimum bo */
	memcpy(base, bo, sizeof(*base));
	base->io = false;
	/* list links copied by memcpy still point at the old bo: fix them */
	list_init(&base->list);
	list_replace(&bo->request, &base->request);
	list_replace(&bo->vma, &base->vma);
	free(bo);

	return base;
}
 
/*
 * Take a bo out of the inactive cache, including the inactive vma list
 * (and its counter) when the bo carries a mapping.
 */
inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
						struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from inactive\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq == NULL);
	assert(bo->exec == NULL);

	if (bo->map == NULL)
		return;

	assert(!list_is_empty(&bo->vma));
	list_del(&bo->vma);
	kgem->vma[IS_CPU_MAP(bo->map)].count--;
}
 
/*
 * Take a bo out of the active cache. When bo->rq holds the kgem pointer
 * itself rather than a real request, the bo is also unlinked from its
 * request list.
 */
inline static void kgem_bo_remove_from_active(struct kgem *kgem,
					       struct kgem_bo *bo)
{
	DBG(("%s: removing handle=%d from active\n", __FUNCTION__, bo->handle));

	list_del(&bo->list);
	assert(bo->rq != NULL);

	if (bo->rq == (void *)kgem)
		list_del(&bo->request);

	assert(list_is_empty(&bo->vma));
}
 
/*
 * Give back the space a proxy bo occupied inside its parent upload buffer.
 *
 * Only the most recent sub-allocation can be returned: when the aligned end
 * of this proxy equals the parent's current fill level, the fill level is
 * rewound to the proxy's start offset.  Otherwise nothing changes.
 */
static void _kgem_bo_delete_buffer(struct kgem *kgem, struct kgem_bo *bo)
{
	struct kgem_buffer *parent = (struct kgem_buffer *)bo->proxy;

	DBG(("%s: size=%d, offset=%d, parent used=%d\n",
	     __FUNCTION__, bo->size.bytes, bo->delta, parent->used));

	if (ALIGN(bo->delta + bo->size.bytes, UPLOAD_ALIGNMENT) != parent->used)
		return;

	parent->used = bo->delta;
}
 
/*
 * Park an unreferenced scanout bo on kgem->scanout.
 *
 * A purged scanout is freed instead of being cached.  A bo that still has a
 * request attached goes to the tail of the list; one without goes to the
 * head.
 */
static void kgem_bo_move_to_scanout(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt == 0);
	assert(bo->scanout);
	assert(bo->delta);
	assert(!bo->flush);
	assert(!bo->snoop);
	assert(!bo->io);

	if (bo->purged) {
		DBG(("%s: discarding purged scanout - external name?\n",
		     __FUNCTION__));
		kgem_bo_free(kgem, bo);
		return;
	}

	DBG(("%s: moving %d [fb %d] to scanout cache, active? %d\n",
	     __FUNCTION__, bo->handle, bo->delta, bo->rq != NULL));

	if (bo->rq == NULL)
		list_move(&bo->list, &kgem->scanout);
	else
		list_move_tail(&bo->list, &kgem->scanout);
}
 
/*
 * Park an unreferenced, idle bo on the snoop cache list.
 *
 * A bo whose page count exceeds (kgem->max_cpu_size >> 13) is considered
 * too large to keep and is freed instead.
 */
static void kgem_bo_move_to_snoop(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt == 0);
	assert(bo->exec == NULL);
	assert(bo->reusable);
	assert(!bo->flush);
	assert(!bo->needs_flush);

	if (num_pages(bo) <= (kgem->max_cpu_size >> 13)) {
		assert(bo->tiling == I915_TILING_NONE);
		assert(bo->rq == NULL);

		DBG(("%s: moving %d to snoop cachee\n", __FUNCTION__, bo->handle));
		list_add(&bo->list, &kgem->snoop);
		return;
	}

	DBG(("%s handle=%d discarding large CPU buffer (%d >%d pages)\n",
	     __FUNCTION__, bo->handle, num_pages(bo), kgem->max_cpu_size >> 13));
	kgem_bo_free(kgem, bo);
}
 
static struct kgem_bo *
search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
 
DBG(("%s: num_pages=%d, flags=%x\n", __FUNCTION__, num_pages, flags));
 
if ((kgem->has_caching | kgem->has_userptr) == 0)
return NULL;
 
if (list_is_empty(&kgem->snoop)) {
DBG(("%s: inactive and cache empty\n", __FUNCTION__));
if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}
}
 
list_for_each_entry(bo, &kgem->snoop, list) {
assert(bo->refcnt == 0);
assert(bo->snoop);
assert(!bo->scanout);
assert(!bo->purged);
assert(bo->proxy == NULL);
assert(bo->tiling == I915_TILING_NONE);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
 
if (num_pages > num_pages(bo))
continue;
 
if (num_pages(bo) > 2*num_pages) {
if (first == NULL)
first = bo;
continue;
}
 
list_del(&bo->list);
bo->pitch = 0;
bo->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
return bo;
}
 
if (first) {
list_del(&first->list);
first->pitch = 0;
first->delta = 0;
 
DBG((" %s: found handle=%d (num_pages=%d) in snoop cache\n",
__FUNCTION__, first->handle, num_pages(first)));
return first;
}
 
return NULL;
}
 
/*
 * If bo is the only object in the batch under construction, throw the
 * whole batch away by resetting kgem.
 *
 * A temporary reference is held across kgem_reset() and dropped again
 * afterwards without going through the unref path.
 */
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo)
{
	if (kgem->nexec == 1 && bo->exec != NULL) {
		DBG(("%s: only handle in batch, discarding last operations for handle=%d\n",
		     __FUNCTION__, bo->handle));

		assert(bo->exec == &kgem->exec[0]);
		assert(kgem->exec[0].handle == bo->handle);
		assert(RQ(bo->rq) == kgem->next_request);

		bo->refcnt++;
		kgem_reset(kgem);
		bo->refcnt--;
	}
}
 
/*
 * Dispose of a bo whose reference count has reached zero.
 *
 * Depending on the bo's state it is parked on one of the caches (snoop,
 * scanout, active or inactive) or released with kgem_bo_free().  The bo
 * must not be on any cache list and must not be a proxy (both asserted).
 */
static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));

assert(list_is_empty(&bo->list));
assert(bo->refcnt == 0);
assert(!bo->purged || !bo->reusable);
assert(bo->proxy == NULL);
assert_tiling(kgem, bo);

bo->binding.offset = 0;

/* Debug switch: bypass every cache. */
if (DBG_NO_CACHE)
goto destroy;

if (bo->snoop && !bo->flush) {
DBG(("%s: handle=%d is snooped\n", __FUNCTION__, bo->handle));
assert(bo->reusable);
assert(list_is_empty(&bo->list));
/* Not in the current batch but still tied to a request: drop the tie if
 * __kgem_busy() reports the bo as no longer busy. */
if (bo->exec == NULL && bo->rq && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);
/* Only an idle bo is cached here; one that still has a request is left
 * alone and the function returns either way. */
if (bo->rq == NULL)
kgem_bo_move_to_snoop(kgem, bo);
return;
}
if (!IS_USER_MAP(bo->map))
bo->flush = false;

if (bo->scanout) {
kgem_bo_move_to_scanout(kgem, bo);
return;
}

/* May replace bo with a plain copy; from here on use the returned pointer. */
if (bo->io)
bo = kgem_bo_replace_io(bo);
if (!bo->reusable) {
DBG(("%s: handle=%d, not reusable\n",
__FUNCTION__, bo->handle));
goto destroy;
}

/* Without LLC, a CPU mapping is dropped unless the bo is in the CPU domain. */
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);

assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->flush == false);
assert(bo->snoop == false);
assert(bo->io == false);
assert(bo->scanout == false);

/* If this bo is the sole object of the pending batch, discard that batch. */
kgem_bo_undo(kgem, bo);
assert(bo->refcnt == 0);

if (bo->rq && bo->exec == NULL && !__kgem_busy(kgem, bo->handle))
__kgem_bo_clear_busy(bo);

/* Still attached to a request: file under the active cache, bucketed by
 * size and tiling, or on the list for large objects. */
if (bo->rq) {
struct list *cache;

DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
if (bucket(bo) < NUM_CACHE_BUCKETS)
cache = &kgem->active[bucket(bo)][bo->tiling];
else
cache = &kgem->large;
list_add(&bo->list, cache);
return;
}

assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));

/* A bo without a CPU mapping must be successfully marked purgeable to be
 * cached; on non-LLC hardware one left in the CPU domain is destroyed
 * rather than cached. */
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;

if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;

DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}

kgem_bo_move_to_inactive(kgem, bo);
return;

destroy:
/* A bo that is still referenced by the pending batch is not freed here. */
if (!bo->exec)
kgem_bo_free(kgem, bo);
}
 
/*
 * Drop one reference to bo; when the count reaches zero hand the bo to
 * __kgem_bo_destroy().
 */
static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo)
{
	assert(bo->refcnt);

	bo->refcnt -= 1;
	if (bo->refcnt != 0)
		return;

	__kgem_bo_destroy(kgem, bo);
}
 
/*
 * Release every cached proxy hanging off an upload buffer's vma list.
 *
 * For each proxy, its map field is treated as the address of a pointer that
 * refers back to the proxy (asserted); that pointer is cleared, the map
 * field is reset and the proxy is destroyed.
 */
static void kgem_buffer_release(struct kgem *kgem, struct kgem_buffer *bo)
{
	struct list *proxies = &bo->base.vma;

	while (!list_is_empty(proxies)) {
		struct kgem_bo *proxy =
			list_first_entry(proxies, struct kgem_bo, vma);
		struct kgem_bo **backref;

		assert(proxy->proxy == &bo->base);
		list_del(&proxy->vma);

		backref = (struct kgem_bo **)proxy->map;
		assert(*backref == proxy);
		*backref = NULL;
		proxy->map = NULL;

		kgem_bo_destroy(kgem, proxy);
	}
}
 
/*
 * Retire idle upload buffers from the tail of kgem->active_buffers.
 *
 * Stops at the first buffer (scanning from the tail) that still has a
 * request attached.  Each retired buffer has its cached proxies released
 * and its reference dropped.
 *
 * Returns true if at least one buffer was retired.
 */
static bool kgem_retire__buffers(struct kgem *kgem)
{
	bool retired = false;

	for (;;) {
		struct kgem_buffer *upload;

		if (list_is_empty(&kgem->active_buffers))
			break;

		upload = list_last_entry(&kgem->active_buffers,
					 struct kgem_buffer,
					 base.list);
		if (upload->base.rq)
			break;

		DBG(("%s: releasing upload cache for handle=%d? %d\n",
		     __FUNCTION__, upload->base.handle, !list_is_empty(&upload->base.vma)));

		list_del(&upload->base.list);
		kgem_buffer_release(kgem, upload);
		kgem_bo_unref(kgem, &upload->base);
		retired = true;
	}

	return retired;
}
 
/*
 * Walk kgem->flushing and retire every leading bo that is no longer busy.
 *
 * The walk stops at the first bo __kgem_busy() still reports as busy.
 * Retired bos that nobody references are moved to the snoop, scanout or
 * inactive cache, or freed.  kgem->need_retire is set if anything is left
 * on the flushing list.
 *
 * Returns true only if at least one bo reached the inactive cache.
 */
static bool kgem_retire__flushing(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	bool retired = false;

	list_for_each_entry_safe(bo, next, &kgem->flushing, request) {
		assert(bo->rq == (void *)kgem);
		assert(bo->exec == NULL);

		if (__kgem_busy(kgem, bo->handle))
			break;

		__kgem_bo_clear_busy(bo);

		/* Still owned by someone: nothing more to do. */
		if (bo->refcnt)
			continue;

		if (bo->snoop) {
			kgem_bo_move_to_snoop(kgem, bo);
			continue;
		}

		if (bo->scanout) {
			kgem_bo_move_to_scanout(kgem, bo);
			continue;
		}

		bo = kgem_bo_replace_io(bo);
		if (!bo->reusable || !kgem_bo_set_purgeable(kgem, bo)) {
			kgem_bo_free(kgem, bo);
			continue;
		}

		kgem_bo_move_to_inactive(kgem, bo);
		retired = true;
	}
#if HAS_DEBUG_FULL
	{
		int count = 0;
		list_for_each_entry(bo, &kgem->flushing, request)
			count++;
		ErrorF("%s: %d bo on flushing list\n", __FUNCTION__, count);
	}
#endif

	kgem->need_retire |= !list_is_empty(&kgem->flushing);

	return retired;
}
 
 
/*
 * Retire a completed request.
 *
 * Every bo on the request's buffer list is detached.  A bo that needs a
 * flush and is still busy is parked on kgem->flushing (with rq set to the
 * kgem pointer as a marker).  Otherwise it becomes idle and, if nobody
 * references it, is moved to the snoop, scanout or inactive cache, or
 * freed.  Finally the request's own batch bo loses the request's reference
 * and the request is freed.
 *
 * Returns true if at least one bo was moved to the inactive cache.
 */
static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
	struct kgem_bo *batch;
	bool retired = false;

	DBG(("%s: request %d complete\n",
	     __FUNCTION__, rq->bo->handle));

	while (!list_is_empty(&rq->buffers)) {
		struct kgem_bo *bo = list_first_entry(&rq->buffers,
						      struct kgem_bo,
						      request);

		assert(RQ(bo->rq) == rq);
		assert(bo->exec == NULL);
		assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);

		list_del(&bo->request);

		/* Re-check a pending flush against the actual busy state. */
		if (bo->needs_flush)
			bo->needs_flush = __kgem_busy(kgem, bo->handle);
		if (bo->needs_flush) {
			DBG(("%s: moving %d to flushing\n",
			     __FUNCTION__, bo->handle));
			list_add(&bo->request, &kgem->flushing);
			bo->rq = (void *)kgem;
			continue;
		}

		bo->domain = DOMAIN_NONE;
		bo->rq = NULL;
		if (bo->refcnt)
			continue;

		if (bo->snoop) {
			kgem_bo_move_to_snoop(kgem, bo);
			continue;
		}

		if (bo->scanout) {
			kgem_bo_move_to_scanout(kgem, bo);
			continue;
		}

		bo = kgem_bo_replace_io(bo);
		if (bo->reusable && kgem_bo_set_purgeable(kgem, bo)) {
			kgem_bo_move_to_inactive(kgem, bo);
			retired = true;
			continue;
		}

		DBG(("%s: closing %d\n",
		     __FUNCTION__, bo->handle));
		kgem_bo_free(kgem, bo);
	}

	batch = rq->bo;
	assert(batch->rq == NULL);
	assert(list_is_empty(&batch->request));

	batch->refcnt--;
	if (batch->refcnt == 0) {
		if (kgem_bo_set_purgeable(kgem, batch)) {
			kgem_bo_move_to_inactive(kgem, batch);
			retired = true;
		} else {
			DBG(("%s: closing %d\n",
			     __FUNCTION__, batch->handle));
			kgem_bo_free(kgem, batch);
		}
	}

	__kgem_request_free(rq);
	return retired;
}
 
/*
 * Retire completed requests on one ring, oldest first.
 *
 * Requests are taken from the head of kgem->requests[ring] until one is
 * found whose batch bo is still busy.
 *
 * Returns true if retiring any request moved a bo to the inactive cache.
 */
static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
{
	bool retired = false;

	while (!list_is_empty(&kgem->requests[ring])) {
		struct kgem_request *rq;

		rq = list_first_entry(&kgem->requests[ring],
				      struct kgem_request,
				      list);
		if (__kgem_busy(kgem, rq->bo->handle))
			break;

		retired |= __kgem_retire_rq(kgem, rq);
	}

#if HAS_DEBUG_FULL
	{
		struct kgem_request *rq;
		struct kgem_bo *bo = NULL;
		int count = 0;

		/* The ring list links struct kgem_request through its 'list'
		 * member, so it must be walked with that type and member. */
		list_for_each_entry(rq, &kgem->requests[ring], list)
			count++;

		if (!list_is_empty(&kgem->requests[ring]))
			bo = list_first_entry(&kgem->requests[ring],
					      struct kgem_request,
					      list)->bo;

		ErrorF("%s: ring=%d, %d outstanding requests, oldest=%d\n",
		       __FUNCTION__, ring, count, bo ? bo->handle : 0);
	}
#endif

	return retired;
}
 
/*
 * Retire completed requests on every ring and flag kgem->need_retire for
 * any ring that still has requests outstanding afterwards.
 *
 * Returns true if any ring reported a retirement.
 */
static bool kgem_retire__requests(struct kgem *kgem)
{
	bool retired = false;
	int ring;

	for (ring = 0; ring < ARRAY_SIZE(kgem->requests); ring++) {
		if (kgem_retire__requests_ring(kgem, ring))
			retired = true;

		kgem->need_retire |= !list_is_empty(&kgem->requests[ring]);
	}

	return retired;
}
 
/*
 * Run all three retirement passes (flushing list, per-ring requests,
 * upload buffers) in that order, then invoke the kgem->retire callback.
 *
 * kgem->need_retire is cleared up front; the passes set it again if work
 * remains.  Returns true if any pass retired something.
 */
bool kgem_retire(struct kgem *kgem)
{
	bool flushed, completed, released;
	bool retired;

	DBG(("%s\n", __FUNCTION__));

	kgem->need_retire = false;

	flushed = kgem_retire__flushing(kgem);
	completed = kgem_retire__requests(kgem);
	released = kgem_retire__buffers(kgem);
	retired = flushed | completed | released;

	DBG(("%s -- retired=%d, need_retire=%d\n",
	     __FUNCTION__, retired, kgem->need_retire));

	kgem->retire(kgem);

	return retired;
}
 
/*
 * Check whether a ring with outstanding requests has gone idle.
 *
 * Only the request at the tail of the ring's list is queried.  If its batch
 * bo is no longer busy the ring's requests are retired, after which the
 * list is expected to be empty (asserted), and true is returned.
 *
 * The ring's request list must not be empty on entry (asserted).
 */
bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
{
	struct kgem_request *newest;

	assert(!list_is_empty(&kgem->requests[ring]));

	newest = list_last_entry(&kgem->requests[ring],
				 struct kgem_request, list);
	if (!__kgem_busy(kgem, newest->bo->handle)) {
		DBG(("%s: ring=%d idle (handle=%d)\n",
		     __FUNCTION__, ring, newest->bo->handle));

		kgem_retire__requests_ring(kgem, ring);
		assert(list_is_empty(&kgem->requests[ring]));
		return true;
	}

	DBG(("%s: last requests handle=%d still busy\n",
	     __FUNCTION__, newest->bo->handle));
	return false;
}
 
/*
 * Finalise bookkeeping for the request that has just been handed to the
 * kernel.
 *
 * For every bo on the request: record the offset reported in its exec slot
 * as the new presumed offset, detach it from the exec array, and mark it as
 * GPU-domain and clean.  Unreferenced, non-reusable bos are freed on the
 * spot.  Proxies have their busy state cleared immediately.
 *
 * If the request is kgem->static_request (the DBG text below describes this
 * as the allocation-failure path), the function waits for the batch with a
 * set-domain ioctl, retires everything, closes the batch handle and empties
 * the caches.  Otherwise the request is appended to its ring's list and
 * throttle/retire are flagged as needed.
 */
static void kgem_commit(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;
	struct kgem_bo *bo, *next;

	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
		assert(next->request.prev == &bo->request);

		/* Check the exec slot before the trace below dereferences it. */
		assert(bo->exec);

		DBG(("%s: release handle=%d (proxy? %d), dirty? %d flush? %d, snoop? %d -> offset=%x\n",
		     __FUNCTION__, bo->handle, bo->proxy != NULL,
		     bo->gpu_dirty, bo->needs_flush, bo->snoop,
		     (unsigned)bo->exec->offset));

		assert(bo->proxy == NULL || bo->exec == &_kgem_dummy_exec);
		assert(RQ(bo->rq) == rq || (RQ(bo->proxy->rq) == rq));

		bo->presumed_offset = bo->exec->offset;
		bo->exec = NULL;
		bo->target_handle = -1;

		if (!bo->refcnt && !bo->reusable) {
			assert(!bo->snoop);
			kgem_bo_free(kgem, bo);
			continue;
		}

		bo->binding.offset = 0;
		bo->domain = DOMAIN_GPU;
		bo->gpu_dirty = false;

		if (bo->proxy) {
			/* proxies are not used for domain tracking */
			__kgem_bo_clear_busy(bo);
		}

		kgem->scanout_busy |= bo->scanout;
	}

	if (rq == &kgem->static_request) {
		struct drm_i915_gem_set_domain set_domain;

		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));

		VG_CLEAR(set_domain);
		set_domain.handle = rq->bo->handle;
		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
			DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
			kgem_throttle(kgem);
		}

		kgem_retire(kgem);
		assert(list_is_empty(&rq->buffers));

		assert(rq->bo->map == NULL);
		gem_close(kgem->fd, rq->bo->handle);
		kgem_cleanup_cache(kgem);
	} else {
		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
		kgem->need_throttle = kgem->need_retire = 1;
	}

	kgem->next_request = NULL;
}
 
/* Free every bo linked (via its 'list' member) on the given list head. */
static void kgem_close_list(struct kgem *kgem, struct list *head)
{
	while (!list_is_empty(head)) {
		struct kgem_bo *victim =
			list_first_entry(head, struct kgem_bo, list);

		kgem_bo_free(kgem, victim);
	}
}
 
/* Free every bo held in any bucket of the inactive cache. */
static void kgem_close_inactive(struct kgem *kgem)
{
	unsigned int idx = 0;

	while (idx < ARRAY_SIZE(kgem->inactive)) {
		kgem_close_list(kgem, &kgem->inactive[idx]);
		idx++;
	}
}
 
/*
 * Prepare the upload buffers on kgem->batch_buffers for submission.
 *
 * Buffers not attached to the current batch are left on the list.  For each
 * attached buffer one of the following happens:
 *  - read-only buffers (write unset) are simply decoupled;
 *  - mmapped buffers with at least one free page left are kept for further
 *    uploads on kgem->active_buffers, otherwise decoupled;
 *  - buffers with staged data (need_io) are either swapped for a smaller bo
 *    taken from the snoop or linear cache, or written out with gem_write().
 * "Decoupling" removes the buffer from the list and drops one reference.
 */
static void kgem_finish_buffers(struct kgem *kgem)
{
struct kgem_buffer *bo, *next;

list_for_each_entry_safe(bo, next, &kgem->batch_buffers, base.list) {
DBG(("%s: buffer handle=%d, used=%d, exec?=%d, write=%d, mmapped=%s\n",
__FUNCTION__, bo->base.handle, bo->used, bo->base.exec!=NULL,
bo->write, bo->mmapped ? IS_CPU_MAP(bo->base.map) ? "cpu" : "gtt" : "no"));

assert(next->base.list.prev == &bo->base.list);
assert(bo->base.io);
assert(bo->base.refcnt >= 1);

/* Not referenced by this batch: leave it on the list untouched. */
if (!bo->base.exec) {
DBG(("%s: skipping unattached handle=%d, used=%d\n",
__FUNCTION__, bo->base.handle, bo->used));
continue;
}

if (!bo->write) {
assert(bo->base.exec || bo->base.refcnt > 1);
goto decouple;
}

if (bo->mmapped) {
int used;

assert(!bo->need_io);

/* Retain the buffer only if a whole page is still free after rounding
 * the fill level up to a page, and the mapping is either not a CPU map
 * or the hardware has LLC or the bo is snooped. */
used = ALIGN(bo->used, PAGE_SIZE);
if (!DBG_NO_UPLOAD_ACTIVE &&
used + PAGE_SIZE <= bytes(&bo->base) &&
(kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) {
DBG(("%s: retaining upload buffer (%d/%d)\n",
__FUNCTION__, bo->used, bytes(&bo->base)));
bo->used = used;
list_move(&bo->base.list,
&kgem->active_buffers);
continue;
}
DBG(("%s: discarding mmapped buffer, used=%d, map type=%d\n",
__FUNCTION__, bo->used, (int)__MAP_TYPE(bo->base.map)));
goto decouple;
}

if (!bo->used) {
/* Unless we replace the handle in the execbuffer,
* then this bo will become active. So decouple it
* from the buffer list and track it in the normal
* manner.
*/
goto decouple;
}

assert(bo->need_io);
assert(bo->base.rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(bo->base.domain != DOMAIN_GPU);

/* Shrink attempt: only when this is the sole reference, the bo spans more
 * than one page and less than half of it is used. */
if (bo->base.refcnt == 1 &&
bo->base.size.pages.count > 1 &&
bo->used < bytes(&bo->base) / 2) {
struct kgem_bo *shrink;
unsigned alloc = NUM_PAGES(bo->used);

/* First choice: a snoop-cache bo filled through a CPU mapping. */
shrink = search_snoop_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
void *map;
int n;

DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));

assert(bo->used <= bytes(shrink));
map = kgem_bo_map__cpu(kgem, shrink);
if (map) {
kgem_bo_sync__cpu(kgem, shrink);
memcpy(map, bo->mem, bo->used);

/* Redirect every relocation that targeted the old bo, and patch the
 * batch dword it refers to with the replacement's presumed offset. */
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}

/* The replacement takes over the exec slot and the place on the
 * request's buffer list; the original is detached from both. */
bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.gpu_dirty;

bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.gpu_dirty = false;
bo->base.needs_flush = false;
bo->used = 0;

goto decouple;
}

/* Mapping failed: give the candidate back. */
__kgem_bo_destroy(kgem, shrink);
}

/* Second choice: a linear-cache bo filled with gem_write(). The
 * hand-over below mirrors the snoop path above. */
shrink = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_NO_RETIRE);
if (shrink) {
int n;

DBG(("%s: used=%d, shrinking %d to %d, handle %d to %d\n",
__FUNCTION__,
bo->used, bytes(&bo->base), bytes(shrink),
bo->base.handle, shrink->handle));

assert(bo->used <= bytes(shrink));
if (gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem) == 0) {
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
}
}

bo->base.exec->handle = shrink->handle;
bo->base.exec->offset = shrink->presumed_offset;
shrink->exec = bo->base.exec;
shrink->rq = bo->base.rq;
list_replace(&bo->base.request,
&shrink->request);
list_init(&bo->base.request);
shrink->needs_flush = bo->base.gpu_dirty;

bo->base.exec = NULL;
bo->base.rq = NULL;
bo->base.gpu_dirty = false;
bo->base.needs_flush = false;
bo->used = 0;

goto decouple;
}

/* Write failed: give the candidate back. */
__kgem_bo_destroy(kgem, shrink);
}
}

/* No replacement was used: upload the staged data into the buffer's own bo.
 * NOTE(review): the return value of gem_write() is not checked here. */
DBG(("%s: handle=%d, uploading %d/%d\n",
__FUNCTION__, bo->base.handle, bo->used, bytes(&bo->base)));
ASSERT_IDLE(kgem, bo->base.handle);
assert(bo->used <= bytes(&bo->base));
gem_write(kgem->fd, bo->base.handle,
0, bo->used, bo->mem);
bo->need_io = 0;

decouple:
DBG(("%s: releasing handle=%d\n",
__FUNCTION__, bo->base.handle));
list_del(&bo->base.list);
kgem_bo_unref(kgem, &bo->base);
}
}
 
/*
 * Forcibly tear down every outstanding request on every ring without
 * waiting for the GPU, then empty the inactive cache.
 *
 * Each bo still attached to a request is detached from the exec array,
 * marked clean and has its busy state cleared; unreferenced ones are freed.
 */
static void kgem_cleanup(struct kgem *kgem)
{
	int ring;

	for (ring = 0; ring < ARRAY_SIZE(kgem->requests); ring++) {
		struct list *pending = &kgem->requests[ring];

		while (!list_is_empty(pending)) {
			struct kgem_request *rq =
				list_first_entry(pending,
						 struct kgem_request,
						 list);

			while (!list_is_empty(&rq->buffers)) {
				struct kgem_bo *bo =
					list_first_entry(&rq->buffers,
							 struct kgem_bo,
							 request);

				bo->exec = NULL;
				bo->gpu_dirty = false;
				__kgem_bo_clear_busy(bo);
				if (bo->refcnt == 0)
					kgem_bo_free(kgem, bo);
			}

			__kgem_request_free(rq);
		}
	}

	kgem_close_inactive(kgem);
}
 
/*
 * Copy the batch (commands at the front of kgem->batch, surface state at
 * its end) into the bo identified by handle.
 *
 * Three layouts are handled:
 *  - no surface state: only the nbatch command dwords are written;
 *  - commands and surface state less than a page apart: the whole batch
 *    array is written in one go;
 *  - otherwise: commands are written at offset 0 and the surface state is
 *    written separately so that it ends at 'size'.
 *
 * Returns the result of the (last attempted) write.
 */
static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
{
	const size_t cmd_bytes = sizeof(uint32_t) * kgem->nbatch;
	const size_t full_bytes = sizeof(uint32_t) * kgem->batch_size;
	int tail;
	int err;

	ASSERT_IDLE(kgem, handle);

	/* If there is no surface data, just upload the batch */
	if (kgem->surface == kgem->batch_size)
		return gem_write(kgem->fd, handle, 0, cmd_bytes, kgem->batch);

	/* Are the batch pages conjoint with the surface pages? */
	if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
		assert(size == PAGE_ALIGN(full_bytes));
		return gem_write(kgem->fd, handle, 0, full_bytes, kgem->batch);
	}

	/* Disjoint surface/batch, upload separately */
	err = gem_write(kgem->fd, handle, 0, cmd_bytes, kgem->batch);
	if (err)
		return err;

	/* Number of bytes from the start of the surface state to the end of
	 * the page-aligned batch array. */
	tail = PAGE_ALIGN(full_bytes);
	tail -= sizeof(uint32_t) * kgem->surface;
	assert(size - tail >= cmd_bytes);

	return __gem_write(kgem->fd, handle,
			   size - tail,
			   (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
			   kgem->batch + kgem->surface);
}
 
/*
 * Abandon the batch under construction and start a fresh one.
 *
 * Every bo attached to the pending request is detached from it.  A bo that
 * needs a flush and is still busy is parked on kgem->flushing; any other bo
 * has its busy state cleared and, if unreferenced, is moved to a cache or
 * freed.  The request itself is freed unless it is kgem->static_request.
 * All batch counters are then zeroed and a new request is allocated.
 */
void kgem_reset(struct kgem *kgem)
{
	struct kgem_request *rq = kgem->next_request;

	if (rq) {
		while (!list_is_empty(&rq->buffers)) {
			struct kgem_bo *bo =
				list_first_entry(&rq->buffers,
						 struct kgem_bo,
						 request);

			list_del(&bo->request);

			assert(RQ(bo->rq) == rq);

			bo->binding.offset = 0;
			bo->exec = NULL;
			bo->target_handle = -1;
			bo->gpu_dirty = false;

			if (bo->needs_flush && __kgem_busy(kgem, bo->handle)) {
				assert(bo->domain == DOMAIN_GPU || bo->domain == DOMAIN_NONE);
				list_add(&bo->request, &kgem->flushing);
				bo->rq = (void *)kgem;
			} else {
				__kgem_bo_clear_busy(bo);
			}

			/* Still owned, or still tracked as busy: leave it be. */
			if (bo->refcnt || bo->rq)
				continue;

			if (bo->snoop) {
				kgem_bo_move_to_snoop(kgem, bo);
				continue;
			}

			if (bo->scanout) {
				kgem_bo_move_to_scanout(kgem, bo);
				continue;
			}

			bo = kgem_bo_replace_io(bo);
			if (bo->reusable && kgem_bo_set_purgeable(kgem, bo)) {
				kgem_bo_move_to_inactive(kgem, bo);
				continue;
			}

			DBG(("%s: closing %d\n",
			     __FUNCTION__, bo->handle));
			kgem_bo_free(kgem, bo);
		}

		if (rq != &kgem->static_request) {
			list_init(&rq->list);
			__kgem_request_free(rq);
		}
	}

	/* Exec / relocation bookkeeping. */
	kgem->nfence = 0;
	kgem->nexec = 0;
	kgem->nreloc = 0;
	kgem->nreloc__self = 0;
	kgem->aperture = 0;
	kgem->aperture_fenced = 0;

	/* Batch contents: commands start at 0, surface state at the end. */
	kgem->nbatch = 0;
	kgem->surface = kgem->batch_size;
	kgem->mode = KGEM_NONE;
	kgem->flush = 0;
	kgem->batch_flags = kgem->batch_flags_base;

	kgem->next_request = __kgem_request_alloc(kgem);

	kgem_sna_reset(kgem);
}
 
/*
 * Work out how many bytes the batch bo needs once the gap between the
 * commands (front of the batch array) and the surface state (end of the
 * array) has been squeezed down to 1024-dword granularity.
 *
 * When the layout can be shrunk, relocations are adjusted to match: self
 * relocations into the surface state (instruction domain, target ~0U) have
 * their delta reduced, and relocations located in the surface state have
 * their offset reduced.
 *
 * Returns the required size in bytes.  Without relaxed-delta support no
 * compaction is possible and the full batch size is returned; this is
 * converted to bytes as well, matching the other return path and the byte
 * sizes that kgem_batch_write() expects.
 */
static int compact_batch_surface(struct kgem *kgem)
{
	int size, shrink, n;

	if (!kgem->has_relaxed_delta)
		return kgem->batch_size * sizeof(uint32_t);

	/* See if we can pack the contents into one or two pages */
	n = ALIGN(kgem->batch_size, 1024);
	size = n - kgem->surface + kgem->nbatch;
	size = ALIGN(size, 1024);

	shrink = n - size;
	if (shrink) {
		DBG(("shrinking from %d to %d\n", kgem->batch_size, size));

		/* From here on 'shrink' is in bytes. */
		shrink *= sizeof(uint32_t);
		for (n = 0; n < kgem->nreloc; n++) {
			if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
			    kgem->reloc[n].target_handle == ~0U)
				kgem->reloc[n].delta -= shrink;

			if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
				kgem->reloc[n].offset -= shrink;
		}
	}

	return size * sizeof(uint32_t);
}
 
/*
 * Obtain a bo to hold a batch of 'size' bytes.
 *
 * The head of kgem->pinned_batches[0] is tried for sizes up to 4096 and the
 * head of kgem->pinned_batches[1] for sizes up to 16384.  A pinned bo is
 * usable if it has no request attached, or if its request is no longer busy
 * (in which case that request is retired on the spot).  The chosen bo is
 * rotated to the tail of its list and returned with an extra reference.
 *
 * If both are busy and this is gen 020 (an octal constant) without
 * has_pinned_batches, the function blocks on the head pinned bo instead.
 * In all other cases a linear bo is created.
 *
 * Returns NULL if the blocking wait fails; kgem_create_linear() may
 * presumably also return NULL.
 */
static struct kgem_bo *
kgem_create_batch(struct kgem *kgem, int size)
{
struct drm_i915_gem_set_domain set_domain;
struct kgem_bo *bo;

if (size <= 4096) {
bo = list_first_entry(&kgem->pinned_batches[0],
struct kgem_bo,
list);
if (!bo->rq) {
/* Also entered via goto from the retire path just below. */
out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}

if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_4096;
}
}

if (size <= 16384) {
bo = list_first_entry(&kgem->pinned_batches[1],
struct kgem_bo,
list);
if (!bo->rq) {
/* Also entered via goto from the retire path just below. */
out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}

if (!__kgem_busy(kgem, bo->handle)) {
assert(RQ(bo->rq)->bo == bo);
__kgem_retire_rq(kgem, RQ(bo->rq));
goto out_16384;
}
}

if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);

/* 'size > 4096' evaluates to 0 or 1 and selects the pinned list. */
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
struct kgem_bo,
list);
list_move_tail(&bo->list, &kgem->pinned_batches[size > 4096]);

DBG(("%s: syncing due to busy batches\n", __FUNCTION__));

/* Block until the bo can be moved to the GTT domain. */
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain)) {
DBG(("%s: sync: GPU hang detected\n", __FUNCTION__));
kgem_throttle(kgem);
return NULL;
}

kgem_retire(kgem);
assert(bo->rq == NULL);
return kgem_bo_reference(bo);
}

return kgem_create_linear(kgem, size, CREATE_NO_THROTTLE);
}
 
/*
 * Submit the batch under construction to the kernel.
 *
 * Steps: terminate the batch, finish the upload buffers, obtain a batch bo,
 * append it as the last entry of the exec array, write the batch contents
 * into it and issue the EXECBUFFER2 ioctl.  Afterwards the request is
 * committed and kgem is reset for the next batch.
 *
 * If no batch bo can be obtained, nothing is submitted and the pending
 * batch is dropped by the final kgem_reset().  A failed ioctl marks the
 * device as wedged.
 */
void _kgem_submit(struct kgem *kgem)
{
struct kgem_request *rq;
uint32_t batch_end;
int size;

assert(!DBG_NO_HW);
assert(!kgem->wedged);

assert(kgem->nbatch);
assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem));
assert(kgem->nbatch <= kgem->surface);

batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);

DBG(("batch[%d/%d, flags=%x]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, kgem->batch_flags,
batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));

assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
/* Strictly less: one exec slot is needed for the batch bo itself. */
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
assert(kgem->nfence <= kgem->fence_max);

kgem_finish_buffers(kgem);

#if SHOW_BATCH
__kgem_batch_debug(kgem, batch_end);
#endif

rq = kgem->next_request;
/* With surface state present the batch may be compacted; otherwise only the
 * command dwords need to be stored. */
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
rq->bo = kgem_create_batch(kgem, size);
if (rq->bo) {
uint32_t handle = rq->bo->handle;
int i;

assert(!rq->bo->needs_flush);

/* The batch bo takes the next free exec slot and carries all relocations. */
i = kgem->nexec++;
kgem->exec[i].handle = handle;
kgem->exec[i].relocation_count = kgem->nreloc;
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc;
kgem->exec[i].alignment = 0;
kgem->exec[i].offset = rq->bo->presumed_offset;
kgem->exec[i].flags = 0;
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;

rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = MAKE_REQUEST(rq, kgem->ring); /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
/* Request list index: 1 for the BLT ring, 0 for anything else. */
rq->ring = kgem->ring == KGEM_BLT;

kgem_fixup_self_relocs(kgem, rq->bo);

if (kgem_batch_write(kgem, handle, size) == 0) {
struct drm_i915_gem_execbuffer2 execbuf;
int ret, retry = 3;

memset(&execbuf, 0, sizeof(execbuf));
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.flags = kgem->ring | kgem->batch_flags;

/* Debug aid: dump the batch to a fixed path, then raise a breakpoint trap.
 * NOTE(review): open() is called with O_CREAT but without a mode argument;
 * confirm this matches the target libc's open() signature. */
if (DEBUG_DUMP)
{
int fd = open("/tmp1/1/batchbuffer.bin", O_CREAT|O_WRONLY|O_BINARY);
if (fd != -1) {
write(fd, kgem->batch, size);
close(fd);
}
else printf("SNA: failed to write batchbuffer\n");
asm volatile("int3");
}

ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
/* On EBUSY, throttle and resubmit, at most three more times. */
while (ret == -1 && errno == EBUSY && retry--) {
__kgem_throttle(kgem);
ret = drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_EXECBUFFER2,
&execbuf);
}
/* Debug aid: wait for the batch to complete before continuing. */
if (DEBUG_SYNC && ret == 0) {
struct drm_i915_gem_set_domain set_domain;

VG_CLEAR(set_domain);
set_domain.handle = handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;

ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
}
if (ret == -1) {
DBG(("%s: GPU hang detected [%d]\n",
__FUNCTION__, errno));
kgem_throttle(kgem);
kgem->wedged = true;

/* Disabled diagnostic dump of the exec and relocation arrays. */
#if 0
ret = errno;
ErrorF("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d: errno=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture, errno);

for (i = 0; i < kgem->nexec; i++) {
struct kgem_bo *bo, *found = NULL;

list_for_each_entry(bo, &kgem->next_request->buffers, request) {
if (bo->handle == kgem->exec[i].handle) {
found = bo;
break;
}
}
ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, snooped %d, deleted %d\n",
i,
kgem->exec[i].handle,
(int)kgem->exec[i].offset,
found ? kgem_bo_size(found) : -1,
found ? found->tiling : -1,
(int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
found ? found->snoop : -1,
found ? found->purged : -1);
}
for (i = 0; i < kgem->nreloc; i++) {
ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
i,
(int)kgem->reloc[i].offset,
kgem->reloc[i].target_handle,
kgem->reloc[i].delta,
kgem->reloc[i].read_domains,
kgem->reloc[i].write_domain,
(int)kgem->reloc[i].presumed_offset);
}

if (DEBUG_SYNC) {
int fd = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (fd != -1) {
write(fd, kgem->batch, batch_end*sizeof(uint32_t));
close(fd);
}

FatalError("SNA: failed to submit batchbuffer, errno=%d\n", ret);
}
#endif
}
}

/* Runs whenever a batch bo was obtained, even if the write or ioctl failed. */
kgem_commit(kgem);
}
if (kgem->wedged)
kgem_cleanup(kgem);

kgem_reset(kgem);

assert(kgem->next_request != NULL);
}
 
/*
 * Throttle against the GPU and record whether it appears to be hung.
 *
 * Clears kgem->need_throttle.  If the device is not already marked wedged,
 * the result of __kgem_throttle() becomes the new wedged state, and a
 * notice is printed when that state is set.
 */
void kgem_throttle(struct kgem *kgem)
{
	kgem->need_throttle = 0;

	if (!kgem->wedged) {
		kgem->wedged = __kgem_throttle(kgem);
		if (kgem->wedged) {
			printf("Detected a hung GPU, disabling acceleration.\n");
			printf("When reporting this, please include i915_error_state from debugfs and the full dmesg.\n");
		}
	}
}
 
/*
 * Sweep every inactive bucket and free each bo for which
 * kgem_bo_is_retained() returns false, then clear kgem->need_purge.
 */
void kgem_purge_cache(struct kgem *kgem)
{
	struct kgem_bo *bo, *next;
	int i;

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
			if (kgem_bo_is_retained(kgem, bo))
				continue;

			DBG(("%s: purging %d\n",
			     __FUNCTION__, bo->handle));
			kgem_bo_free(kgem, bo);
		}
	}

	kgem->need_purge = false;
}
 
 
/* Free every bo on kgem->large_inactive. */
void kgem_clean_large_cache(struct kgem *kgem)
{
	struct list *head = &kgem->large_inactive;

	while (!list_is_empty(head)) {
		struct kgem_bo *bo = list_first_entry(head, struct kgem_bo, list);

		kgem_bo_free(kgem, bo);
	}
}
 
/*
 * Periodic cache maintenance: release memory held by bos that have sat
 * unused in the caches for too long.
 *
 * In order, this
 *  - frees the deferred-free lists of bo and request structures,
 *  - empties the large-inactive list,
 *  - ages the snoop cache (threshold MAX_INACTIVE_TIME/2),
 *  - retires completed work (tearing everything down if the GPU is wedged),
 *  - calls the kgem->expire callback and purges if requested,
 *  - ages the inactive buckets (threshold MAX_INACTIVE_TIME), sparing
 *    mapped bos that are within MAP_PRESERVE_TIME of the threshold.
 *
 * Ageing uses bo->delta as a timestamp: a cached bo with delta == 0 is
 * stamped with the current time on this pass and only becomes eligible for
 * expiry on a later one.
 *
 * Returns true if the cache still holds something worth expiring later
 * (kgem->need_expire is updated to match), false if it is idle.
 */
bool kgem_expire_cache(struct kgem *kgem)
{
	time_t now, expire;
	struct kgem_bo *bo;
	unsigned int size = 0, count = 0;
	bool idle;
	unsigned int i;

	time(&now);

	/* Release structures queued on the deferred-free lists; each freed
	 * structure stores the link to the next in its first word. */
	while (__kgem_freed_bo) {
		bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	while (__kgem_freed_request) {
		struct kgem_request *rq = __kgem_freed_request;
		__kgem_freed_request = *(struct kgem_request **)rq;
		free(rq);
	}

	kgem_clean_large_cache(kgem);

	/* Snoop cache: stamp fresh entries; if an already-stamped entry is
	 * found, expire from the tail everything at or before the threshold. */
	expire = 0;
	list_for_each_entry(bo, &kgem->snoop, list) {
		if (bo->delta) {
			expire = now - MAX_INACTIVE_TIME/2;
			break;
		}

		bo->delta = now;
	}
	if (expire) {
		while (!list_is_empty(&kgem->snoop)) {
			bo = list_last_entry(&kgem->snoop, struct kgem_bo, list);

			if (bo->delta > expire)
				break;

			kgem_bo_free(kgem, bo);
		}
	}
#ifdef DEBUG_MEMORY
	{
		long snoop_size = 0;
		int snoop_count = 0;
		list_for_each_entry(bo, &kgem->snoop, list)
			snoop_count++, snoop_size += bytes(bo);
		ErrorF("%s: still allocated %d bo, %ld bytes, in snoop cache\n",
		       __FUNCTION__, snoop_count, snoop_size);
	}
#endif

	kgem_retire(kgem);
	if (kgem->wedged)
		kgem_cleanup(kgem);

	kgem->expire(kgem);

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	expire = 0;

	/* Inactive buckets, pass 1: stamp fresh entries and detect whether
	 * any bucket holds an entry stamped on an earlier pass. */
	idle = !kgem->need_retire;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		idle &= list_is_empty(&kgem->inactive[i]);
		list_for_each_entry(bo, &kgem->inactive[i], list) {
			if (bo->delta) {
				expire = now - MAX_INACTIVE_TIME;
				break;
			}

			bo->delta = now;
		}
	}
	if (idle) {
		DBG(("%s: idle\n", __FUNCTION__));
		kgem->need_expire = false;
		return false;
	}
	if (expire == 0)
		return true;

	/* Pass 2: expire from the tail of each bucket. */
	idle = !kgem->need_retire;
	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		struct list preserve;

		list_init(&preserve);
		while (!list_is_empty(&kgem->inactive[i])) {
			bo = list_last_entry(&kgem->inactive[i],
					     struct kgem_bo, list);

			if (bo->delta > expire) {
				idle = false;
				break;
			}

			if (bo->map && bo->delta + MAP_PRESERVE_TIME > expire) {
				/* Mapped and only just past the threshold:
				 * keep it a little longer. */
				idle = false;
				list_move_tail(&bo->list, &preserve);
			} else {
				count++;
				size += bytes(bo);
				/* Log before freeing: bo must not be read
				 * once kgem_bo_free() has run. */
				DBG(("%s: expiring %d\n",
				     __FUNCTION__, bo->handle));
				kgem_bo_free(kgem, bo);
			}
		}
		if (!list_is_empty(&preserve)) {
			/* Splice the preserved entries back onto the front
			 * of the bucket. */
			preserve.prev->next = kgem->inactive[i].next;
			kgem->inactive[i].next->prev = preserve.prev;
			kgem->inactive[i].next = preserve.next;
			preserve.next->prev = &kgem->inactive[i];
		}
	}

#ifdef DEBUG_MEMORY
	{
		long inactive_size = 0;
		int inactive_count = 0;
		for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
			list_for_each_entry(bo, &kgem->inactive[i], list)
				inactive_count++, inactive_size += bytes(bo);
		ErrorF("%s: still allocated %d bo, %ld bytes, in inactive cache\n",
		       __FUNCTION__, inactive_count, inactive_size);
	}
#endif

	DBG(("%s: expired %d objects, %d bytes, idle? %d\n",
	     __FUNCTION__, count, size, idle));

	/* Only read by the DBG trace above; keep them "used" when tracing is
	 * compiled out. */
	(void)count;
	(void)size;

	kgem->need_expire = !idle;
	return !idle;
}
 
/*
 * Drain the GPU and release every cached bo.
 *
 * For each ring with outstanding work, wait (via a set-domain ioctl) on the
 * most recently submitted request.  Requests are appended to the tail of the
 * ring list on commit, so the newest one is the last entry; waiting on it
 * is what allows the retire pass below to complete every earlier request on
 * that ring before kgem_cleanup() tears down whatever is left.
 *
 * Afterwards the inactive buckets, the large-inactive list, the snoop cache
 * and the deferred-free bo list are emptied, and the purge/expire flags are
 * cleared.
 */
void kgem_cleanup_cache(struct kgem *kgem)
{
	unsigned int i;
	int n;

	/* sync to the most recent request */
	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
		if (!list_is_empty(&kgem->requests[n])) {
			struct kgem_request *rq;
			struct drm_i915_gem_set_domain set_domain;

			rq = list_last_entry(&kgem->requests[n],
					     struct kgem_request,
					     list);

			DBG(("%s: sync on cleanup\n", __FUNCTION__));

			VG_CLEAR(set_domain);
			set_domain.handle = rq->bo->handle;
			set_domain.read_domains = I915_GEM_DOMAIN_GTT;
			set_domain.write_domain = I915_GEM_DOMAIN_GTT;
			/* Best effort: a failure here is deliberately ignored. */
			(void)drmIoctl(kgem->fd,
				       DRM_IOCTL_I915_GEM_SET_DOMAIN,
				       &set_domain);
		}
	}

	kgem_retire(kgem);
	kgem_cleanup(kgem);

	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
		while (!list_is_empty(&kgem->inactive[i]))
			kgem_bo_free(kgem,
				     list_last_entry(&kgem->inactive[i],
						     struct kgem_bo, list));
	}

	kgem_clean_large_cache(kgem);

	while (!list_is_empty(&kgem->snoop))
		kgem_bo_free(kgem,
			     list_last_entry(&kgem->snoop,
					     struct kgem_bo, list));

	/* Release bo structures queued on the deferred-free list. */
	while (__kgem_freed_bo) {
		struct kgem_bo *bo = __kgem_freed_bo;
		__kgem_freed_bo = *(struct kgem_bo **)bo;
		free(bo);
	}

	kgem->need_purge = false;
	kgem->need_expire = false;
}
 
/*
 * Look through the bo caches for an untiled (linear) buffer of at least
 * @num_pages pages.
 *
 * @flags steers the search:
 *   CREATE_INACTIVE  - only consider inactive caches (use_active == false);
 *   CREATE_NO_RETIRE - do not retire requests to refill an empty inactive bucket;
 *   CREATE_CPU_MAP / CREATE_GTT_MAP - prefer a bo that already has a mapping of
 *                      the requested kind;
 *   CREATE_EXACT     - give up if the inactive vma cache had no match.
 *
 * On success the bo has been unlinked from its cache list and has
 * tiling == I915_TILING_NONE and delta == 0.  The loops assert refcnt == 0,
 * so taking the reference is left to the caller (kgem_create_linear sets
 * refcnt to 1).  Returns NULL when nothing suitable was found.
 */
static struct kgem_bo *
search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
{
struct kgem_bo *bo, *first = NULL;
bool use_active = (flags & CREATE_INACTIVE) == 0;
struct list *cache;

DBG(("%s: num_pages=%d, flags=%x, use_active? %d, use_large=%d [max=%d]\n",
__FUNCTION__, num_pages, flags, use_active,
num_pages >= MAX_CACHE_SIZE / PAGE_SIZE,
MAX_CACHE_SIZE / PAGE_SIZE));

assert(num_pages);

/* Requests of MAX_CACHE_SIZE or more are served from the separate
 * large/large_inactive lists; this branch always returns.
 */
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE) {
DBG(("%s: searching large buffers\n", __FUNCTION__));
retry_large:
cache = use_active ? &kgem->large : &kgem->large_inactive;
/* `first` is passed as the second cursor of the _safe iterator here;
 * the near-miss logic that uses `first` below is never reached from
 * this branch.
 */
list_for_each_entry_safe(bo, first, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);

if (num_pages > num_pages(bo))
goto discard;

if (bo->tiling != I915_TILING_NONE) {
/* A tiled bo on the active pass is skipped rather than re-tiled. */
if (use_active)
goto discard;

if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
goto discard;

bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}

if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo))
goto discard;

list_del(&bo->list);
if (bo->rq == (void *)kgem)
list_del(&bo->request);

bo->delta = 0;
assert_tiling(kgem, bo);
return bo;

discard:
/* Rejected entries are freed only on the inactive pass; on the
 * active pass they are left on the list.
 */
if (!use_active)
kgem_bo_free(kgem, bo);
}

/* Nothing on the active large list: repeat over the inactive one. */
if (use_active) {
use_active = false;
goto retry_large;
}

if (__kgem_throttle_retire(kgem, flags))
goto retry_large;

return NULL;
}

/* Inactive-only request with an empty bucket: try to refill the bucket by
 * retiring, unless that is forbidden or cannot help.
 */
if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: inactive and cache bucket empty\n",
__FUNCTION__));

if (flags & CREATE_NO_RETIRE) {
DBG(("%s: can not retire\n", __FUNCTION__));
return NULL;
}

if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE))) {
DBG(("%s: active cache bucket empty\n", __FUNCTION__));
return NULL;
}

if (!__kgem_throttle_retire(kgem, flags)) {
DBG(("%s: nothing retired\n", __FUNCTION__));
return NULL;
}

if (list_is_empty(inactive(kgem, num_pages))) {
DBG(("%s: active cache bucket still empty after retire\n",
__FUNCTION__));
return NULL;
}
}

/* A mapping was requested: first look in the inactive vma cache of the
 * matching map type, where every bo already carries such a mapping.
 */
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
DBG(("%s: searching for inactive %s map\n",
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(!bo->scanout);

if (num_pages > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), num_pages));
continue;
}

/* The bo was just freed, so the (non-safe) iteration must stop here. */
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}

if (I915_TILING_NONE != bo->tiling &&
!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;

kgem_bo_remove_from_inactive(kgem, bo);

bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear vma cache\n",
__FUNCTION__, bo->handle, num_pages(bo)));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush);
assert_tiling(kgem, bo);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}

if (flags & CREATE_EXACT)
return NULL;

if (flags & CREATE_CPU_MAP && !kgem->has_llc)
return NULL;
}

/* General search of the size bucket.  A bo whose mapping state does not
 * match the request is remembered in `first` as a near-miss; a second
 * near-miss ends the scan.
 */
cache = use_active ? active(kgem, num_pages, I915_TILING_NONE) : inactive(kgem, num_pages);
list_for_each_entry(bo, cache, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!!bo->rq == !!use_active);
assert(bo->proxy == NULL);
assert(!bo->scanout);

if (num_pages > num_pages(bo))
continue;

if (use_active &&
kgem->gen <= 040 &&
bo->tiling != I915_TILING_NONE)
continue;

/* The bo was just freed, so the (non-safe) iteration must stop here. */
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}

if (I915_TILING_NONE != bo->tiling) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP))
continue;

/* Once a near-miss is held, do not bother re-tiling further bos. */
if (first)
continue;

if (!gem_set_tiling(kgem->fd, bo->handle,
I915_TILING_NONE, 0))
continue;

bo->tiling = I915_TILING_NONE;
bo->pitch = 0;
}

if (bo->map) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
/* Mapped, but with the other kind of mapping: near-miss. */
if (IS_CPU_MAP(bo->map) != for_cpu) {
if (first != NULL)
break;

first = bo;
continue;
}
} else {
/* Mapped although no mapping was requested: near-miss. */
if (first != NULL)
break;

first = bo;
continue;
}
} else {
/* Unmapped although a mapping was requested: near-miss. */
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
if (first != NULL)
break;

first = bo;
continue;
}
}

if (use_active)
kgem_bo_remove_from_active(kgem, bo);
else
kgem_bo_remove_from_inactive(kgem, bo);

assert(bo->tiling == I915_TILING_NONE);
bo->pitch = 0;
bo->delta = 0;
DBG((" %s: found handle=%d (num_pages=%d) in linear %s cache\n",
__FUNCTION__, bo->handle, num_pages(bo),
use_active ? "active" : "inactive"));
assert(list_is_empty(&bo->list));
assert(use_active || bo->domain != DOMAIN_GPU);
assert(!bo->needs_flush || use_active);
assert_tiling(kgem, bo);
ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active);
return bo;
}

/* No exact match: fall back to the remembered near-miss, if any. */
if (first) {
assert(first->tiling == I915_TILING_NONE);

if (use_active)
kgem_bo_remove_from_active(kgem, first);
else
kgem_bo_remove_from_inactive(kgem, first);

first->pitch = 0;
first->delta = 0;
DBG((" %s: found handle=%d (near-miss) (num_pages=%d) in linear %s cache\n",
__FUNCTION__, first->handle, num_pages(first),
use_active ? "active" : "inactive"));
assert(list_is_empty(&first->list));
assert(use_active || first->domain != DOMAIN_GPU);
assert(!first->needs_flush || use_active);
ASSERT_MAYBE_IDLE(kgem, first->handle, !use_active);
return first;
}

return NULL;
}
 
 
/*
 * Obtain a linear bo able to hold @size bytes.
 *
 * @size is converted to pages with NUM_PAGES().  The inactive linear cache
 * is consulted first; a hit is returned with refcnt set to 1.  When
 * CREATE_CACHED is set in @flags nothing new is allocated and NULL is
 * returned on a cache miss.  Otherwise a fresh GEM object is created and
 * wrapped with __kgem_bo_alloc().
 *
 * On LLC hardware (kgem->has_llc) a CREATE_GTT_MAP request is rewritten to
 * CREATE_CPU_MAP before searching.
 *
 * Returns the bo, or NULL on a cache miss with CREATE_CACHED or when
 * gem_create()/__kgem_bo_alloc() fails.
 */
struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags)
{
	struct kgem_bo *bo;
	uint32_t handle;

	DBG(("%s(%d)\n", __FUNCTION__, size));
	assert(size);

	if ((flags & CREATE_GTT_MAP) && kgem->has_llc)
		flags = (flags & ~CREATE_GTT_MAP) | CREATE_CPU_MAP;

	size = NUM_PAGES(size);

	bo = search_linear_cache(kgem, size, CREATE_INACTIVE | flags);
	if (bo != NULL) {
		assert(bo->domain != DOMAIN_GPU);
		ASSERT_IDLE(kgem, bo->handle);
		bo->refcnt = 1;
		return bo;
	}

	if (flags & CREATE_CACHED)
		return NULL;

	handle = gem_create(kgem->fd, size);
	if (!handle)
		return NULL;

	DBG(("%s: new handle=%d, num_pages=%d\n", __FUNCTION__, handle, size));

	bo = __kgem_bo_alloc(handle, size);
	if (bo != NULL) {
		debug_alloc__bo(kgem, bo);
		return bo;
	}

	/* Wrapper allocation failed: do not leak the GEM handle. */
	gem_close(kgem->fd, handle);
	return NULL;
}
 
/*
 * Compute the fence-region size used for a tiled @bo.
 *
 * Only valid for tiled buffers on kgem->gen < 040 (both asserted).  The
 * result starts at 512 KiB when gen < 030, otherwise at 1 MiB, and is
 * doubled until it is no smaller than bytes(bo).
 */
inline int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo)
{
	unsigned int size;

	assert(bo->tiling);
	assert_tiling(kgem, bo);
	assert(kgem->gen < 040);

	size = (kgem->gen < 030 ? 512 : 1024) * 1024;
	while (size < bytes(bo))
		size <<= 1;

	return size;
}
 
/*
 * Find or create a bo suitable for a @width x @height surface of @bpp bits
 * per pixel with the requested @tiling.
 *
 * A negative @tiling means "exactly this tiling": it forces exact matching
 * and the absolute value is used.  CREATE_EXACT and CREATE_SCANOUT in @flags
 * also force exact matching.
 *
 * Search order, as implemented below:
 *   1. the scanout list (only with CREATE_SCANOUT);
 *   2. for sizes beyond the cache buckets, the large active list (skipped
 *      with CREATE_INACTIVE) and then the large inactive list;
 *   3. the inactive vma cache (only with CREATE_CPU_MAP / CREATE_GTT_MAP);
 *   4. the active buckets (skipped with CREATE_INACTIVE), including
 *      neighbouring buckets and, if not exact, lower tilings;
 *   5. the inactive buckets;
 *   6. a newly created GEM object (skipped with CREATE_CACHED).
 *
 * Cache hits are returned with refcnt set to 1 and a fresh unique_id.
 * Returns NULL if nothing was found and creation is not allowed or fails.
 */
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags)
{
struct list *cache;
struct kgem_bo *bo;
uint32_t pitch, tiled_height, size;
uint32_t handle;
int i, bucket, retry;
bool exact = flags & (CREATE_EXACT | CREATE_SCANOUT);

/* Negative tiling requests that exact tiling mode. */
if (tiling < 0)
exact = true, tiling = -tiling;


DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d, cpu-mapping=%d, gtt-mapping=%d, scanout?=%d, prime?=%d, temp?=%d)\n", __FUNCTION__,
width, height, bpp, tiling, exact,
!!(flags & CREATE_INACTIVE),
!!(flags & CREATE_CPU_MAP),
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT),
!!(flags & CREATE_PRIME),
!!(flags & CREATE_TEMPORARY)));

/* kgem_surface_size() also reports the pitch; from here on `size` is in pages. */
size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
size /= PAGE_SIZE;
bucket = cache_bucket(size);

/* 1. Scanout buffers: accept one between size and 2*size pages. */
if (flags & CREATE_SCANOUT) {
struct kgem_bo *last = NULL;

list_for_each_entry_reverse(bo, &kgem->scanout, list) {
assert(bo->scanout);
assert(bo->delta);
assert(!bo->flush);
assert_tiling(kgem, bo);

if (size > num_pages(bo) || num_pages(bo) > 2*size)
continue;

if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;

bo->tiling = tiling;
bo->pitch = pitch;
}

/* An inactive bo was asked for but this one still has a request:
 * remember it as a fallback and keep looking.
 */
if (flags & CREATE_INACTIVE && bo->rq) {
last = bo;
continue;
}

list_del(&bo->list);

bo->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}

if (last) {
list_del(&last->list);

last->unique_id = kgem_get_unique_id(kgem);
DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n",
last->pitch, last->tiling, last->handle, last->unique_id));
assert(last->pitch*kgem_aligned_height(kgem, height, last->tiling) <= kgem_bo_size(last));
assert_tiling(kgem, last);
last->refcnt = 1;
return last;
}

/* Creating a dedicated display bo is disabled here (the call is commented
 * out), so `bo` is always NULL and the return below never triggers.
 */
bo = NULL; //__kgem_bo_create_as_display(kgem, size, tiling, pitch);
if (bo)
return bo;
}

/* 2. Sizes beyond the bucketed caches use the large lists. */
if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: large bo num pages=%d, bucket=%d\n",
__FUNCTION__, size, bucket));

if (flags & CREATE_INACTIVE)
goto large_inactive;

tiled_height = kgem_aligned_height(kgem, height, tiling);

list_for_each_entry(bo, &kgem->large, list) {
assert(!bo->purged);
assert(!bo->scanout);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert_tiling(kgem, bo);

/* gen < 040: reuse with its current pitch if large enough;
 * otherwise the tiling/pitch of the bo is changed to fit.
 */
if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}

if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;

if (bo->pitch != pitch || bo->tiling != tiling) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;

bo->pitch = pitch;
bo->tiling = tiling;
}
}

kgem_bo_remove_from_active(kgem, bo);

bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
bo->flush = true;
return bo;
}

large_inactive:
__kgem_throttle_retire(kgem, flags);
list_for_each_entry(bo, &kgem->large_inactive, list) {
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert_tiling(kgem, bo);

if (size > num_pages(bo))
continue;

if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;

bo->tiling = tiling;
bo->pitch = pitch;
}

/* The bo was just freed, so the iteration must stop here. */
if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}

list_del(&bo->list);

assert(bo->domain != DOMAIN_GPU);
bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->delta = 0;
DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}

goto create;
}

/* 3. A mapping was requested: look for an inactive bo that already has one. */
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (kgem->has_llc && tiling == I915_TILING_NONE)
for_cpu = 1;
/* We presume that we will need to upload to this bo,
 * and so would prefer to have an active VMA.
 */
cache = &kgem->vma[for_cpu].inactive[bucket];
do {
list_for_each_entry(bo, cache, vma) {
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(bo->map);
assert(IS_CPU_MAP(bo->map) == for_cpu);
assert(bo->rq == NULL);
assert(list_is_empty(&bo->request));
assert(bo->flush == false);
assert_tiling(kgem, bo);

if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}

/* Unlike the other searches, a mismatching bo is skipped here
 * instead of being re-tiled.
 */
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
DBG(("inactive vma with wrong tiling: %d < %d\n",
bo->tiling, tiling));
continue;
}

if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}

assert(bo->tiling == tiling);
bo->pitch = pitch;
bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
bo->domain = DOMAIN_NONE;

kgem_bo_remove_from_inactive(kgem, bo);

DBG((" from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->reusable);
assert(bo->domain != DOMAIN_GPU);
ASSERT_IDLE(kgem, bo->handle);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
/* Rescan only while the list is non-empty and retiring made progress. */
} while (!list_is_empty(cache) &&
__kgem_throttle_retire(kgem, flags));

/* CPU map without LLC: go straight to creation; CREATE_CACHED is dropped
 * when both the active and inactive buckets are empty.
 */
if (flags & CREATE_CPU_MAP && !kgem->has_llc) {
if (list_is_empty(&kgem->active[bucket][tiling]) &&
list_is_empty(&kgem->inactive[bucket]))
flags &= ~CREATE_CACHED;

goto create;
}
}

if (flags & CREATE_INACTIVE)
goto skip_active_search;

/* Best active match */
/* Up to 3 buckets are tried, or all remaining ones for CREATE_TEMPORARY. */
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3 && (flags & CREATE_TEMPORARY) == 0)
retry = 3;
search_again:
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->active[bucket][tiling];
if (tiling) {
tiled_height = kgem_aligned_height(kgem, height, tiling);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert(!bo->scanout);
assert_tiling(kgem, bo);

if (kgem->gen < 040) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}

if (bo->pitch * tiled_height > bytes(bo))
continue;
} else {
if (num_pages(bo) < size)
continue;

if (bo->pitch != pitch) {
if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;

bo->pitch = pitch;
}
}

kgem_bo_remove_from_active(kgem, bo);

bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
} else {
/* Untiled request: any sufficiently large bo in the bucket will do. */
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->tiling == tiling);
assert(bo->flush == false);
assert_tiling(kgem, bo);

if (num_pages(bo) < size)
continue;

kgem_bo_remove_from_active(kgem, bo);

bo->pitch = pitch;
bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}

/* Exact request with retries left: on gen >= 040 try to re-tile an active
 * bo of another tiling in this bucket, then move on to the next bucket.
 */
if (--retry && exact) {
if (kgem->gen >= 040) {
for (i = I915_TILING_NONE; i <= I915_TILING_Y; i++) {
if (i == tiling)
continue;

cache = &kgem->active[bucket][i];
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);

if (num_pages(bo) < size)
continue;

if (!gem_set_tiling(kgem->fd,
bo->handle,
tiling, pitch))
continue;

kgem_bo_remove_from_active(kgem, bo);

bo->unique_id = kgem_get_unique_id(kgem);
bo->pitch = pitch;
bo->tiling = tiling;
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}
}

bucket++;
goto search_again;
}

if (!exact) { /* allow an active near-miss? */
/* Walk the tiling modes below the requested one, down to 0. */
i = tiling;
while (--i >= 0) {
/* NOTE(review): the size (and pitch) are recomputed with `tiling`,
 * not with the candidate mode `i` - confirm this is intended.
 */
tiled_height = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags,
width, height, bpp, tiling, &pitch);
cache = active(kgem, tiled_height / PAGE_SIZE, i);
tiled_height = kgem_aligned_height(kgem, height, i);
list_for_each_entry(bo, cache, list) {
assert(!bo->purged);
assert(bo->refcnt == 0);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);

if (bo->tiling) {
if (bo->pitch < pitch) {
DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n",
bo->tiling, tiling,
bo->pitch, pitch));
continue;
}
} else
bo->pitch = pitch;

if (bo->pitch * tiled_height > bytes(bo))
continue;

kgem_bo_remove_from_active(kgem, bo);

bo->unique_id = kgem_get_unique_id(kgem);
bo->delta = 0;
DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}
}
}

skip_active_search:
/* 5. Inactive buckets: restart from the natural bucket, at most 3 buckets. */
bucket = cache_bucket(size);
retry = NUM_CACHE_BUCKETS - bucket;
if (retry > 3)
retry = 3;
search_inactive:
/* Now just look for a close match and prefer any currently active */
assert(bucket < NUM_CACHE_BUCKETS);
cache = &kgem->inactive[bucket];
list_for_each_entry(bo, cache, list) {
assert(bucket(bo) == bucket);
assert(bo->reusable);
assert(!bo->scanout);
assert(bo->flush == false);
assert_tiling(kgem, bo);

if (size > num_pages(bo)) {
DBG(("inactive too small: %d < %d\n",
num_pages(bo), size));
continue;
}

/* Re-tile to the requested layout; an existing mapping is released
 * once the tiling has been changed.
 */
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (!gem_set_tiling(kgem->fd, bo->handle,
tiling, pitch))
continue;

if (bo->map)
kgem_bo_release_map(kgem, bo);
}

if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
kgem_bo_free(kgem, bo);
break;
}

kgem_bo_remove_from_inactive(kgem, bo);

bo->pitch = pitch;
bo->tiling = tiling;

bo->delta = 0;
bo->unique_id = kgem_get_unique_id(kgem);
assert(bo->pitch);
DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id));
assert(bo->refcnt == 0);
assert(bo->reusable);
assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE);
assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo));
assert_tiling(kgem, bo);
bo->refcnt = 1;
return bo;
}

/* If matching active buffers exist, retire once and rescan this bucket;
 * CREATE_INACTIVE is cleared so this retry happens at most once.
 */
if (flags & CREATE_INACTIVE &&
!list_is_empty(&kgem->active[bucket][tiling]) &&
__kgem_throttle_retire(kgem, flags)) {
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}

if (--retry) {
bucket++;
flags &= ~CREATE_INACTIVE;
goto search_inactive;
}

create:
/* 6. Nothing reusable: allocate a new object unless only cached bos are allowed. */
if (flags & CREATE_CACHED)
return NULL;

/* Large objects are rounded up to a multiple of 1024 pages. */
if (bucket >= NUM_CACHE_BUCKETS)
size = ALIGN(size, 1024);
handle = gem_create(kgem->fd, size);
if (handle == 0)
return NULL;

bo = __kgem_bo_alloc(handle, size);
if (!bo) {
gem_close(kgem->fd, handle);
return NULL;
}

if (bucket >= NUM_CACHE_BUCKETS) {
DBG(("%s: marking large bo for automatic flushing\n",
__FUNCTION__));
bo->flush = true;
}

bo->unique_id = kgem_get_unique_id(kgem);
if (tiling == I915_TILING_NONE ||
gem_set_tiling(kgem->fd, handle, tiling, pitch)) {
bo->tiling = tiling;
bo->pitch = pitch;
} else {
/* Setting the tiling failed: with CREATE_EXACT the new bo is discarded
 * if its pitch/tiling do not match the request.
 */
if (flags & CREATE_EXACT) {
if (bo->pitch != pitch || bo->tiling != tiling) {
kgem_bo_free(kgem, bo);
return NULL;
}
}
}

assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
assert_tiling(kgem, bo);

debug_alloc__bo(kgem, bo);

DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d, num_pages=%d [%d], bucket=%d\n",
bo->pitch, bo->tiling, bo->handle, bo->unique_id,
size, num_pages(bo), bucket(bo)));
return bo;
}
 
/*
 * NOTE: everything up to the matching #endif is compiled out (#if 0).
 *
 * kgem_create_cpu_2d() would create a linear bo intended for CPU access for
 * a @width x @height surface of @bpp bits per pixel.  The strategies it
 * tries, in order:
 *   - with LLC: an untiled kgem_create_2d() bo that is CPU mapped;
 *   - a bo from the snoop cache;
 *   - with has_caching: a new linear bo switched to SNOOPED caching and
 *     CPU mapped;
 *   - with has_userptr: page-aligned malloc'ed memory wrapped by
 *     kgem_create_map().
 * It returns NULL if DBG_NO_CPU is set or every strategy fails.
 */
#if 0
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags)
{
struct kgem_bo *bo;
int stride, size;

if (DBG_NO_CPU)
return NULL;

DBG(("%s(%dx%d, bpp=%d)\n", __FUNCTION__, width, height, bpp));

if (kgem->has_llc) {
bo = kgem_create_2d(kgem, width, height, bpp,
I915_TILING_NONE, flags);
if (bo == NULL)
return bo;

assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);

if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}

return bo;
}

/* Stride: width rounded up to even, in bytes, then aligned to 4 bytes;
 * the height is rounded up to even as well.
 */
assert(width > 0 && height > 0);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);
size = stride * ALIGN(height, 2);
assert(size >= PAGE_SIZE);

DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));

bo = search_snoop_cache(kgem, NUM_PAGES(size), 0);
if (bo) {
assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);
assert(bo->snoop);
bo->refcnt = 1;
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}

if (kgem->has_caching) {
bo = kgem_create_linear(kgem, size, flags);
if (bo == NULL)
return NULL;

assert(bo->tiling == I915_TILING_NONE);
assert_tiling(kgem, bo);

if (!gem_set_caching(kgem->fd, bo->handle, SNOOPED)) {
kgem_bo_destroy(kgem, bo);
return NULL;
}
bo->snoop = true;

if (kgem_bo_map__cpu(kgem, bo) == NULL) {
kgem_bo_destroy(kgem, bo);
return NULL;
}

bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}

if (kgem->has_userptr) {
void *ptr;

/* XXX */
//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&ptr, PAGE_SIZE, ALIGN(size, PAGE_SIZE)))
return NULL;

/* On failure the memory is still owned here and must be released. */
bo = kgem_create_map(kgem, ptr, size, false);
if (bo == NULL) {
free(ptr);
return NULL;
}

bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}

return NULL;
}
#endif
 
/*
 * Destroy @bo.
 *
 * A regular bo is handed to __kgem_bo_destroy().  A proxy bo (bo->proxy set)
 * is torn down here instead: it is unlinked from its vma and request lists,
 * its upload buffer is deleted if it is an io bo that is not part of the
 * current batch, the reference on the proxied bo is dropped, its bindings
 * are released and the proxy structure itself is freed.
 */
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, proxy? %d\n",
	     __FUNCTION__, bo->handle, bo->proxy != NULL));

	if (bo->proxy == NULL) {
		__kgem_bo_destroy(kgem, bo);
		return;
	}

	_list_del(&bo->vma);
	_list_del(&bo->request);

	if (bo->io && !bo->exec)
		_kgem_bo_delete_buffer(kgem, bo);

	kgem_bo_unref(kgem, bo->proxy);
	kgem_bo_binding_free(kgem, bo);
	free(bo);
}
 
/*
 * Query whether @bo is still busy and, if it is not, clear its busy state.
 *
 * Preconditions (asserted): @bo has a request, is not in the current batch
 * and has needs_flush set.
 */
static void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	bool idle;

	assert(bo->rq);
	assert(bo->exec == NULL);
	assert(bo->needs_flush);

	/* The kernel will emit a flush *and* update its own flushing lists. */
	idle = !__kgem_busy(kgem, bo->handle);
	if (idle)
		__kgem_bo_clear_busy(bo);

	DBG(("%s: handle=%d, busy?=%d\n",
	     __FUNCTION__, bo->handle, bo->rq != NULL));
}
 
/*
 * Submit any pending work on @bo and, if it still needs a flush, flush it
 * and mark it as clean (gtt_dirty and needs_flush cleared, domain reset to
 * DOMAIN_NONE).
 */
void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo)
{
	kgem_bo_submit(kgem, bo);

	if (bo->needs_flush) {
		/* If the kernel fails to emit the flush, then it will be
		 * forced when we assume direct access. And as the usual
		 * failure is EIO, we do not actually care.
		 */
		assert(bo->exec == NULL);
		if (bo->rq != NULL)
			__kgem_flush(kgem, bo);

		/* Whatever actually happens, we can regard the GTT write
		 * domain as being flushed.
		 */
		bo->needs_flush = false;
		bo->gtt_dirty = false;
		bo->domain = DOMAIN_NONE;
	}
}
 
/*
 * True when the current batch already has relocations and @bo has a
 * request on a ring other than the one the batch is being built for.
 */
inline static bool needs_semaphore(struct kgem *kgem, struct kgem_bo *bo)
{
	if (!kgem->nreloc)
		return false;

	if (!bo->rq)
		return false;

	return RQ_RING(bo->rq) != kgem->ring;
}
 
/*
 * Check whether a set of buffers can be added to the current batch.
 *
 * The variadic arguments are `struct kgem_bo *` pointers terminated by a
 * NULL pointer.  Proxies are resolved to the bo they stand for, and buffers
 * already in the batch (bo->exec set) are not counted.
 *
 * Returns false when the batch should be submitted first, i.e. when
 *   - one of the buffers would need a semaphore (see needs_semaphore()),
 *   - kgem_flush() asks for a flush,
 *   - aperture usage is above the low water mark while the ring is idle,
 *   - adding the buffers would exceed the high water mark, or
 *   - there are not enough exec slots left.
 * Returns true otherwise, including when no new pages would be added.
 */
bool kgem_check_bo(struct kgem *kgem, ...)
{
	va_list ap;
	struct kgem_bo *bo;
	int num_exec = 0;
	int num_pages = 0;
	bool flush = false;

	va_start(ap, kgem);
	while ((bo = va_arg(ap, struct kgem_bo *))) {
		while (bo->proxy)
			bo = bo->proxy;
		if (bo->exec)
			continue;

		if (needs_semaphore(kgem, bo)) {
			/* Every va_start() must be matched by va_end()
			 * before leaving the function.
			 */
			va_end(ap);
			return false;
		}

		num_pages += num_pages(bo);
		num_exec++;

		flush |= bo->flush;
	}
	va_end(ap);

	DBG(("%s: num_pages=+%d, num_exec=+%d\n",
	     __FUNCTION__, num_pages, num_exec));

	if (!num_pages)
		return true;

	if (kgem_flush(kgem, flush))
		return false;

	if (kgem->aperture > kgem->aperture_low &&
	    kgem_ring_is_idle(kgem, kgem->ring)) {
		DBG(("%s: current aperture usage (%d) is greater than low water mark (%d)\n",
		     __FUNCTION__, kgem->aperture, kgem->aperture_low));
		return false;
	}

	if (num_pages + kgem->aperture > kgem->aperture_high) {
		DBG(("%s: final aperture usage (%d) is greater than high water mark (%d)\n",
		     __FUNCTION__, num_pages + kgem->aperture, kgem->aperture_high));
		return false;
	}

	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) {
		DBG(("%s: out of exec slots (%d + %d / %d)\n", __FUNCTION__,
		     kgem->nexec, num_exec, KGEM_EXEC_SIZE(kgem)));
		return false;
	}

	return true;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Record a relocation for the batch entry at index @pos that refers to @bo.
 *
 * @read_write_domain packs the read domains in the upper 16 bits and the
 * write domain in the low 15 bits (mask 0x7fff); KGEM_RELOC_FENCED may also
 * be set to request a fence on gen < 040.  @delta is the byte offset into
 * the target; offsets of any proxies in the chain are added to it.
 *
 * With @bo == NULL the relocation targets handle ~0U and its index is
 * remembered in kgem->reloc__self (up to 256 entries); a write domain is
 * not allowed in that case (asserted).
 *
 * Returns @delta plus proxy offsets plus the bo's presumed_offset (or just
 * @delta when @bo is NULL); 0 for the special handle -2 case below.
 */
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domain,
uint32_t delta)
{
int index;

DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));

assert((read_write_domain & 0x7fff) == 0 || bo != NULL);

/* A bo whose handle is -2 gets no relocation entry: it is only added to
 * the batch and marked dirty when written, and 0 is returned.
 * NOTE(review): presumably a sentinel handle specific to this port -
 * confirm what object it denotes.
 */
if( bo != NULL && bo->handle == -2)
{
if (bo->exec == NULL)
kgem_add_bo(kgem, bo);

if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
__kgem_bo_mark_dirty(bo);
}
return 0;
};

/* Claim the next relocation slot; the offset is in bytes into the batch. */
index = kgem->nreloc++;
assert(index < ARRAY_SIZE(kgem->reloc));
kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
if (bo) {
assert(bo->refcnt);
/* Walk the proxy chain down to the real bo, accumulating offsets. */
while (bo->proxy) {
DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
__FUNCTION__, bo->delta, bo->handle));
delta += bo->delta;
assert(bo->handle == bo->proxy->handle);
/* need to release the cache upon batch submit */
if (bo->exec == NULL) {
list_move_tail(&bo->request,
&kgem->next_request->buffers);
bo->rq = MAKE_REQUEST(kgem->next_request,
kgem->ring);
bo->exec = &_kgem_dummy_exec;
}

if (read_write_domain & 0x7fff && !bo->gpu_dirty)
__kgem_bo_mark_dirty(bo);

bo = bo->proxy;
assert(bo->refcnt);
}
assert(bo->refcnt);

if (bo->exec == NULL)
kgem_add_bo(kgem, bo);
assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
assert(RQ_RING(bo->rq) == kgem->ring);

/* On gen < 040 a fenced relocation to a tiled bo is accounted once,
 * the first time the bo is flagged as needing a fence.
 */
if (kgem->gen < 040 && read_write_domain & KGEM_RELOC_FENCED) {
if (bo->tiling &&
(bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
assert(kgem->nfence < kgem->fence_max);
kgem->aperture_fenced +=
kgem_bo_fenced_size(kgem, bo);
kgem->nfence++;
}
bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE;
}

kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;

if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
assert(!bo->snoop || kgem->can_blt_cpu);
__kgem_bo_mark_dirty(bo);
}

/* The returned value is the address the batch should presume for the target. */
delta += bo->presumed_offset;
} else {
/* No target bo: record the slot index in the reloc__self table. */
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
if (kgem->nreloc__self < 256)
kgem->reloc__self[kgem->nreloc__self++] = index;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;
kgem->reloc[index].write_domain = read_write_domain & 0x7fff;

return delta;
}
 
/*
 * Drop cached mappings of the given @type from the inactive vma lists.
 *
 * Starting at bucket 0 and rotating through the buckets, the last entry of
 * the next non-empty list is taken, its cached mapping pointer is cleared
 * and it is removed from the vma list, until either the per-type count
 * reaches zero or all lists are empty.  A bo that is not already purged and
 * cannot be marked purgeable is freed.
 *
 * @bucket is only used for the debug message.
 */
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
	int cursor, probes;

	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
	     __FUNCTION__, type, kgem->vma[type].count, bucket));
	if (kgem->vma[type].count <= 0)
		return;

	if (kgem->need_purge)
		kgem_purge_cache(kgem);

	/* vma are limited on a per-process basis to around 64k.
	 * This includes all malloc arenas as well as other file
	 * mappings. In order to be fair and not hog the cache,
	 * and more importantly not to exhaust that limit and to
	 * start failing mappings, we keep our own number of open
	 * vma to within a conservative value.
	 */
	cursor = 0;
	while (kgem->vma[type].count > 0) {
		struct kgem_bo *bo = NULL;

		/* Probe each bucket at most once, continuing where the
		 * previous round stopped.
		 */
		probes = 0;
		while (bo == NULL &&
		       probes < ARRAY_SIZE(kgem->vma[type].inactive)) {
			struct list *head = &kgem->vma[type].inactive[cursor++%ARRAY_SIZE(kgem->vma[type].inactive)];

			if (!list_is_empty(head))
				bo = list_last_entry(head, struct kgem_bo, vma);
			probes++;
		}
		if (bo == NULL)
			break;

		DBG(("%s: discarding inactive %s vma cache for %d\n",
		     __FUNCTION__,
		     IS_CPU_MAP(bo->map) ? "CPU" : "GTT", bo->handle));
		assert(IS_CPU_MAP(bo->map) == type);
		assert(bo->map);
		assert(bo->rq == NULL);

		VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map), bytes(bo)));
		/* The munmap() of the mapping is disabled here; only the
		 * bookkeeping is dropped:
		 */
//		munmap(MAP(bo->map), bytes(bo));
		bo->map = NULL;
		list_del(&bo->vma);
		kgem->vma[type].count--;

		if (bo->purged)
			continue;
		if (kgem_bo_set_purgeable(kgem, bo))
			continue;

		DBG(("%s: freeing unpurgeable old mapping\n",
		     __FUNCTION__));
		kgem_bo_free(kgem, bo);
	}
}
 
/*
 * Return a mapping of @bo without synchronising its domain.
 *
 * On LLC hardware an untiled, non-scanout bo is mapped through
 * kgem_bo_map__cpu() instead.  Otherwise an existing CPU mapping is
 * released, and a GTT mapping is created (and cached in bo->map) if the bo
 * does not have one yet.  Returns NULL if the mapping could not be created.
 */
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));

	assert(bo->proxy == NULL);
	assert(list_is_empty(&bo->list));
	assert(!IS_USER_MAP(bo->map));
	assert_tiling(kgem, bo);

	if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) {
		DBG(("%s: converting request for GTT map into CPU map\n",
		     __FUNCTION__));
		return kgem_bo_map__cpu(kgem, bo);
	}

	if (IS_CPU_MAP(bo->map))
		kgem_bo_release_map(kgem, bo);

	/* Reuse the cached GTT mapping when there is one. */
	if (bo->map != NULL)
		return bo->map;

	assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);

	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

	ptr = __kgem_bo_map__gtt(kgem, bo);
	if (ptr != NULL) {
		/* Cache this mapping to avoid the overhead of an
		 * excruciatingly slow GTT pagefault. This is more an
		 * issue with compositing managers which need to frequently
		 * flush CPU damage to their GPU bo.
		 */
		bo->map = ptr;
		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
	}

	return ptr;
}
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
 
DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
assert(bo->proxy == NULL);
assert(list_is_empty(&bo->list));
assert(!IS_USER_MAP(bo->map));
assert(bo->exec == NULL);
assert_tiling(kgem, bo);
 
if (bo->tiling == I915_TILING_NONE && !bo->scanout &&
(kgem->has_llc || bo->domain == DOMAIN_CPU)) {
DBG(("%s: converting request for GTT map into CPU map\n",
__FUNCTION__));
ptr = kgem_bo_map__cpu(kgem, bo);
if (ptr)
kgem_bo_sync__cpu(kgem, bo);
return ptr;
}
 
if (IS_CPU_MAP(bo->map))
kgem_bo_release_map(kgem, bo);
 
ptr = bo->map;
if (ptr == NULL) {
assert(kgem_bo_size(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
 
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
 
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
 
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
 
if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
struct drm_i915_gem_set_domain set_domain;
 
DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
bo->needs_flush, bo->domain, __kgem_busy(kgem, bo->handle)));
 
/* XXX use PROT_READ to avoid the write flush? */
 
VG_CLEAR(set_domain);
set_domain.handle = bo->handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) == 0) {
kgem_bo_retire(kgem, bo);
bo->domain = DOMAIN_GTT;
bo->gtt_dirty = true;
}
}
 
return ptr;
}
 
/*
 * Return a GTT mapping of @bo without synchronising its domain.
 *
 * An existing CPU mapping is released first.  If the bo has no cached
 * mapping a new GTT mapping is created and stored in bo->map.  Returns NULL
 * if the mapping could not be created.
 */
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
	void *ptr;

	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));

	assert(bo->exec == NULL);
	assert(list_is_empty(&bo->list));
	assert(!IS_USER_MAP(bo->map));
	assert_tiling(kgem, bo);

	if (IS_CPU_MAP(bo->map))
		kgem_bo_release_map(kgem, bo);

	/* Reuse the cached GTT mapping when there is one. */
	if (bo->map != NULL)
		return bo->map;

	assert(bytes(bo) <= kgem->aperture_mappable / 4);

	kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));

	ptr = __kgem_bo_map__gtt(kgem, bo);
	if (ptr != NULL) {
		/* Cache this mapping to avoid the overhead of an
		 * excruciatingly slow GTT pagefault. This is more an
		 * issue with compositing managers which need to frequently
		 * flush CPU damage to their GPU bo.
		 */
		bo->map = ptr;
		DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
	}

	return ptr;
}
 
/*
 * Return a pointer to @bo's contents for debugging: the existing mapping
 * (of whichever kind) if there is one, otherwise a new GTT mapping that is
 * stored in bo->map.  May return NULL if the GTT mapping fails.
 */
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
{
	if (!bo->map) {
		kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
		bo->map = __kgem_bo_map__gtt(kgem, bo);
		return bo->map;
	}

	return MAP(bo->map);
}
 
/*
 * Return a CPU mapping of @bo, creating and caching one if necessary.
 *
 * An existing CPU mapping is returned directly.  Any other cached mapping
 * is released first.  If the MMAP ioctl fails it is retried after retiring
 * requests, and once more after emptying the caches when need_expire is
 * set; if it still fails an error is logged and NULL is returned.
 *
 * No domain synchronisation is performed here.
 */
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_mmap mmap_arg;

	DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
	     __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
	assert(!bo->purged);
	assert(list_is_empty(&bo->list));
	assert(bo->proxy == NULL);

	if (IS_CPU_MAP(bo->map))
		return MAP(bo->map);

	if (bo->map)
		kgem_bo_release_map(kgem, bo);

	kgem_trim_vma_cache(kgem, MAP_CPU, bucket(bo));

	for (;;) {
		VG_CLEAR(mmap_arg);
		mmap_arg.handle = bo->handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bytes(bo);
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) == 0)
			break;

		/* Try again if retiring requests made progress. */
		if (__kgem_throttle_retire(kgem, 0))
			continue;

		/* Last resort: empty the caches, then try again. */
		if (kgem->need_expire) {
			kgem_cleanup_cache(kgem);
			continue;
		}

		ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain\n",
		       __FUNCTION__, bo->handle, bytes(bo));
		return NULL;
	}

	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));

	DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
	bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
	return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
 
/*
 * Variant of the CPU mapping helper that does not trim the vma cache and
 * does not unconditionally replace an existing mapping.
 *
 * A cached CPU mapping is returned as is. Otherwise the bo is mapped with
 * DRM_IOCTL_I915_GEM_MMAP, retrying after retiring requests or cleaning the
 * cache. On persistent failure the errno captured right after the ioctl is
 * logged and NULL is returned.
 *
 * After a successful mmap: if a non-CPU mapping is cached and the bo is in
 * DOMAIN_CPU, that mapping is released; the new address is then cached only
 * when bo->map is empty. The new address is returned in every case.
 */
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_mmap mmap_arg;

	DBG(("%s(handle=%d, size=%d, mapped? %d)\n",
	     __FUNCTION__, bo->handle, bytes(bo), (int)__MAP_TYPE(bo->map)));
	assert(bo->refcnt);
	assert(!bo->purged);
	assert(list_is_empty(&bo->list));
	assert(bo->proxy == NULL);

	if (IS_CPU_MAP(bo->map))
		return MAP(bo->map);

	for (;;) {
		int err;

		VG_CLEAR(mmap_arg);
		mmap_arg.handle = bo->handle;
		mmap_arg.offset = 0;
		mmap_arg.size = bytes(bo);
		if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) == 0)
			break;

		/* Capture errno before any helper can overwrite it. */
		err = errno;

		assert(err != EINVAL);

		if (__kgem_throttle_retire(kgem, 0))
			continue;

		if (kgem->need_expire) {
			kgem_cleanup_cache(kgem);
			continue;
		}

		ErrorF("%s: failed to mmap handle=%d, %d bytes, into CPU domain: %d\n",
		       __FUNCTION__, bo->handle, bytes(bo), err);
		return NULL;
	}

	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bytes(bo)));
	if (bo->map && bo->domain == DOMAIN_CPU) {
		DBG(("%s: discarding GTT vma for %d\n", __FUNCTION__, bo->handle));
		kgem_bo_release_map(kgem, bo);
	}
	if (bo->map == NULL) {
		DBG(("%s: caching CPU vma for %d\n", __FUNCTION__, bo->handle));
		bo->map = MAKE_CPU_MAP(mmap_arg.addr_ptr);
	}
	return (void *)(uintptr_t)mmap_arg.addr_ptr;
}
/*
 * Move @bo (or the object it ultimately proxies) into the CPU read/write
 * domain.
 *
 * Any batch referencing the bo is submitted first. The proxy chain is then
 * followed to its root. Unless the root is already in DOMAIN_CPU (and
 * FORCE_MMAP_SYNC does not demand a sync for that domain), a SET_DOMAIN
 * ioctl is issued; on success the bo is retired and marked DOMAIN_CPU.
 * An ioctl failure is silently ignored, leaving the domain unchanged.
 */
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
	struct drm_i915_gem_set_domain set_domain;

	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
	assert(!bo->scanout);
	kgem_bo_submit(kgem, bo);

	/* SHM pixmaps use proxies for subpage offsets */
	assert(!bo->purged);
	for (; bo->proxy != NULL; bo = bo->proxy)
		;
	assert(!bo->purged);

	if (bo->domain == DOMAIN_CPU && !(FORCE_MMAP_SYNC & (1 << DOMAIN_CPU)))
		return;

	DBG(("%s: SYNC: handle=%d, needs_flush? %d, domain? %d, busy? %d\n",
	     __FUNCTION__, bo->handle,
	     bo->needs_flush, bo->domain,
	     __kgem_busy(kgem, bo->handle)));

	VG_CLEAR(set_domain);
	set_domain.handle = bo->handle;
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
	set_domain.write_domain = I915_GEM_DOMAIN_CPU;

	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain) != 0)
		return;

	kgem_bo_retire(kgem, bo);
	bo->domain = DOMAIN_CPU;
}
 
/*
 * Clear the gpu_dirty flag on the buffers at the head of the next request.
 *
 * The walk over kgem->next_request->buffers stops at the first buffer that
 * is not marked dirty; anything after that entry is left untouched.
 */
void kgem_clear_dirty(struct kgem *kgem)
{
	struct list * const pending = &kgem->next_request->buffers;
	struct kgem_bo *entry;

	list_for_each_entry(entry, pending, request) {
		if (entry->gpu_dirty) {
			entry->gpu_dirty = false;
			continue;
		}

		break;
	}
}
 
/*
 * Create a proxy bo describing @length bytes at @offset inside @target.
 *
 * The proxy shares the target's handle, takes a reference on the target,
 * and copies its tiling, pitch, dirty/flush/snoop state and request pointer.
 * It is never reusable. The io flag is inherited only when the target is
 * itself not a proxy. If the target is already part of the batch being
 * built (target->exec set), the proxy is appended to the next request's
 * buffer list with the dummy exec entry.
 *
 * Returns the new proxy, or NULL if allocation fails.
 */
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
				  struct kgem_bo *target,
				  int offset, int length)
{
	struct kgem_bo *proxy;

	DBG(("%s: target handle=%d [proxy? %d], offset=%d, length=%d, io=%d\n",
	     __FUNCTION__, target->handle, target->proxy ? target->proxy->delta : -1,
	     offset, length, target->io));

	proxy = __kgem_bo_alloc(target->handle, length);
	if (proxy == NULL)
		return NULL;

	proxy->unique_id = kgem_get_unique_id(kgem);
	proxy->reusable = false;
	/* The size of a proxy is tracked in bytes rather than pages. */
	proxy->size.bytes = length;

	/* State mirrored from the target. */
	proxy->tiling = target->tiling;
	proxy->pitch = target->pitch;
	proxy->snoop = target->snoop;
	proxy->flush = target->flush;
	proxy->gpu_dirty = target->gpu_dirty;
	proxy->io = target->io && target->proxy == NULL;

	assert(!proxy->scanout);
	proxy->proxy = kgem_bo_reference(target);
	proxy->delta = offset;

	if (target->exec) {
		list_move_tail(&proxy->request, &kgem->next_request->buffers);
		proxy->exec = &_kgem_dummy_exec;
	}
	proxy->rq = target->rq;

	return proxy;
}
 
#if 0
/*
 * Allocate an empty kgem_buffer wrapper with no backing memory attached.
 *
 * Only mem, need_io and mmapped are initialised (NULL, false, true); the
 * embedded bo is left for the caller to fill in. Returns NULL when malloc
 * fails.
 */
static struct kgem_buffer *
buffer_alloc(void)
{
	struct kgem_buffer *buf = malloc(sizeof(*buf));

	if (buf != NULL) {
		buf->mem = NULL;
		buf->need_io = false;
		buf->mmapped = true;
	}

	return buf;
}
 
/*
 * Allocate a kgem_buffer together with @num_pages pages of malloc'ed
 * staging memory in a single allocation.
 *
 * mem points just past the header, rounded up to UPLOAD_ALIGNMENT; the
 * extra 2*UPLOAD_ALIGNMENT bytes leave room for that rounding. The buffer
 * is flagged as not mmapped. Other fields are left uninitialised for the
 * caller. Returns NULL when malloc fails.
 */
static struct kgem_buffer *
buffer_alloc_with_data(int num_pages)
{
	struct kgem_buffer *buf;

	buf = malloc(sizeof(*buf) + 2*UPLOAD_ALIGNMENT + num_pages * PAGE_SIZE);
	if (buf != NULL) {
		buf->mem = (void *)ALIGN((uintptr_t)buf + sizeof(*buf), UPLOAD_ALIGNMENT);
		buf->mmapped = false;
	}

	return buf;
}
 
/*
 * Decide whether a snoopable buffer should be tried for this request.
 *
 * Write buffers always qualify; read-only buffers qualify only when
 * kgem->gen is at least 030 (octal).
 */
static inline bool
use_snoopable_buffer(struct kgem *kgem, uint32_t flags)
{
	if (flags & KGEM_BUFFER_WRITE)
		return true;

	return kgem->gen >= 030;
}
 
/*
 * Take over the cached bo @old as the base object of buffer @bo.
 *
 * The contents of @old are copied into bo->base, the request and vma list
 * links are re-pointed at the copy, the cache list link is reset, and @old
 * itself is freed. The resulting base bo has a reference count of 1.
 * @old must not be a proxy and is expected to be untiled.
 */
static void
init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
{
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));

assert(old->proxy == NULL);

memcpy(&bo->base, old, sizeof(*old));
/* The copied list nodes still point at @old; splice the copy in instead. */
if (old->rq)
list_replace(&old->request, &bo->base.request);
else
list_init(&bo->base.request);
list_replace(&old->vma, &bo->base.vma);
list_init(&bo->base.list);
free(old);

assert(bo->base.tiling == I915_TILING_NONE);

bo->base.refcnt = 1;
}
 
/*
 * Try to obtain a snoopable upload buffer of at least @alloc pages from the
 * snoop cache.
 *
 * A cached entry that was previously an io buffer is reused in place (it is
 * already a kgem_buffer); any other entry is wrapped in a freshly allocated
 * kgem_buffer. The buffer is then CPU mapped. Returns NULL when the cache
 * has nothing suitable, when the wrapper cannot be allocated, or when the
 * mapping fails (in which case the bo is freed).
 */
static struct kgem_buffer *
search_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
	struct kgem_buffer *bo;
	struct kgem_bo *old;

	old = search_snoop_cache(kgem, alloc, 0);
	if (old == NULL)
		return NULL;

	if (old->io) {
		bo = (struct kgem_buffer *)old;
		bo->base.refcnt = 1;
	} else {
		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		init_buffer_from_bo(bo, old);
	}

	DBG(("%s: created CPU handle=%d for buffer, size %d\n",
	     __FUNCTION__, bo->base.handle, num_pages(&bo->base)));

	assert(bo->base.snoop);
	assert(bo->base.tiling == I915_TILING_NONE);
	assert(num_pages(&bo->base) >= alloc);
	assert(bo->mmapped == true);
	assert(bo->need_io == false);

	bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
	if (bo->mem != NULL)
		return bo;

	bo->base.refcnt = 0;
	kgem_bo_free(kgem, &bo->base);
	return NULL;
}
 
/*
 * Create a new CPU-coherent upload buffer of @alloc pages.
 *
 * Three strategies are attempted in order, each gated on a capability flag:
 *   1. has_llc:     a plain bo (from the linear cache or newly created),
 *                   CPU mapped.
 *   2. has_caching: as above, but the bo is first switched to SNOOPED
 *                   caching and marked snoop.
 *   3. has_userptr: page-aligned malloc memory wrapped with gem_userptr.
 * A strategy whose mapping or caching step fails frees its bo and falls
 * through to the next one; failure to allocate the wrapper or to create the
 * handle returns NULL immediately. Returns NULL if nothing succeeded.
 */
static struct kgem_buffer *
create_snoopable_buffer(struct kgem *kgem, unsigned alloc)
{
struct kgem_buffer *bo;
uint32_t handle;

/* Strategy 1: with a shared last-level cache a normal CPU map suffices. */
if (kgem->has_llc) {
struct kgem_bo *old;

bo = buffer_alloc();
if (bo == NULL)
return NULL;

old = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
if (old) {
init_buffer_from_bo(bo, old);
} else {
handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
return NULL;
}

debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created CPU (LLC) handle=%d for buffer, size %d\n",
__FUNCTION__, bo->base.handle, alloc));
}

assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->need_io == false);

bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem != NULL)
return bo;

/* Mapping failed: drop this bo and try the next strategy. */
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}

/* Strategy 2: ask the kernel to make the bo snooped. */
if (kgem->has_caching) {
struct kgem_bo *old;

bo = buffer_alloc();
if (bo == NULL)
return NULL;

old = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_CPU_MAP | CREATE_EXACT);
if (old) {
init_buffer_from_bo(bo, old);
} else {
handle = gem_create(kgem->fd, alloc);
if (handle == 0) {
free(bo);
return NULL;
}

debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created CPU handle=%d for buffer, size %d\n",
__FUNCTION__, bo->base.handle, alloc));
}

assert(bo->base.refcnt == 1);
assert(bo->mmapped == true);
assert(bo->need_io == false);

if (!gem_set_caching(kgem->fd, bo->base.handle, SNOOPED))
goto free_caching;

bo->base.snoop = true;

bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem == NULL)
goto free_caching;

return bo;

free_caching:
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
}

/* Strategy 3: wrap ordinary page-aligned memory as a userptr bo. */
if (kgem->has_userptr) {
bo = buffer_alloc();
if (bo == NULL)
return NULL;

//if (posix_memalign(&ptr, 64, ALIGN(size, 64)))
if (posix_memalign(&bo->mem, PAGE_SIZE, alloc * PAGE_SIZE)) {
free(bo);
return NULL;
}

handle = gem_userptr(kgem->fd, bo->mem, alloc * PAGE_SIZE, false);
if (handle == 0) {
free(bo->mem);
free(bo);
return NULL;
}

debug_alloc(kgem, alloc);
__kgem_bo_init(&bo->base, handle, alloc);
DBG(("%s: created snoop handle=%d for buffer\n",
__FUNCTION__, bo->base.handle));

assert(bo->mmapped == true);
assert(bo->need_io == false);

bo->base.refcnt = 1;
bo->base.snoop = true;
/* Tag the map so it is recognised as user memory rather than an mmap. */
bo->base.map = MAKE_USER_MAP(bo->mem);

return bo;
}

return NULL;
}
 
/*
 * Hand out @size bytes of staging memory for an upload or a readback.
 *
 * @flags is a combination of KGEM_BUFFER_WRITE, KGEM_BUFFER_INPLACE and
 * KGEM_BUFFER_LAST. On success *@ret receives the CPU address of the
 * reserved range and the return value is a proxy bo (see kgem_create_proxy)
 * covering that range of the underlying buffer. NULL is returned when no
 * buffer could be allocated.
 *
 * The search order is:
 *   1. space left in a buffer already on kgem->batch_buffers, or (for
 *      writes) on kgem->active_buffers;
 *   2. with LLC and unless WRITE|INPLACE was requested, a CPU mapped bo;
 *   3. for WRITE|INPLACE, a mappable bo from the linear cache, mapped with
 *      kgem_bo_map;
 *   4. a snoopable buffer (cached or newly created);
 *   5. an ordinary bo, CPU mapped when writing, otherwise paired with
 *      malloc'ed memory that is transferred with pwrite/pread (need_io).
 * A freshly set up buffer is added to kgem->batch_buffers.
 *
 * NOTE(review): some early "return NULL" paths are reached while still
 * holding a bo taken from the cache in `old`; whether that bo needs to be
 * handed back there should be confirmed against the cache helpers.
 */
struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
				   uint32_t size, uint32_t flags,
				   void **ret)
{
	struct kgem_buffer *bo;
	unsigned offset, alloc;
	struct kgem_bo *old;

	DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
	     __FUNCTION__, size, flags,
	     !!(flags & KGEM_BUFFER_WRITE),
	     !!(flags & KGEM_BUFFER_INPLACE),
	     !!(flags & KGEM_BUFFER_LAST)));
	assert(size);
	/* we should never be asked to create anything TOO large */
	assert(size <= kgem->max_object_size);

#if !DBG_NO_UPLOAD_CACHE
	list_for_each_entry(bo, &kgem->batch_buffers, base.list) {
		assert(bo->base.io);
		assert(bo->base.refcnt >= 1);

		/* We can reuse any write buffer which we can fit */
		if (flags == KGEM_BUFFER_LAST &&
		    bo->write == KGEM_BUFFER_WRITE &&
		    bo->base.refcnt == 1 && !bo->mmapped &&
		    size <= bytes(&bo->base)) {
			DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
			     __FUNCTION__, size, bo->used, bytes(&bo->base)));
			/* Flush what was staged so far before repurposing the buffer. */
			gem_write(kgem->fd, bo->base.handle,
				  0, bo->used, bo->mem);
			kgem_buffer_release(kgem, bo);
			bo->need_io = 0;
			bo->write = 0;
			offset = 0;
			bo->used = size;
			goto done;
		}

		if (flags & KGEM_BUFFER_WRITE) {
			if ((bo->write & KGEM_BUFFER_WRITE) == 0 ||
			    (((bo->write & ~flags) & KGEM_BUFFER_INPLACE) &&
			     !bo->base.snoop)) {
				DBG(("%s: skip write %x buffer, need %x\n",
				     __FUNCTION__, bo->write, flags));
				continue;
			}
			assert(bo->mmapped || bo->need_io);
		} else {
			if (bo->write & KGEM_BUFFER_WRITE) {
				DBG(("%s: skip write %x buffer, need %x\n",
				     __FUNCTION__, bo->write, flags));
				continue;
			}
		}

		if (bo->used + size <= bytes(&bo->base)) {
			DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
			     __FUNCTION__, bo->used, size, bytes(&bo->base)));
			offset = bo->used;
			bo->used += size;
			goto done;
		}
	}

	if (flags & KGEM_BUFFER_WRITE) {
		list_for_each_entry(bo, &kgem->active_buffers, base.list) {
			assert(bo->base.io);
			assert(bo->base.refcnt >= 1);
			assert(bo->mmapped);
			assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc || bo->base.snoop);

			if (!kgem->has_llc && (bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
				DBG(("%s: skip write %x buffer, need %x\n",
				     __FUNCTION__, bo->write, flags));
				continue;
			}

			if (bo->used + size <= bytes(&bo->base)) {
				DBG(("%s: reusing buffer? used=%d + size=%d, total=%d\n",
				     __FUNCTION__, bo->used, size, bytes(&bo->base)));
				offset = bo->used;
				bo->used += size;
				list_move(&bo->base.list, &kgem->batch_buffers);
				goto done;
			}
		}
	}
#endif

#if !DBG_NO_MAP_UPLOAD
	/* Be a little more generous and hope to hold fewer mmappings */
	alloc = ALIGN(2*size, kgem->buffer_size);
	if (alloc > MAX_CACHE_SIZE)
		alloc = ALIGN(size, kgem->buffer_size);
	if (alloc > MAX_CACHE_SIZE)
		alloc = PAGE_ALIGN(size);
	assert(alloc);

	if (alloc > kgem->aperture_mappable / 4)
		flags &= ~KGEM_BUFFER_INPLACE;
	/* From here on alloc is counted in pages. */
	alloc /= PAGE_SIZE;

	if (kgem->has_llc &&
	    (flags & KGEM_BUFFER_WRITE_INPLACE) != KGEM_BUFFER_WRITE_INPLACE) {
		bo = buffer_alloc();
		if (bo == NULL)
			goto skip_llc;

		old = NULL;
		if ((flags & KGEM_BUFFER_WRITE) == 0)
			old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
		if (old == NULL)
			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
		if (old == NULL)
			old = search_linear_cache(kgem, NUM_PAGES(size), CREATE_INACTIVE | CREATE_CPU_MAP);
		if (old) {
			DBG(("%s: found LLC handle=%d for buffer\n",
			     __FUNCTION__, old->handle));

			init_buffer_from_bo(bo, old);
		} else {
			uint32_t handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				goto skip_llc;
			}
			__kgem_bo_init(&bo->base, handle, alloc);
			DBG(("%s: created LLC handle=%d for buffer\n",
			     __FUNCTION__, bo->base.handle));

			debug_alloc(kgem, alloc);
		}

		assert(bo->mmapped);
		assert(!bo->need_io);

		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
		if (bo->mem) {
			if (flags & KGEM_BUFFER_WRITE)
				kgem_bo_sync__cpu(kgem, &bo->base);
			flags &= ~KGEM_BUFFER_INPLACE;
			goto init;
		} else {
			bo->base.refcnt = 0; /* for valgrind */
			kgem_bo_free(kgem, &bo->base);
		}
	}
skip_llc:

	if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
		/* The issue with using a GTT upload buffer is that we may
		 * cause eviction-stalls in order to free up some GTT space.
		 * An is-mappable? ioctl could help us detect when we are
		 * about to block, or some per-page magic in the kernel.
		 *
		 * XXX This is especially noticeable on memory constrained
		 * devices like gen2 or with relatively slow gpu like i3.
		 */
		DBG(("%s: searching for an inactive GTT map for upload\n",
		     __FUNCTION__));
		old = search_linear_cache(kgem, alloc,
					  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
#if HAVE_I915_GEM_BUFFER_INFO
		if (old) {
			struct drm_i915_gem_buffer_info info;

			/* An example of such a non-blocking ioctl might work */

			VG_CLEAR(info);
			/* Query the bo just found in the cache. */
			info.handle = old->handle;
			if (drmIoctl(kgem->fd,
				     DRM_IOCTL_I915_GEM_BUFFER_INFO,
				     &info) == 0) {
				old->presumed_offset = info.addr;
				if ((info.flags & I915_GEM_MAPPABLE) == 0) {
					kgem_bo_move_to_inactive(kgem, old);
					old = NULL;
				}
			}
		}
#endif
		if (old == NULL)
			old = search_linear_cache(kgem, NUM_PAGES(size),
						  CREATE_EXACT | CREATE_INACTIVE | CREATE_GTT_MAP);
		if (old == NULL) {
			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
			if (old && !__kgem_bo_is_mappable(kgem, old)) {
				_kgem_bo_destroy(kgem, old);
				old = NULL;
			}
		}
		if (old) {
			DBG(("%s: reusing handle=%d for buffer\n",
			     __FUNCTION__, old->handle));
			assert(__kgem_bo_is_mappable(kgem, old));
			assert(!old->snoop);
			assert(old->rq == NULL);

			bo = buffer_alloc();
			if (bo == NULL)
				return NULL;

			init_buffer_from_bo(bo, old);
			assert(num_pages(&bo->base) >= NUM_PAGES(size));

			assert(bo->mmapped);
			assert(bo->base.refcnt == 1);

			bo->mem = kgem_bo_map(kgem, &bo->base);
			if (bo->mem) {
				if (IS_CPU_MAP(bo->base.map))
					flags &= ~KGEM_BUFFER_INPLACE;
				goto init;
			} else {
				bo->base.refcnt = 0;
				kgem_bo_free(kgem, &bo->base);
			}
		}
	}
#else
	flags &= ~KGEM_BUFFER_INPLACE;
#endif
	/* Be more parsimonious with pwrite/pread/cacheable buffers */
	if ((flags & KGEM_BUFFER_INPLACE) == 0)
		alloc = NUM_PAGES(size);

	if (use_snoopable_buffer(kgem, flags)) {
		bo = search_snoopable_buffer(kgem, alloc);
		if (bo) {
			if (flags & KGEM_BUFFER_WRITE)
				kgem_bo_sync__cpu(kgem, &bo->base);
			flags &= ~KGEM_BUFFER_INPLACE;
			goto init;
		}

		if ((flags & KGEM_BUFFER_INPLACE) == 0) {
			bo = create_snoopable_buffer(kgem, alloc);
			if (bo)
				goto init;
		}
	}

	flags &= ~KGEM_BUFFER_INPLACE;

	old = NULL;
	if ((flags & KGEM_BUFFER_WRITE) == 0)
		old = search_linear_cache(kgem, alloc, 0);
	if (old == NULL)
		old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
	if (old) {
		DBG(("%s: reusing ordinary handle %d for io\n",
		     __FUNCTION__, old->handle));
		bo = buffer_alloc_with_data(num_pages(old));
		if (bo == NULL)
			return NULL;

		init_buffer_from_bo(bo, old);
		bo->need_io = flags & KGEM_BUFFER_WRITE;
	} else {
		unsigned hint;

		if (use_snoopable_buffer(kgem, flags)) {
			bo = create_snoopable_buffer(kgem, alloc);
			if (bo)
				goto init;
		}

		bo = buffer_alloc();
		if (bo == NULL)
			return NULL;

		hint = CREATE_INACTIVE;
		if (flags & KGEM_BUFFER_WRITE)
			hint |= CREATE_CPU_MAP;
		old = search_linear_cache(kgem, alloc, hint);
		if (old) {
			DBG(("%s: reusing handle=%d for buffer\n",
			     __FUNCTION__, old->handle));

			init_buffer_from_bo(bo, old);
		} else {
			uint32_t handle = gem_create(kgem->fd, alloc);
			if (handle == 0) {
				free(bo);
				return NULL;
			}

			DBG(("%s: created handle=%d for buffer\n",
			     __FUNCTION__, handle));

			__kgem_bo_init(&bo->base, handle, alloc);
			debug_alloc(kgem, alloc * PAGE_SIZE);
		}

		assert(bo->mmapped);
		assert(!bo->need_io);
		assert(bo->base.refcnt == 1);

		if (flags & KGEM_BUFFER_WRITE) {
			bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
			if (bo->mem != NULL) {
				kgem_bo_sync__cpu(kgem, &bo->base);
				goto init;
			}
		}

		DBG(("%s: failing back to new pwrite buffer\n", __FUNCTION__));
		/* Re-wrap the bo in a buffer that carries its own staging memory. */
		old = &bo->base;
		bo = buffer_alloc_with_data(num_pages(old));
		if (bo == NULL) {
			old->refcnt= 0;
			kgem_bo_free(kgem, old);
			return NULL;
		}

		init_buffer_from_bo(bo, old);

		assert(bo->mem);
		assert(!bo->mmapped);
		assert(bo->base.refcnt == 1);

		bo->need_io = flags & KGEM_BUFFER_WRITE;
	}
init:
	bo->base.io = true;
	assert(bo->base.refcnt == 1);
	assert(num_pages(&bo->base) >= NUM_PAGES(size));
	assert(!bo->need_io || !bo->base.needs_flush);
	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
	assert(bo->mem);
	assert(!bo->mmapped || bo->base.map != NULL);

	bo->used = size;
	bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
	offset = 0;

	assert(list_is_empty(&bo->base.list));
	list_add(&bo->base.list, &kgem->batch_buffers);

	DBG(("%s(pages=%d [%d]) new handle=%d, used=%d, write=%d\n",
	     __FUNCTION__, num_pages(&bo->base), alloc, bo->base.handle, bo->used, bo->write));

done:
	/* Keep the next sub-allocation aligned. */
	bo->used = ALIGN(bo->used, UPLOAD_ALIGNMENT);
	assert(bo->mem);
	*ret = (char *)bo->mem + offset;
	return kgem_create_proxy(kgem, &bo->base, offset, size);
}
 
bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
{
struct kgem_buffer *bo = (struct kgem_buffer *)_bo->proxy;
return bo->write & KGEM_BUFFER_WRITE_INPLACE;
}
 
/*
 * Allocate an upload buffer laid out as a @width x @height image of @bpp
 * bits per pixel.
 *
 * The stride is computed from the width rounded up to an even number of
 * pixels and then rounded up to 4 bytes. Space is requested for an even
 * number of rows; when @height is odd the extra row is given back by
 * trimming the underlying buffer's used mark and the proxy's size.
 * On success *@ret holds the CPU address, the proxy's map is set to that
 * address tagged as a CPU map, and its pitch is the computed stride.
 * Returns NULL on allocation failure.
 */
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret)
{
struct kgem_bo *bo;
int stride;

assert(width > 0 && height > 0);
assert(ret != NULL);
stride = ALIGN(width, 2) * bpp >> 3;
stride = ALIGN(stride, 4);

DBG(("%s: %dx%d, %d bpp, stride=%d\n",
__FUNCTION__, width, height, bpp, stride));

bo = kgem_create_buffer(kgem, stride * ALIGN(height, 2), flags, ret);
if (bo == NULL) {
DBG(("%s: allocation failure for upload buffer\n",
__FUNCTION__));
return NULL;
}
assert(*ret != NULL);
assert(bo->proxy != NULL);

if (height & 1) {
struct kgem_buffer *io = (struct kgem_buffer *)bo->proxy;
int min;

assert(io->used);

/* Having padded this surface to ensure that accesses to
* the last pair of rows is valid, remove the padding so
* that it can be allocated to other pixmaps.
*/
min = bo->delta + height * stride;
min = ALIGN(min, UPLOAD_ALIGNMENT);
if (io->used != min) {
DBG(("%s: trimming buffer from %d to %d\n",
__FUNCTION__, io->used, min));
io->used = min;
}
bo->size.bytes -= stride;
}

bo->map = MAKE_CPU_MAP(*ret);
bo->pitch = stride;
bo->unique_id = kgem_get_unique_id(kgem);
return bo;
}
 
/*
 * Copy the rectangle @box out of the client image @data into a new upload
 * buffer.
 *
 * @stride is the pitch of @data in bytes and @bpp its bits per pixel. The
 * rectangle is written to the origin of the new buffer using the buffer's
 * own pitch. Returns the proxy bo for the uploaded pixels, or NULL when a
 * 2D bo of that size cannot be created or the allocation fails.
 */
struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
					 const void *data,
					 const BoxRec *box,
					 int stride, int bpp)
{
	int width = box->x2 - box->x1;
	int height = box->y2 - box->y1;
	struct kgem_bo *upload;
	void *dst;

	if (!kgem_can_create_2d(kgem, width, height, bpp))
		return NULL;

	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp));

	assert(data);
	assert(width > 0);
	assert(height > 0);
	assert(stride);
	assert(bpp);

	upload = kgem_create_buffer_2d(kgem,
				       width, height, bpp,
				       KGEM_BUFFER_WRITE_INPLACE, &dst);
	if (upload == NULL)
		return NULL;

	memcpy_blt(data, dst, bpp,
		   stride, upload->pitch,
		   box->x1, box->y1,
		   0, 0,
		   width, height);

	return upload;
}
 
/*
 * Attach proxy @bo to the caller's slot @ptr.
 *
 * The proxy is linked onto its target's vma list, a new reference to it is
 * stored in *@ptr, and bo->map is overwritten with the address of that slot
 * (not with a mapping of the bo's memory).
 * NOTE(review): presumably the slot address is kept so the holder's pointer
 * can be cleared when the underlying buffer goes away; confirm against the
 * code that walks the vma list.
 */
void kgem_proxy_bo_attach(struct kgem_bo *bo,
struct kgem_bo **ptr)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->map == NULL || IS_CPU_MAP(bo->map));
assert(bo->proxy);
list_add(&bo->vma, &bo->proxy->vma);
bo->map = ptr;
*ptr = kgem_bo_reference(bo);
}
 
/*
 * Make the contents of the buffer behind proxy @_bo visible to the CPU
 * after the GPU has written to it.
 *
 * For an mmapped buffer a read-only SET_DOMAIN ioctl is issued (CPU or GTT
 * domain, matching the kind of mapping). Otherwise the proxied range is
 * copied from the bo into the buffer's staging memory with gem_read. If
 * either step fails the function returns early without touching the bo's
 * state; on success the bo is retired and its domain reset to DOMAIN_NONE.
 */
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
{
struct kgem_buffer *bo;
/* Capture the proxy's window before _bo is redirected to its target. */
uint32_t offset = _bo->delta, length = _bo->size.bytes;

/* We expect the caller to have already submitted the batch */
assert(_bo->io);
assert(_bo->exec == NULL);
assert(_bo->rq == NULL);
assert(_bo->proxy);

_bo = _bo->proxy;
assert(_bo->proxy == NULL);
assert(_bo->exec == NULL);

bo = (struct kgem_buffer *)_bo;

DBG(("%s(offset=%d, length=%d, snooped=%d)\n", __FUNCTION__,
offset, length, bo->base.snoop));

if (bo->mmapped) {
struct drm_i915_gem_set_domain set_domain;

DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n",
__FUNCTION__,
bo->base.needs_flush,
bo->base.domain,
__kgem_busy(kgem, bo->base.handle)));

assert(!IS_CPU_MAP(bo->base.map) || bo->base.snoop || kgem->has_llc);

VG_CLEAR(set_domain);
set_domain.handle = bo->base.handle;
set_domain.write_domain = 0;
set_domain.read_domains =
IS_CPU_MAP(bo->base.map) ? I915_GEM_DOMAIN_CPU : I915_GEM_DOMAIN_GTT;

if (drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
return;
} else {
/* No mapping: pull the bytes into the malloc'ed shadow copy. */
if (gem_read(kgem->fd,
bo->base.handle, (char *)bo->mem+offset,
offset, length))
return;
}
kgem_bo_retire(kgem, &bo->base);
bo->base.domain = DOMAIN_NONE;
}
#endif
 
/*
 * Look up the cached binding offset recorded on @bo for @format.
 *
 * The binding list is scanned from the embedded head entry and the scan
 * stops at the first entry whose offset is zero, which marks the end of the
 * valid entries. Returns the stored offset, or 0 when no entry matches.
 */
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
{
	struct kgem_bo_binding *entry = &bo->binding;

	while (entry != NULL && entry->offset != 0) {
		if (entry->format == format)
			return entry->offset;

		entry = entry->next;
	}

	return 0;
}
 
/*
 * Record that @bo is bound with @format at @offset.
 *
 * The first list entry with a zero offset is reused; the entry after it, if
 * any, then has its offset cleared so that it becomes the new end marker.
 * When every entry is in use a new one is allocated and linked directly
 * after the embedded head. An allocation failure is ignored: the binding is
 * simply not cached.
 */
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
{
	struct kgem_bo_binding *slot;

	for (slot = &bo->binding; slot != NULL; slot = slot->next) {
		if (slot->offset == 0) {
			slot->offset = offset;
			slot->format = format;

			if (slot->next != NULL)
				slot->next->offset = 0;

			return;
		}
	}

	slot = malloc(sizeof(*slot));
	if (slot == NULL)
		return;

	slot->next = bo->binding.next;
	slot->format = format;
	slot->offset = offset;
	bo->binding.next = slot;
}
 
/*
 * Query the framebuffer description with SRV_FBINFO and create a bo that
 * represents it.
 *
 * The bo is allocated with the pseudo handle -2, sized in whole pages from
 * pitch * height (any partial trailing page is truncated by the integer
 * division), marked X-tiled, in the GTT domain and as scanout, and stored
 * in fb->fb_bo.
 *
 * Returns 1 on success, 0 if the ioctl or the allocation fails.
 */
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb)
{
	struct kgem_bo *bo;
	size_t size;
	int ret;

	ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);
	if (ret != 0)
		return 0;

	size = fb->pitch * fb->height / PAGE_SIZE;

	bo = __kgem_bo_alloc(-2, size);
	if (!bo) {
		return 0;
	}

	bo->domain = DOMAIN_GTT;
	bo->unique_id = kgem_get_unique_id(kgem);
	bo->pitch = fb->pitch;
	bo->tiling = I915_TILING_X;
	bo->scanout = 1;
	fb->fb_bo = bo;

//	printf("fb width %d height %d pitch %d bo %p\n",
//	       fb->width, fb->height, fb->pitch, fb->fb_bo);

	return 1;
}
 
 
/*
 * Re-query the framebuffer description and bring the framebuffer bo in line
 * with it.
 *
 * The bo pointer kept in @fb is saved before the SRV_FBINFO call and put
 * back afterwards, because the call is handed the whole structure and may
 * overwrite it. The pointer is restored on failure as well, so a failed
 * query cannot lose the bo.
 *
 * Returns 1 when the page count or pitch changed and the bo was updated,
 * 0 when nothing changed or the ioctl failed.
 */
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb)
{
	struct kgem_bo *bo;
	size_t size;
	int ret;

	bo = fb->fb_bo;

	ret = drmIoctl(kgem->fd, SRV_FBINFO, fb);

	/* Restore unconditionally, whatever the call did to the structure. */
	fb->fb_bo = bo;

	if (ret != 0)
		return 0;

	size = fb->pitch * fb->height / PAGE_SIZE;

	if ((size != bo->size.pages.count) ||
	    (fb->pitch != bo->pitch))
	{
		bo->size.pages.count = size;
		bo->pitch = fb->pitch;

		printf("fb width %d height %d pitch %d bo %p\n",
		       fb->width, fb->height, fb->pitch, fb->fb_bo);

		return 1;
	}

	return 0;
}
 
/*
 * Drop a reference to @bo and then free it.
 *
 * NOTE(review): kgem_bo_destroy() calls _kgem_bo_destroy() when the count
 * reaches zero, and kgem_bo_free() is then called on the same pointer
 * regardless of the remaining count. Whether this can release the object
 * twice, or free it while other references exist, depends on what
 * _kgem_bo_destroy() does for this kind of bo -- confirm before reuse.
 */
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
kgem_bo_destroy(kgem, bo);
kgem_bo_free(kgem, bo);
}
 
 
/*
 * Release every bo held on the kgem->pinned_batches lists.
 *
 * Each list is drained from the front by dropping one reference on its
 * first entry until the list is empty.
 * NOTE(review): this relies on kgem_bo_destroy() unlinking the bo from the
 * list; an entry with extra references would keep the loop spinning --
 * confirm that pinned batches are only ever singly referenced here.
 */
void kgem_close_batches(struct kgem *kgem)
{
	int n;

	for (n = 0; n < ARRAY_SIZE(kgem->pinned_batches); n++) {
		while (!list_is_empty(&kgem->pinned_batches[n])) {
			kgem_bo_destroy(kgem,
					list_first_entry(&kgem->pinned_batches[n],
							 struct kgem_bo, list));
		}
	}
}
 
/*
 * Wrap an existing GEM @handle describing a @pitch x @height surface in a
 * kgem_bo.
 *
 * The size is pitch * height expressed in whole pages (a partial trailing
 * page is truncated by the integer division). The bo is set up as X-tiled,
 * in the GTT domain and not a scanout. Returns NULL if allocation fails.
 */
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
				    int pitch, int height)
{
	const int npages = pitch * height / PAGE_SIZE;
	struct kgem_bo *wrapped;

	wrapped = __kgem_bo_alloc(handle, npages);
	if (wrapped == NULL)
		return NULL;

	wrapped->unique_id = kgem_get_unique_id(kgem);
	wrapped->pitch = pitch;
	wrapped->tiling = I915_TILING_X;
	wrapped->domain = DOMAIN_GTT;
	wrapped->scanout = 0;

	return wrapped;
}
/drivers/video/Intel-2D/sna/kgem.h
0,0 → 1,761
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef KGEM_H
#define KGEM_H
 
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdarg.h>
 
#include <i915_drm.h>
 
#include "compiler.h"
#include "intel_list.h"
 
#include <limits.h>
/*
 * Provide MAXSHORT/MINSHORT/MAXINT/MININT in terms of the <limits.h>
 * constants whenever at least one of them is missing.
 */
#if !defined(MAXSHORT) || !defined(MINSHORT) || \
!defined(MAXINT) || !defined(MININT)
/*
* Some implementations #define these through <math.h>, so preclude
* #include'ing it later.
*/

#include <math.h>
#undef MAXSHORT
#define MAXSHORT SHRT_MAX
#undef MINSHORT
#define MINSHORT SHRT_MIN
#undef MAXINT
#define MAXINT INT_MAX
#undef MININT
#define MININT INT_MIN

#endif
 
/*
 * Driver-side description of one GEM buffer object (or of a proxy onto a
 * range of another bo).
 */
struct kgem_bo {
	/*
	 * Request the bo was last used in. The two low bits of the pointer
	 * carry the ring; use RQ() for the pointer and RQ_RING() for the ring.
	 */
	struct kgem_request *rq;
#define RQ(rq) ((struct kgem_request *)((uintptr_t)(rq) & ~3))
#define RQ_RING(rq) ((uintptr_t)(rq) & 3)
#define RQ_IS_BLT(rq) (RQ_RING(rq) == KGEM_BLT)
	/* Exec entry while the bo is part of the batch being built, else NULL. */
	struct drm_i915_gem_exec_object2 *exec;

	/* Target bo when this object is a proxy, else NULL. */
	struct kgem_bo *proxy;

	struct list list;
	struct list request;
	struct list vma;

	/*
	 * Cached mapping. Bit 0 set marks a CPU map; a non-NULL pointer with
	 * bit 0 clear is a GTT map. MAP() strips the two low tag bits.
	 */
	void *map;
#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
#define IS_GTT_MAP(ptr) ((ptr) && ((uintptr_t)(ptr) & 1) == 0)
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))

	/*
	 * Per-format binding offsets. The head entry is embedded; further
	 * entries are chained through next. A zero offset ends the valid part.
	 */
	struct kgem_bo_binding {
		struct kgem_bo_binding *next;
		uint32_t format;
		uint16_t offset;
	} binding;

	uint32_t unique_id;
	uint32_t refcnt;
	uint32_t handle;
	uint32_t target_handle;
	uint32_t presumed_offset;
	uint32_t delta;
	/* Size as a page count plus cache bucket, or as a raw byte count. */
	union {
		struct {
			uint32_t count:27;
#define PAGE_SIZE 4096
			uint32_t bucket:5;
#define NUM_CACHE_BUCKETS 16
#define MAX_CACHE_SIZE (1 << (NUM_CACHE_BUCKETS+12))
		} pages;
		uint32_t bytes;
	} size;
	uint32_t pitch : 18; /* max 128k */
	uint32_t tiling : 2;
	uint32_t reusable : 1;
	uint32_t gpu_dirty : 1;
	uint32_t gtt_dirty : 1;
	uint32_t domain : 2;	/* one of the DOMAIN_* values below */
	uint32_t needs_flush : 1;
	uint32_t snoop : 1;
	uint32_t io : 1;
	uint32_t flush : 1;
	uint32_t scanout : 1;
	uint32_t purged : 1;
};
/* Values stored in kgem_bo.domain. */
#define DOMAIN_NONE 0
#define DOMAIN_CPU 1
#define DOMAIN_GTT 2
#define DOMAIN_GPU 3
 
/* One submitted (or in-construction) batch and the buffers it references. */
struct kgem_request {
struct list list;
struct kgem_bo *bo;
/* Buffers used by this request, linked through kgem_bo.request. */
struct list buffers;
int ring;
};
 
/* Kinds of cached mapping; used to index kgem.vma[]. */
enum {
MAP_GTT = 0,
MAP_CPU,
NUM_MAP_TYPES,
};
 
/*
 * Per-device GEM state: the batch under construction, the bo caches, the
 * outstanding requests, and the capability flags and limits of the device.
 */
struct kgem {
int fd;
int wedged;
unsigned gen;

uint32_t unique_id;

enum kgem_mode {
/* order matches I915_EXEC_RING ordering */
KGEM_NONE = 0,
KGEM_RENDER,
KGEM_BSD,
KGEM_BLT,
} mode, ring;

/* Lists of buffer objects, grouped by state and size bucket. */
struct list flushing;
struct list large;
struct list large_inactive;
struct list active[NUM_CACHE_BUCKETS][3];
struct list inactive[NUM_CACHE_BUCKETS];
struct list pinned_batches[2];
struct list snoop;
struct list scanout;
struct list batch_buffers, active_buffers;

/* Outstanding requests; index 1 is used for the BLT ring, 0 otherwise. */
struct list requests[2];
struct kgem_request *next_request;
struct kgem_request static_request;

/* Cached mappings, one set per map type (MAP_GTT, MAP_CPU). */
struct {
struct list inactive[NUM_CACHE_BUCKETS];
int16_t count;
} vma[NUM_MAP_TYPES];

uint32_t batch_flags;
uint32_t batch_flags_base;
#define I915_EXEC_SECURE (1<<9)
#define LOCAL_EXEC_OBJECT_WRITE (1<<2)

/* Fill levels of the batch, exec and reloc arrays below. */
uint16_t nbatch;
uint16_t surface;
uint16_t nexec;
uint16_t nreloc;
uint16_t nreloc__self;
uint16_t nfence;
uint16_t batch_size;
uint16_t min_alignment;

uint32_t flush:1;
uint32_t need_expire:1;
uint32_t need_purge:1;
uint32_t need_retire:1;
uint32_t need_throttle:1;
uint32_t scanout_busy:1;
uint32_t busy:1;

/* Capability flags. */
uint32_t has_create2 :1;
uint32_t has_userptr :1;
uint32_t has_blt :1;
uint32_t has_relaxed_fencing :1;
uint32_t has_relaxed_delta :1;
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
uint32_t has_pinned_batches :1;
uint32_t has_caching :1;
uint32_t has_llc :1;
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;

uint32_t can_blt_cpu :1;

/* Limits and sizes. */
uint16_t fence_max;
uint16_t half_cpu_cache_pages;
uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
uint32_t aperture, aperture_fenced;
uint32_t max_upload_tile_size, max_copy_tile_size;
uint32_t max_gpu_size, max_cpu_size;
uint32_t large_object_size, max_object_size;
uint32_t buffer_size;

/* Hooks supplied by the render backend. */
void (*context_switch)(struct kgem *kgem, int new_mode);
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);

#if 0
void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
#endif

/* Storage for the batch under construction. */
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[384] page_aligned;
struct drm_i915_gem_relocation_entry reloc[8192] page_aligned;

#ifdef DEBUG_MEMORY
struct {
int bo_allocs;
size_t bo_bytes;
} debug_memory;
#endif
};
 
#define KGEM_MAX_DEFERRED_VBO 16

/* Entries held back from the usable size of each per-batch array. */
#define KGEM_BATCH_RESERVED 1
#define KGEM_RELOC_RESERVED (KGEM_MAX_DEFERRED_VBO)
#define KGEM_EXEC_RESERVED (1+KGEM_MAX_DEFERRED_VBO)

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#endif

/* Usable capacity of the batch, exec and reloc arrays after the reserve. */
#define KGEM_BATCH_SIZE(K) ((K)->batch_size-KGEM_BATCH_RESERVED)
#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED)
#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED)
 
/* Set-up and reset of the kgem state. */
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen);
void kgem_reset(struct kgem *kgem);

/* Constructors wrapping memory or handles that already exist. */
struct kgem_bo *kgem_create_map(struct kgem *kgem,
void *ptr, uint32_t size,
bool read_only);

struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name);
struct kgem_bo *kgem_create_for_prime(struct kgem *kgem, int name, uint32_t size);
int kgem_bo_export_to_prime(struct kgem *kgem, struct kgem_bo *bo);

struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size, unsigned flags);
struct kgem_bo *kgem_create_proxy(struct kgem *kgem,
struct kgem_bo *target,
int offset, int length);

void kgem_proxy_bo_attach(struct kgem_bo *bo, struct kgem_bo **ptr);

int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
/* kgem_can_create_2d() returns a mask of the KGEM_CAN_CREATE_* bits below. */
unsigned kgem_can_create_2d(struct kgem *kgem, int width, int height, int depth);
#define KGEM_CAN_CREATE_GPU 0x1
#define KGEM_CAN_CREATE_CPU 0x2
#define KGEM_CAN_CREATE_LARGE 0x4
#define KGEM_CAN_CREATE_GTT 0x8

uint32_t kgem_get_unique_id(struct kgem *kgem);

struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
uint32_t width,
uint32_t height,
uint32_t pitch,
uint32_t bpp);
/* Flag bits accepted by the bo creation and cache search functions. */
enum {
CREATE_EXACT = 0x1,
CREATE_INACTIVE = 0x2,
CREATE_CPU_MAP = 0x4,
CREATE_GTT_MAP = 0x8,
CREATE_SCANOUT = 0x10,
CREATE_PRIME = 0x20,
CREATE_TEMPORARY = 0x40,
CREATE_CACHED = 0x80,
CREATE_NO_RETIRE = 0x100,
CREATE_NO_THROTTLE = 0x200,
};
struct kgem_bo *kgem_create_2d(struct kgem *kgem,
int width,
int height,
int bpp,
int tiling,
uint32_t flags);
struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
int width,
int height,
int bpp,
uint32_t flags);

/* Per-format binding offsets cached on a bo. */
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);

bool kgem_retire(struct kgem *kgem);
 
bool __kgem_ring_is_idle(struct kgem *kgem, int ring);

/*
 * Report whether @ring has no outstanding work.
 *
 * Requests are tracked in two lists: index 1 for KGEM_BLT and index 0 for
 * every other ring. An empty list means idle without further checks;
 * otherwise the out-of-line helper is asked, using the list index rather
 * than the original ring value.
 */
static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring)
{
	const int idx = (ring == KGEM_BLT);

	return list_is_empty(&kgem->requests[idx]) ||
		__kgem_ring_is_idle(kgem, idx);
}
 
/*
 * Report whether the GPU is idle as far as kgem knows: either nothing is
 * waiting to be retired, or the current ring reports idle.
 */
static inline bool kgem_is_idle(struct kgem *kgem)
{
	return !kgem->need_retire || kgem_ring_is_idle(kgem, kgem->ring);
}
 
/*
 * True when the request list of the current ring is empty. The list is
 * chosen as in kgem_ring_is_idle(): index 1 for KGEM_BLT, 0 otherwise.
 */
static inline bool __kgem_ring_empty(struct kgem *kgem)
{
return list_is_empty(&kgem->requests[kgem->ring == KGEM_BLT]);
}
 
void _kgem_submit(struct kgem *kgem);
/* Submit the batch under construction, but only if it is not empty. */
static inline void kgem_submit(struct kgem *kgem)
{
if (kgem->nbatch)
_kgem_submit(kgem);
}
 
/*
 * Decide whether the pending batch should be flushed now.
 *
 * Returns false when the batch holds no relocations. Otherwise returns
 * true only when kgem->flush differs from @flush and the current ring is
 * idle.
 */
static inline bool kgem_flush(struct kgem *kgem, bool flush)
{
	if (kgem->nreloc == 0)
		return false;

	if ((kgem->flush ^ flush) == 0)
		return false;

	return kgem_ring_is_idle(kgem, kgem->ring);
}
 
/* Submit the current batch if @bo is referenced by it (bo->exec is set). */
static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo)
{
if (bo->exec)
_kgem_submit(kgem);
}

void kgem_scanout_flush(struct kgem *kgem, struct kgem_bo *bo);
 
/*
 * Take an additional reference on @bo and return it. The bo must already
 * hold at least one reference.
 */
static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo)
{
assert(bo->refcnt);
bo->refcnt++;
return bo;
}
 
void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
/*
 * Drop one reference on @bo; when the count reaches zero the bo is handed
 * to _kgem_bo_destroy().
 */
static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
{
assert(bo->refcnt);
if (--bo->refcnt == 0)
_kgem_bo_destroy(kgem, bo);
}

void kgem_clear_dirty(struct kgem *kgem);
 
/*
 * Select the engine @mode for the commands about to be emitted for @bo.
 *
 * If the batch already holds relocations, @bo is not yet part of it and
 * the current ring is idle, the pending batch is submitted first. The mode
 * is then updated if it differs; the context_switch hook is disabled here.
 */
static inline void kgem_set_mode(struct kgem *kgem,
enum kgem_mode mode,
struct kgem_bo *bo)
{
assert(!kgem->wedged);

#if DEBUG_FLUSH_BATCH
kgem_submit(kgem);
#endif

if (kgem->nreloc && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
_kgem_submit(kgem);

if (kgem->mode == mode)
return;

// kgem->context_switch(kgem, mode);
kgem->mode = mode;
}
 
/*
 * Set the mode of a batch that is asserted to be empty and mode-less
 * (mode == KGEM_NONE, nbatch == 0) on a device that is not wedged.
 */
static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
{
	assert(kgem->mode == KGEM_NONE);
	assert(kgem->nbatch == 0);
	assert(!kgem->wedged);

	/* The context_switch hook call is disabled here:
	 * kgem->context_switch(kgem, mode); */
	kgem->mode = mode;
}
 
/*
 * Check that num_dwords more dwords, plus the KGEM_BATCH_RESERVED tail,
 * still fit below kgem->surface in the batch.
 */
static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords)
{
	bool fits;

	assert(num_dwords > 0);
	assert(kgem->nbatch < kgem->surface);
	assert(kgem->surface <= kgem->batch_size);

	fits = kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface;
	return likely(fits);
}
 
/* Check that n more relocation entries fit within KGEM_RELOC_SIZE(kgem). */
static inline bool kgem_check_reloc(struct kgem *kgem, int n)
{
	bool fits;

	assert(kgem->nreloc <= KGEM_RELOC_SIZE(kgem));

	fits = kgem->nreloc + n <= KGEM_RELOC_SIZE(kgem);
	return likely(fits);
}
 
/* Check that n more exec entries fit within KGEM_EXEC_SIZE(kgem). */
static inline bool kgem_check_exec(struct kgem *kgem, int n)
{
	bool fits;

	assert(kgem->nexec <= KGEM_EXEC_SIZE(kgem));

	fits = kgem->nexec + n <= KGEM_EXEC_SIZE(kgem);
	return likely(fits);
}
 
/* Check that n more relocations and n more exec entries both fit. */
static inline bool kgem_check_reloc_and_exec(struct kgem *kgem, int n)
{
	if (!kgem_check_reloc(kgem, n))
		return false;

	return kgem_check_exec(kgem, n);
}
 
/*
 * Check batch space for num_dwords commands plus num_surfaces surface
 * entries (8 dwords each, taken from kgem->surface downwards), and that
 * num_surfaces more relocation and exec entries are available.
 */
static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
						  int num_dwords,
						  int num_surfaces)
{
	if ((int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) >
	    (int)(kgem->surface - num_surfaces*8))
		return false;

	if (!kgem_check_reloc(kgem, num_surfaces))
		return false;

	return kgem_check_exec(kgem, num_surfaces);
}
 
/*
 * Return a pointer to the next free dword of the batch.
 *
 * If relocations are already queued, the batch is submitted first and the
 * previous mode is re-established on the fresh batch.
 */
static inline uint32_t *kgem_get_batch(struct kgem *kgem)
{
	if (kgem->nreloc) {
		unsigned saved_mode = kgem->mode;

		_kgem_submit(kgem);
		_kgem_set_mode(kgem, saved_mode);
	}

	return &kgem->batch[kgem->nbatch];
}
 
bool kgem_check_bo(struct kgem *kgem, ...) __attribute__((sentinel(0)));
bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_check_many_bo_fenced(struct kgem *kgem, ...) __attribute__((sentinel(0)));
 
#define KGEM_RELOC_FENCED 0x8000
uint32_t kgem_add_reloc(struct kgem *kgem,
uint32_t pos,
struct kgem_bo *bo,
uint32_t read_write_domains,
uint32_t delta);
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write);
void *__kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
void __kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length);
 
int kgem_bo_fenced_size(struct kgem *kgem, struct kgem_bo *bo);
void kgem_get_tile_size(struct kgem *kgem, int tiling,
int *tile_width, int *tile_height, int *tile_size);
 
/* Size in bytes of a proxy bo (the bo is asserted to have a proxy). */
static inline int __kgem_buffer_size(struct kgem_bo *bo)
{
	assert(bo->proxy != NULL);

	return bo->size.bytes;
}
 
/*
 * Size in bytes of a non-proxy bo, computed from its page count
 * (the bo is asserted to have no proxy).
 */
static inline int __kgem_bo_size(struct kgem_bo *bo)
{
	int bytes;

	assert(bo->proxy == NULL);

	bytes = PAGE_SIZE * bo->size.pages.count;
	return bytes;
}
 
/* Size in bytes of any bo: byte size for proxies, page-based otherwise. */
static inline int kgem_bo_size(struct kgem_bo *bo)
{
	return bo->proxy ? __kgem_buffer_size(bo) : __kgem_bo_size(bo);
}
 
/*
static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
struct kgem_bo *bo)
{
int pitch = bo->pitch;
if (kgem->gen >= 040 && bo->tiling)
pitch /= 4;
if (pitch > MAXSHORT) {
DBG(("%s: can not blt to handle=%d, adjusted pitch=%d\n",
__FUNCTION__, bo->handle, pitch));
return false;
}
 
return true;
}
 
static inline bool kgem_bo_can_blt(struct kgem *kgem,
struct kgem_bo *bo)
{
if (bo->tiling == I915_TILING_Y) {
DBG(("%s: can not blt to handle=%d, tiling=Y\n",
__FUNCTION__, bo->handle));
return false;
}
 
return kgem_bo_blt_pitch_is_ok(kgem, bo);
}
*/
 
/*
 * Decide whether a bo can be mapped through the mappable aperture.
 *
 * The checks run in this order:
 *  - already in the GTT domain: yes;
 *  - gen < 040, tiled, and presumed_offset not aligned to the fenced
 *    size: no;
 *  - gen 021 with Y tiling: no;
 *  - LLC available and untiled: yes;
 *  - no presumed offset yet: only if the bo is at most a quarter of the
 *    mappable aperture;
 *  - otherwise: only if the bo ends inside the mappable aperture.
 */
static inline bool __kgem_bo_is_mappable(struct kgem *kgem,
					 struct kgem_bo *bo)
{
	if (bo->domain == DOMAIN_GTT)
		return true;

	if (kgem->gen < 040 && bo->tiling) {
		const int fence_mask = kgem_bo_fenced_size(kgem, bo) - 1;

		if (bo->presumed_offset & fence_mask)
			return false;
	}

	if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
		return false;

	if (kgem->has_llc && bo->tiling == I915_TILING_NONE)
		return true;

	if (bo->presumed_offset)
		return bo->presumed_offset + kgem_bo_size(bo) <= kgem->aperture_mappable;

	return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
/*
 * Debug-logging, refcount-asserting wrapper around
 * __kgem_bo_is_mappable().
 */
static inline bool kgem_bo_is_mappable(struct kgem *kgem,
				       struct kgem_bo *bo)
{
	bool mappable;

	DBG(("%s: domain=%d, offset: %d size: %d\n",
	     __FUNCTION__, bo->domain, bo->presumed_offset, kgem_bo_size(bo)));
	assert(bo->refcnt);

	mappable = __kgem_bo_is_mappable(kgem, bo);
	return mappable;
}
 
/*
 * Report whether the bo counts as already mapped in the appropriate way.
 *
 * With an existing map, the map must be a CPU map exactly when the bo is
 * untiled.  Without one, only an untiled bo in the CPU domain qualifies.
 */
static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: map=%p, tiling=%d, domain=%d\n",
	     __FUNCTION__, bo->map, bo->tiling, bo->domain));
	assert(bo->refcnt);

	if (bo->map != NULL)
		return IS_CPU_MAP(bo->map) == !bo->tiling;

	return bo->tiling == I915_TILING_NONE && bo->domain == DOMAIN_CPU;
}
 
/*
 * Report whether the bo is, or could be, mapped.
 *
 * Already-mapped bos and untiled bos with LLC or in the CPU domain always
 * qualify; Y-tiled bos on gen 021 never do; anything else must be at most
 * a quarter of the mappable aperture.
 */
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
	if (kgem_bo_mapped(kgem, bo))
		return true;

	if (!bo->tiling) {
		if (kgem->has_llc)
			return true;
		if (bo->domain == DOMAIN_CPU)
			return true;
	}

	if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
		return false;

	return kgem_bo_size(bo) <= kgem->aperture_mappable / 4;
}
 
/*
 * Report whether the bo may be CPU-mapped for reading (write == false)
 * or writing (write == true).
 *
 * Purged bos, and scanout bos when writing, are refused.  With LLC the
 * answer is otherwise yes.  Without LLC the bo must be in the CPU domain
 * and, for writes, must not be part of the current batch.
 */
static inline bool kgem_bo_can_map__cpu(struct kgem *kgem,
					struct kgem_bo *bo,
					bool write)
{
	if (bo->purged)
		return false;

	if (write && bo->scanout)
		return false;

	if (kgem->has_llc)
		return true;

	if (bo->domain != DOMAIN_CPU)
		return false;

	if (!write)
		return true;

	return bo->exec == NULL;
}
 
/* Follow the proxy chain to its end and report that bo's snoop flag. */
static inline bool kgem_bo_is_snoop(struct kgem_bo *bo)
{
	assert(bo->refcnt);

	for (; bo->proxy; bo = bo->proxy)
		;

	return bo->snoop;
}
 
void kgem_bo_undo(struct kgem *kgem, struct kgem_bo *bo);
 
bool __kgem_busy(struct kgem *kgem, int handle);
 
/* OR the ring number into the bits of the bo's request pointer. */
static inline void kgem_bo_mark_busy(struct kgem_bo *bo, int ring)
{
	uintptr_t tagged = (uintptr_t)bo->rq;

	tagged |= ring;
	bo->rq = (struct kgem_request *)tagged;
}
 
/*
 * Detach the bo from its request: clear the request pointer, unlink it
 * from the request's buffer list and reset its domain and flush/dirty
 * flags.
 */
static inline void __kgem_bo_clear_busy(struct kgem_bo *bo)
{
	bo->rq = NULL;
	list_del(&bo->request);

	bo->domain = DOMAIN_NONE;
	bo->needs_flush = false;
	bo->gtt_dirty = false;
}
 
/* A bo is considered busy while it still has a request pointer set. */
static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
	assert(bo->refcnt);

	return bo->rq != NULL;
}
 
/*
 * Busy check that refreshes the bo's state first.
 *
 * A bo in the current batch is always busy.  Otherwise the batch is
 * submitted if kgem_flush() asks for it, and a bo whose handle
 * __kgem_busy() reports as no longer busy has its busy state cleared
 * before the final answer is taken from kgem_bo_is_busy().
 */
static inline bool __kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, domain: %d exec? %d, rq? %d\n", __FUNCTION__,
	     bo->handle, bo->domain, bo->exec != NULL, bo->rq != NULL));
	assert(bo->refcnt);

	if (bo->exec)
		return true;

	if (kgem_flush(kgem, bo->flush))
		kgem_submit(kgem);

	if (bo->rq) {
		if (!__kgem_busy(kgem, bo->handle))
			__kgem_bo_clear_busy(bo);
	}

	return kgem_bo_is_busy(bo);
}
 
/*
 * True when the bo has a request and RQ_RING() of that request equals
 * I915_EXEC_RENDER.
 */
static inline bool kgem_bo_is_render(struct kgem_bo *bo)
{
	DBG(("%s: handle=%d, rq? %d [%d]\n", __FUNCTION__,
	     bo->handle, bo->rq != NULL, (int)RQ_RING(bo->rq)));
	assert(bo->refcnt);

	if (!bo->rq)
		return false;

	return RQ_RING(bo->rq) == I915_EXEC_RENDER;
}
 
/*
 * Set the flush flag on the bo and on every bo along its proxy chain,
 * then mark the last bo of the chain as not reusable.
 */
static inline void kgem_bo_mark_unreusable(struct kgem_bo *bo)
{
	for (;;) {
		bo->flush = true;
		if (!bo->proxy)
			break;
		bo = bo->proxy;
	}

	bo->reusable = false;
}
 
/* Report the bo's gpu_dirty flag; a NULL bo is never dirty. */
static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
{
	if (!bo)
		return false;

	assert(bo->refcnt);

	return bo->gpu_dirty;
}
 
/*
 * Mark a bo that is outside of our control as presumably written to:
 * it needs a flush, gets a placeholder request pointer (the kgem itself)
 * if it had none, and drops to DOMAIN_NONE unless it is in the GPU
 * domain.
 */
static inline void kgem_bo_unclean(struct kgem *kgem, struct kgem_bo *bo)
{
	bo->needs_flush = true;

	if (!bo->rq)
		bo->rq = (void *)kgem;

	if (bo->domain == DOMAIN_GPU)
		return;

	bo->domain = DOMAIN_NONE;
}
 
/*
 * Flag one bo as written by the GPU: set the write flag on its exec
 * entry, mark it dirty and needing a flush, and move it to the front of
 * its request's buffer list.  The bo must have an exec entry and a
 * request.
 */
static inline void __kgem_bo_mark_dirty(struct kgem_bo *bo)
{
	DBG(("%s: handle=%d (proxy? %d)\n", __FUNCTION__,
	     bo->handle, bo->proxy != NULL));

	bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE;

	bo->gpu_dirty = true;
	bo->needs_flush = true;

	list_move(&bo->request, &RQ(bo->rq)->buffers);
}
 
/*
 * Mark the bo and each bo along its proxy chain as GPU-dirty, stopping
 * early at the first one that is already dirty.
 */
static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
{
	assert(bo->refcnt);

	for (; bo != NULL; bo = bo->proxy) {
		assert(bo->exec);
		assert(bo->rq);

		if (bo->gpu_dirty)
			return;

		__kgem_bo_mark_dirty(bo);
	}
}
 
#define KGEM_BUFFER_WRITE 0x1
#define KGEM_BUFFER_INPLACE 0x2
#define KGEM_BUFFER_LAST 0x4
 
#define KGEM_BUFFER_WRITE_INPLACE (KGEM_BUFFER_WRITE | KGEM_BUFFER_INPLACE)
 
struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
uint32_t size, uint32_t flags,
void **ret);
struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
int width, int height, int bpp,
uint32_t flags,
void **ret);
bool kgem_buffer_is_inplace(struct kgem_bo *bo);
void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
 
void kgem_throttle(struct kgem *kgem);
#define MAX_INACTIVE_TIME 10
bool kgem_expire_cache(struct kgem *kgem);
void kgem_purge_cache(struct kgem *kgem);
void kgem_cleanup_cache(struct kgem *kgem);
 
void kgem_clean_scanout_cache(struct kgem *kgem);
void kgem_clean_large_cache(struct kgem *kgem);
 
#if HAS_DEBUG_FULL
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch);
#else
/* No-op stand-in used when HAS_DEBUG_FULL is not set. */
static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
{
	/* Both parameters are intentionally unused. */
	(void)nbatch;
	(void)kgem;
}
#endif
 
#endif /* KGEM_H */
/drivers/video/Intel-2D/sna/kgem_debug.c
0,0 → 1,424
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
//#include <sys/mman.h>
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "kgem_debug.h"
 
/*
 * Find the relocation entry recorded for a given batch position.
 *
 * offset is a dword index into the batch; it is converted to a byte
 * offset before being compared with each entry's offset field.  Asserts
 * and returns NULL when no entry matches.
 */
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset)
{
	const uint32_t byte_offset = offset * sizeof(uint32_t);
	int n = 0;

	while (n < kgem->nreloc) {
		if (kgem->reloc[n].offset == byte_offset)
			return &kgem->reloc[n];
		n++;
	}

	assert(!"valid relocation entry, unknown batch offset");
	return NULL;
}
 
struct kgem_bo *
kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem,
struct drm_i915_gem_relocation_entry *reloc)
{
struct kgem_bo *bo;
 
if (reloc == NULL)
return NULL;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->target_handle == reloc->target_handle && bo->proxy == NULL)
break;
 
assert(&bo->request != &kgem->next_request->buffers);
 
return bo;
}
 
/*
 * Return the EXEC_OBJECT_NEEDS_FENCE bit of the exec entry for a handle,
 * or 0 when no entry is found.
 *
 * With has_handle_lut the handle is used directly as an index into
 * kgem->exec; otherwise the exec array is searched for a matching handle.
 */
static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle)
{
	int n;

	if (!kgem->has_handle_lut) {
		for (n = 0; n < kgem->nexec; n++) {
			if (kgem->exec[n].handle == handle)
				return kgem->exec[n].flags & EXEC_OBJECT_NEEDS_FENCE;
		}

		return 0;
	}

	return kgem->exec[handle].flags & EXEC_OBJECT_NEEDS_FENCE;
}
 
static int kgem_debug_handle_tiling(struct kgem *kgem, uint32_t handle)
{
struct kgem_bo *bo;
 
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->target_handle == handle)
return bo->tiling;
 
return 0;
}
 
/*
 * Print one decoded batch dword.
 *
 * The line starts with the byte address ((offset + index) * 4) and the
 * raw value data[index], followed by the caller's printf-style message.
 * Dwords after the first of a command get an extra leading space.  The
 * whole line is built in a 240-byte buffer and emitted through ErrorF(),
 * followed by delay(1).
 */
void
kgem_debug_print(const uint32_t *data,
		 uint32_t offset, unsigned int index,
		 const char *fmt, ...)
{
	char line[240];
	va_list args;
	int used;

	used = snprintf(line, sizeof(line),
			"0x%08x: 0x%08x: %s",
			(offset + index) * 4,
			data[index],
			index == 0 ? "" : " ");

	va_start(args, fmt);
	vsnprintf(line + used, sizeof(line) - used, fmt, args);
	va_end(args);

	ErrorF("%s", line);
	delay(1);
}
 
/*
 * Decoder slot for command class 1: the dword is printed as UNKNOWN and
 * the assert fires.  Consumes one dword.
 */
static int
decode_nop(struct kgem *kgem, uint32_t offset)
{
	kgem_debug_print(kgem->batch + offset, offset, 0, "UNKNOWN\n");
	assert(0);

	return 1;
}
 
/*
 * Decode one MI_* command at the given dword offset of the batch.
 *
 * The opcode is taken from bits 28:23 of the first dword and looked up in
 * a table of known commands.  For multi-dword commands the length is the
 * masked low bits of the first dword plus 2 and must lie within the
 * table's [min_len, max_len].  Every dword of the command is printed.
 *
 * Returns the number of dwords consumed (1 for unknown opcodes, after
 * asserting).
 */
static int
decode_mi(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int len_mask;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
		{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
		{ 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" },
		{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
		{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
		{ 0x04, 0, 1, 1, "MI_FLUSH" },
		{ 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" },
		{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
		{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
		{ 0x00, 0, 1, 1, "MI_NOOP" },
		{ 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
		{ 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
		{ 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
		{ 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
		{ 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
		{ 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
		{ 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
		{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
		{ 0x16, 0x7f, 3, 3, "MI_SEMAPHORE_MBOX" },
		{ 0x26, 0x1f, 3, 4, "MI_FLUSH_DW" },
		{ 0x0b, 0, 1, 1, "MI_SUSPEND_FLUSH" },
	};
	uint32_t *data = kgem->batch + offset;
	const uint32_t opcode = (data[0] & 0x1f800000) >> 23;
	int op;

	for (op = 0; op < ARRAY_SIZE(opcodes); op++) {
		unsigned int len, i;

		if (opcodes[op].opcode != opcode)
			continue;

		kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name);

		len = 1;
		if (opcodes[op].max_len > 1) {
			len = (data[0] & opcodes[op].len_mask) + 2;
			if (len < opcodes[op].min_len ||
			    len > opcodes[op].max_len) {
				ErrorF("Bad length (%d) in %s, [%d, %d]\n",
				       len, opcodes[op].name,
				       opcodes[op].min_len,
				       opcodes[op].max_len);
				assert(0);
			}
		}

		/* The remaining dwords are dumped without interpretation. */
		for (i = 1; i < len; i++)
			kgem_debug_print(data, offset, i, "dword %d\n", i);

		return len;
	}

	kgem_debug_print(data, offset, 0, "MI UNKNOWN\n");
	assert(0);
	return 1;
}
 
/* Map the 2-bit field at bits 25:24 of a blit's dword 1 to a format name. */
static const char *
decode_2d_format_name(uint32_t dw1)
{
	static const char *const names[4] = { "8", "565", "1555", "8888" };

	return names[(dw1 >> 24) & 0x3];
}

/*
 * Decode one 2D (blitter) command at the given dword offset of the batch.
 *
 * The opcode is taken from bits 28:22 of the first dword.  XY_COLOR_BLT
 * (0x50) and XY_SRC_COPY_BLT (0x53) are decoded field by field, including
 * the relocation entries of their surface addresses; on gen < 040 those
 * surfaces are asserted to be fenced.  All other known opcodes are printed
 * by name with their remaining dwords dumped raw, after checking the
 * encoded length against the table.
 *
 * Returns the number of dwords consumed (1 for unknown opcodes, after
 * asserting).
 */
static int
decode_2d(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x40, 5, 5, "COLOR_BLT" },
		{ 0x43, 6, 6, "SRC_COPY_BLT" },
		{ 0x01, 8, 8, "XY_SETUP_BLT" },
		{ 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
		{ 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
		{ 0x24, 2, 2, "XY_PIXEL_BLT" },
		{ 0x25, 3, 3, "XY_SCANLINES_BLT" },
		{ 0x26, 4, 4, "Y_TEXT_BLT" },
		{ 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
		{ 0x50, 6, 6, "XY_COLOR_BLT" },
		{ 0x51, 6, 6, "XY_PAT_BLT" },
		{ 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
		{ 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
		{ 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
		{ 0x52, 9, 9, "XY_MONO_PAT_BLT" },
		{ 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
		{ 0x53, 8, 8, "XY_SRC_COPY_BLT" },
		{ 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
		{ 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
		{ 0x55, 9, 9, "XY_FULL_BLT" },
		/* NOTE(review): this entry repeats opcode 0x55, so the linear
		 * search below can never reach it.  It looks like a different
		 * opcode was intended - confirm against the blitter
		 * documentation before changing the value. */
		{ 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
		{ 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
		{ 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
		{ 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
		{ 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
	};

	unsigned int op, len;
	const char *format = NULL;
	uint32_t *data = kgem->batch + offset;
	struct drm_i915_gem_relocation_entry *reloc;

	/* Special case the two most common ops that we detail in full */
	switch ((data[0] & 0x1fc00000) >> 22) {
	case 0x50:
		kgem_debug_print(data, offset, 0,
				 "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
				 (data[0] & (1 << 20)) ? "en" : "dis",
				 (data[0] & (1 << 21)) ? "en" : "dis",
				 (data[0] >> 11) & 1);

		len = (data[0] & 0x000000ff) + 2;
		assert(len == 6);

		format = decode_2d_format_name(data[1]);

		kgem_debug_print(data, offset, 1, "format %s, rop %x, pitch %d, "
				 "clipping %sabled\n", format,
				 (data[1] >> 16) & 0xff,
				 (short)(data[1] & 0xffff),
				 data[1] & (1 << 30) ? "en" : "dis");
		kgem_debug_print(data, offset, 2, "(%d,%d)\n",
				 data[2] & 0xffff, data[2] >> 16);
		kgem_debug_print(data, offset, 3, "(%d,%d)\n",
				 data[3] & 0xffff, data[3] >> 16);
		reloc = kgem_debug_get_reloc_entry(kgem, offset+4);
		/* Same guard as the XY_SRC_COPY_BLT path: reloc is
		 * dereferenced right below. */
		assert(reloc);
		kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d, tiling? %d)]\n",
				 data[4],
				 reloc->target_handle, reloc->delta,
				 reloc->read_domains, reloc->write_domain,
				 kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
				 kgem_debug_handle_tiling(kgem, reloc->target_handle));
		kgem_debug_print(data, offset, 5, "color\n");
		assert(kgem->gen >= 040 ||
		       kgem_debug_handle_is_fenced(kgem, reloc->target_handle));
		return len;

	case 0x53:
		kgem_debug_print(data, offset, 0,
				 "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
				 "src tile %d, dst tile %d)\n",
				 (data[0] & (1 << 20)) ? "en" : "dis",
				 (data[0] & (1 << 21)) ? "en" : "dis",
				 (data[0] >> 15) & 1,
				 (data[0] >> 11) & 1);

		len = (data[0] & 0x000000ff) + 2;
		assert(len == 8);

		format = decode_2d_format_name(data[1]);

		kgem_debug_print(data, offset, 1, "format %s, rop %x, dst pitch %d, "
				 "clipping %sabled\n", format,
				 (data[1] >> 16) & 0xff,
				 (short)(data[1] & 0xffff),
				 data[1] & (1 << 30) ? "en" : "dis");
		kgem_debug_print(data, offset, 2, "dst (%d,%d)\n",
				 data[2] & 0xffff, data[2] >> 16);
		kgem_debug_print(data, offset, 3, "dst (%d,%d)\n",
				 data[3] & 0xffff, data[3] >> 16);
		reloc = kgem_debug_get_reloc_entry(kgem, offset+4);
		assert(reloc);
		kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x, (fenced? %d, tiling? %d)]\n",
				 data[4],
				 reloc->target_handle, reloc->delta,
				 reloc->read_domains, reloc->write_domain,
				 kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
				 kgem_debug_handle_tiling(kgem, reloc->target_handle));
		assert(kgem->gen >= 040 ||
		       kgem_debug_handle_is_fenced(kgem, reloc->target_handle));

		kgem_debug_print(data, offset, 5, "src (%d,%d)\n",
				 data[5] & 0xffff, data[5] >> 16);
		kgem_debug_print(data, offset, 6, "src pitch %d\n",
				 (short)(data[6] & 0xffff));
		reloc = kgem_debug_get_reloc_entry(kgem, offset+7);
		assert(reloc);
		kgem_debug_print(data, offset, 7, "src offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d, tiling? %d)]\n",
				 data[7],
				 reloc->target_handle, reloc->delta,
				 reloc->read_domains, reloc->write_domain,
				 kgem_debug_handle_is_fenced(kgem, reloc->target_handle),
				 kgem_debug_handle_tiling(kgem, reloc->target_handle));
		assert(kgem->gen >= 040 ||
		       kgem_debug_handle_is_fenced(kgem, reloc->target_handle));

		return len;
	}

	for (op = 0; op < ARRAY_SIZE(opcodes); op++) {
		if ((data[0] & 0x1fc00000) >> 22 == opcodes[op].opcode) {
			unsigned int i;

			len = 1;
			kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name);
			if (opcodes[op].max_len > 1) {
				len = (data[0] & 0x000000ff) + 2;
				assert(len >= opcodes[op].min_len &&
				       len <= opcodes[op].max_len);
			}

			for (i = 1; i < len; i++)
				kgem_debug_print(data, offset, i, "dword %d\n", i);

			return len;
		}
	}

	kgem_debug_print(data, offset, 0, "2D UNKNOWN\n");
	assert(0);
	return 1;
}
 
/*
 * Pick the 3D command decoder for a hardware generation.
 *
 * gen is an octal-encoded generation number (070 selects the gen7
 * decoder, 030 the gen3 one, and so on).  Generations at or above 0100
 * and below 030 have no decoder here: the assert fires and, when asserts
 * are compiled out, NULL is returned so that control never falls off the
 * end of this non-void function.
 */
static int (*decode_3d(int gen))(struct kgem*, uint32_t)
{
	if (gen >= 0100) {
		/* no decoder available */
	} else if (gen >= 070) {
		return kgem_gen7_decode_3d;
	} else if (gen >= 060) {
		return kgem_gen6_decode_3d;
	} else if (gen >= 050) {
		return kgem_gen5_decode_3d;
	} else if (gen >= 040) {
		return kgem_gen4_decode_3d;
	} else if (gen >= 030) {
		return kgem_gen3_decode_3d;
	}

	assert(0);
	return NULL;
}
 
/*
 * Pick the per-generation "finish state" hook.
 *
 * gen is an octal-encoded generation number (070 selects the gen7 hook,
 * 030 the gen3 one, and so on).  Generations at or above 0100 and below
 * 030 have no hook here: the assert fires and, when asserts are compiled
 * out, NULL is returned so that control never falls off the end of this
 * non-void function.
 */
static void (*finish_state(int gen))(struct kgem*)
{
	if (gen >= 0100) {
		/* no hook available */
	} else if (gen >= 070) {
		return kgem_gen7_finish_state;
	} else if (gen >= 060) {
		return kgem_gen6_finish_state;
	} else if (gen >= 050) {
		return kgem_gen5_finish_state;
	} else if (gen >= 040) {
		return kgem_gen4_finish_state;
	} else if (gen >= 030) {
		return kgem_gen3_finish_state;
	}

	assert(0);
	return NULL;
}
 
/*
 * Decode and print the first nbatch dwords of the current batch.
 *
 * Bits 31:29 of each command's first dword select the decoder: MI, the
 * "nop" class, 2D, or the generation-specific 3D decoder.  Each decoder
 * returns how many dwords it consumed.  Afterwards the per-generation
 * finish-state hook is run.
 *
 * The class field is 3 bits wide but only four decoders exist.  Besides
 * the assert, decoding now stops with a message when the class is out of
 * range, when no decoder is available for it, or when a decoder reports
 * no progress, so builds without asserts neither index past the table nor
 * loop forever.  A missing finish-state hook is skipped for the same
 * reason.
 */
void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
{
	int (*const decode[])(struct kgem *, uint32_t) = {
		decode_mi,
		decode_nop,
		decode_2d,
		decode_3d(kgem->gen),
	};
	void (*finish)(struct kgem *);
	uint32_t offset = 0;

	while (offset < nbatch) {
		unsigned int class = (kgem->batch[offset] & 0xe0000000) >> 29;
		int len;

		assert(class < ARRAY_SIZE(decode));
		if (class >= ARRAY_SIZE(decode) || decode[class] == NULL) {
			ErrorF("0x%08x: 0x%08x: no decoder for command class %u\n",
			       offset * 4, kgem->batch[offset], class);
			break;
		}

		len = decode[class](kgem, offset);
		if (len <= 0)
			break;

		offset += len;
	}

	finish = finish_state(kgem->gen);
	if (finish != NULL)
		finish(kgem);
}
/drivers/video/Intel-2D/sna/kgem_debug.h
0,0 → 1,34
#ifndef KGEM_DEBUG_H
#define KGEM_DEBUG_H
 
void
kgem_debug_print(const uint32_t *data,
uint32_t offset, unsigned int index,
const char *fmt, ...);
 
struct drm_i915_gem_relocation_entry *
kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset);
 
struct kgem_bo *
kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem,
struct drm_i915_gem_relocation_entry *reloc);
 
int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen7_finish_state(struct kgem *kgem);
 
int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen6_finish_state(struct kgem *kgem);
 
int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen5_finish_state(struct kgem *kgem);
 
int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen4_finish_state(struct kgem *kgem);
 
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen3_finish_state(struct kgem *kgem);
 
int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset);
void kgem_gen2_finish_state(struct kgem *kgem);
 
#endif
/drivers/video/Intel-2D/sna/kgem_debug_gen3.c
0,0 → 1,1599
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen3_render.h"
 
#include "kgem_debug.h"
 
/* Component storage type of a vertex element. */
enum type {
	T_FLOAT32,
	T_FLOAT16,
};

/* Decoder-side copy of the vertex state seen so far in the batch. */
static struct state {
	/* Vertex buffer currently in use. */
	struct vertex_buffer {
		int handle;
		void *base;		/* start of the mapping */
		const char *ptr;	/* base plus the relocation delta */
		int pitch;		/* bytes per vertex */

		struct kgem_bo *current;
	} vb;
	/* Per-slot vertex element layout. */
	struct vertex_elements {
		int offset;		/* byte offset within a vertex */
		bool valid;
		enum type type;
		int size;		/* number of components */
		uint8_t swizzle[4];	/* per-component output selector */
	} ve[33];
	int num_ve;			/* index of the last valid slot */
} state;
 
/* Reinterpret the bits of an int as a float (no numeric conversion). */
static float int_as_float(int i)
{
	union {
		int bits;
		float value;
	} pun;

	pun.bits = i;
	return pun.value;
}
 
/*
 * Record the vertex buffer referenced by the relocation at the given
 * batch dword.
 *
 * The relocation is located by its byte offset.  A target handle of 0
 * means the vertices live in the batch itself; otherwise the matching bo
 * of the next request is mapped with kgem_bo_map__debug().  The resulting
 * bo, base address and base+delta pointer are stored in state.vb.
 */
static void gen3_update_vertex_buffer_addr(struct kgem *kgem,
					   uint32_t offset)
{
	const uint32_t byte_offset = offset * sizeof(uint32_t);
	struct kgem_bo *bo = NULL;
	uint32_t handle;
	void *base;
	int i = 0;

	while (i < kgem->nreloc && kgem->reloc[i].offset != byte_offset)
		i++;
	assert(i < kgem->nreloc);
	handle = kgem->reloc[i].target_handle;

	if (handle != 0) {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == handle)
				break;
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	} else {
		base = kgem->batch;
	}

	state.vb.current = bo;
	state.vb.base = base;
	state.vb.ptr = (char *)base + kgem->reloc[i].delta;
}
 
/*
 * Record the vertex pitch: bits 21:16 of the given batch dword hold the
 * pitch in dwords, which is stored in bytes.
 */
static void gen3_update_vertex_buffer_pitch(struct kgem *kgem,
					    uint32_t offset)
{
	const uint32_t pitch_dwords = (kgem->batch[offset] >> 16) & 0x3f;

	state.vb.pitch = pitch_dwords * sizeof(uint32_t);
}
 
static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data)
{
state.ve[1].valid = 1;
 
switch ((data >> 6) & 7) {
case 1:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 3;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 1;
state.ve[1].swizzle[3] = 3;
break;
case 2:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 4;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 1;
state.ve[1].swizzle[3] = 1;
break;
case 3:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 2;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 2;
state.ve[1].swizzle[3] = 3;
break;
case 4:
state.ve[1].type = T_FLOAT32;
state.ve[1].size = 3;
state.ve[1].swizzle[0] = 1;
state.ve[1].swizzle[1] = 1;
state.ve[1].swizzle[2] = 3;
state.ve[1].swizzle[3] = 1;
break;
}
 
state.ve[2].valid = 0;
state.ve[3].valid = 0;
}
 
static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data)
{
int id;
for (id = 0; id < 8; id++) {
uint32_t fmt = (data >> (id*4)) & 0xf;
int width;
 
state.ve[id+4].valid = fmt != 0xf;
 
width = 0;
switch (fmt) {
case 0:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 2;
break;
case 1:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 3;
break;
case 2:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 4;
break;
case 3:
state.ve[id+4].type = T_FLOAT32;
width = state.ve[id+4].size = 1;
break;
case 4:
state.ve[id+4].type = T_FLOAT16;
width = state.ve[id+4].size = 2;
break;
case 5:
state.ve[id+4].type = T_FLOAT16;
width = state.ve[id+4].size = 4;
break;
}
 
state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2;
state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2;
state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2;
state.ve[id+4].swizzle[3] = width > 3 ? 1 : 2;
}
}
 
static void gen3_update_vertex_elements_offsets(struct kgem *kgem)
{
int i, offset;
 
for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) {
if (!state.ve[i].valid)
continue;
 
state.ve[i].offset = offset;
offset += 4 * state.ve[i].size;
state.num_ve = i;
}
}
 
/*
 * Print up to max float components of one element as "(a, b, ...)".
 * The element's swizzle decides, per component, whether the stored value
 * or a fixed token is printed.
 */
static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max)
{
	int c;

	ErrorF("(");
	for (c = 0; c < max; c++) {
		/* Separator goes between components, never after the last. */
		if (c > 0)
			ErrorF(", ");

		switch (ve->swizzle[c]) {
		case 0:
			ErrorF("#");
			break;
		case 1:
			ErrorF("%f", f[c]);
			break;
		case 2:
			ErrorF("0.0");
			break;
		case 3:
			ErrorF("1.0");
			break;
		case 4:
			ErrorF("0x1");
			break;
		case 5:
			break;
		default:
			ErrorF("?");
		}
	}
	ErrorF(")");
}
 
/*
 * Print one vertex element.  Only float32 elements produce output;
 * float16 elements are silently skipped (their printer is disabled).
 */
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
	if (ve->type == T_FLOAT32)
		vertices_float32_out(ve, ptr, ve->size);
	/* T_FLOAT16: vertices_float16_out(ve, ptr, ve->size) is disabled */
}
 
/*
 * Print vertex number v of the current vertex buffer: every valid element
 * from slot 1 up to state.num_ve, separated by ", ".
 *
 * Slot 1 is always examined, even when state.num_ve is 0.  The walk
 * advances on every iteration; the earlier form used `continue` inside
 * the do/while, which skipped the increment and would spin forever on an
 * invalid slot 1 with num_ve >= 1.
 */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	const struct vertex_buffer *vb = &state.vb;
	bool printed = false;
	int i = 1;

	do {
		const struct vertex_elements *ve = &state.ve[i];

		if (ve->valid) {
			const void *ptr = vb->ptr + v * vb->pitch + ve->offset;

			if (printed)
				ErrorF(", ");
			ve_out(ve, ptr);
			printed = true;
		}
	} while (++i <= state.num_ve);
}
 
/*
 * Print one vertex stored inline at base: every valid element from slot 1
 * up to state.num_ve, separated by ", ".
 *
 * Slot 1 is always examined, even when state.num_ve is 0.  The walk
 * advances on every iteration; the earlier form used `continue` inside
 * the do/while, which skipped the increment and would spin forever on an
 * invalid slot 1 with num_ve >= 1.
 *
 * Returns the current vertex pitch in bytes.
 */
static int inline_vertex_out(struct kgem *kgem, void *base)
{
	const struct vertex_buffer *vb = &state.vb;
	bool printed = false;
	int i = 1;

	do {
		const struct vertex_elements *ve = &state.ve[i];

		if (ve->valid) {
			const void *ptr = (char *)base + ve->offset;

			if (printed)
				ErrorF(", ");
			ve_out(ve, ptr);
			printed = true;
		}
	} while (++i <= state.num_ve);

	return vb->pitch;
}
 
/*
 * Decode a single-dword 3D command of the 0x1c group.  The sub-opcode is
 * bits 23:19 of the dword.  Always consumes one dword; unknown
 * sub-opcodes are printed and trip the assert.
 */
static int
gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset)
{
	uint32_t *data = kgem->batch + offset;
	const uint32_t opcode = (data[0] & 0x00f80000) >> 19;

	switch (opcode) {
	case 0x01:
		kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
		return 1;
	case 0x05:
		kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
		return 1;
	case 0x0a:
		kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n");
		return 1;
	case 0x10:
		kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n",
				 data[0]&1?"enabled":"disabled");
		return 1;
	case 0x11:
		kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
		return 1;
	default:
		break;
	}

	kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
			 opcode);
	assert(0);
	return 1;
}
 
/**
 * Write a textual description of a PS instruction's destination into
 * dstname.
 *
 * The destination is decoded from data[i]: register type in bits 21:19,
 * register number in bits 17:14, write mask in bits 13:10 and the
 * saturate flag in bit 22.  When do_mask is zero neither the write mask
 * nor the ".sat" suffix is emitted; a full write mask is omitted as well.
 */
static void
gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
{
	static const char channels[4] = { 'x', 'y', 'z', 'w' };
	const uint32_t dword = data[i];
	const int reg = (dword >> 14) & 0xf;
	const char *saturate = "";
	char mask[8];
	int n = 0;
	int c;

	if (do_mask) {
		if (((dword >> 10) & 0xf) != 0xf) {
			mask[n++] = '.';
			for (c = 0; c < 4; c++) {
				if (dword & (1 << (10 + c)))
					mask[n++] = channels[c];
			}
		}

		if (dword & (1 << 22))
			saturate = ".sat";
	}
	mask[n] = 0;

	switch ((dword >> 19) & 0x7) {
	case 0:
		assert(reg <= 15);
		sprintf(dstname, "R%d%s%s", reg, mask, saturate);
		break;
	case 4:
		assert(reg == 0);
		sprintf(dstname, "oC%s%s", mask, saturate);
		break;
	case 5:
		assert(reg == 0);
		sprintf(dstname, "oD%s%s", mask, saturate);
		break;
	case 6:
		assert(reg <= 3);
		sprintf(dstname, "U%d%s%s", reg, mask, saturate);
		break;
	default:
		sprintf(dstname, "RESERVED");
		break;
	}
}
 
/*
 * Name of a 4-bit channel selector: the low three bits pick the source
 * (x, y, z, w, constant 0, constant 1, anything else "bad") and bit 3
 * adds a leading minus sign.
 */
static const char *
gen3_get_channel_swizzle(uint32_t select)
{
	static const char *const plain[8] = {
		"x", "y", "z", "w", "0", "1", "bad", "bad",
	};
	static const char *const negated[8] = {
		"-x", "-y", "-z", "-w", "-0", "-1", "-bad", "-bad",
	};
	const uint32_t source = select & 0x7;

	return (select & 8) ? negated[source] : plain[source];
}
 
/*
 * Write the name of a PS source register into name.
 *
 * src_type selects the register file (0: R, 1: T and the interpolated
 * colours, 2: C, 4: oC, 5: oD, 6: U) and src_nr the register within it.
 * Out-of-range numbers trip an assert; unknown types yield "RESERVED".
 */
static void
gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
{
	static const char *const interpolated[3] = {
		"DIFFUSE", "SPECULAR", "FOG",
	};

	switch (src_type) {
	case 0:
		sprintf(name, "R%d", src_nr);
		assert(src_nr <= 15);
		break;
	case 1:
		if (src_nr < 8) {
			sprintf(name, "T%d", src_nr);
		} else if (src_nr <= 10) {
			sprintf(name, "%s", interpolated[src_nr - 8]);
		} else {
			assert(0);
			sprintf(name, "RESERVED");
		}
		break;
	case 2:
		sprintf(name, "C%d", src_nr);
		assert(src_nr <= 31);
		break;
	case 4:
		sprintf(name, "oC");
		assert(src_nr == 0);
		break;
	case 5:
		sprintf(name, "oD");
		assert(src_nr == 0);
		break;
	case 6:
		sprintf(name, "U%d", src_nr);
		assert(src_nr <= 3);
		break;
	default:
		sprintf(name, "RESERVED");
		assert(0);
		break;
	}
}
 
/*
 * Describe source operand 0 of the instruction starting at data[i].
 *
 * Register type and number come from the first dword (bits 9:7 and 6:2),
 * the four channel selectors from bits 31:16 of the second.  The swizzle
 * suffix is appended to srcname unless it is the identity ".xyzw".
 */
static void
gen3_get_instruction_src0(uint32_t *data, int i, char *srcname)
{
	const uint32_t dw0 = data[i];
	const uint32_t dw1 = data[i + 1];
	const int reg = (dw0 >> 2) & 0x1f;
	char suffix[100];

	gen3_get_instruction_src_name((dw0 >> 7) & 0x7, reg, srcname);

	sprintf(suffix, ".%s%s%s%s",
		gen3_get_channel_swizzle((dw1 >> 28) & 0xf),
		gen3_get_channel_swizzle((dw1 >> 24) & 0xf),
		gen3_get_channel_swizzle((dw1 >> 20) & 0xf),
		gen3_get_channel_swizzle((dw1 >> 16) & 0xf));

	if (strcmp(suffix, ".xyzw") != 0)
		strcat(srcname, suffix);
}
 
/*
 * Describe source operand 1 of the instruction starting at data[i].
 *
 * Register type and number come from the second dword (bits 15:13 and
 * 12:8); the x/y selectors from its low byte and the z/w selectors from
 * the top byte of the third dword.  The swizzle suffix is appended to
 * srcname unless it is the identity ".xyzw".
 */
static void
gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
{
	const uint32_t dw1 = data[i + 1];
	const uint32_t dw2 = data[i + 2];
	const int reg = (dw1 >> 8) & 0x1f;
	char suffix[100];

	gen3_get_instruction_src_name((dw1 >> 13) & 0x7, reg, srcname);

	sprintf(suffix, ".%s%s%s%s",
		gen3_get_channel_swizzle((dw1 >> 4) & 0xf),
		gen3_get_channel_swizzle((dw1 >> 0) & 0xf),
		gen3_get_channel_swizzle((dw2 >> 28) & 0xf),
		gen3_get_channel_swizzle((dw2 >> 24) & 0xf));

	if (strcmp(suffix, ".xyzw") != 0)
		strcat(srcname, suffix);
}
 
/*
 * Describe source operand 2 of the instruction starting at data[i].
 *
 * Everything comes from the third dword: register type in bits 23:21,
 * number in bits 20:16 and the four channel selectors in bits 15:0.  The
 * swizzle suffix is appended to srcname unless it is the identity
 * ".xyzw".
 */
static void
gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
{
	const uint32_t dw2 = data[i + 2];
	const int reg = (dw2 >> 16) & 0x1f;
	char suffix[100];

	gen3_get_instruction_src_name((dw2 >> 21) & 0x7, reg, srcname);

	sprintf(suffix, ".%s%s%s%s",
		gen3_get_channel_swizzle((dw2 >> 12) & 0xf),
		gen3_get_channel_swizzle((dw2 >> 8) & 0xf),
		gen3_get_channel_swizzle((dw2 >> 4) & 0xf),
		gen3_get_channel_swizzle((dw2 >> 0) & 0xf));

	if (strcmp(suffix, ".xyzw") != 0)
		strcat(srcname, suffix);
}
 
/*
 * Write the printable name of a texture-address register into name.
 *
 * src_type selects the register file: 0 prints "R<n>", 1 prints "T<n>"
 * for n < 8 or DIFFUSE/SPECULAR/FOG for n = 8/9/10, 4 prints "oC" and
 * 5 prints "oD". Any other combination asserts and prints "RESERVED".
 * name must be large enough for the longest of these strings.
 */
static void
gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
{
	if (src_type == 0) {
		sprintf(name, "R%d", src_nr);
		assert(src_nr <= 15);
		return;
	}

	if (src_type == 1) {
		if (src_nr < 8) {
			sprintf(name, "T%d", src_nr);
		} else if (src_nr == 8) {
			sprintf(name, "DIFFUSE");
		} else if (src_nr == 9) {
			sprintf(name, "SPECULAR");
		} else if (src_nr == 10) {
			sprintf(name, "FOG");
		} else {
			assert(0);
			sprintf(name, "RESERVED");
		}
		return;
	}

	if (src_type == 4) {
		sprintf(name, "oC");
		assert(src_nr == 0);
		return;
	}

	if (src_type == 5) {
		sprintf(name, "oD");
		assert(src_nr == 0);
		return;
	}

	/* Every other register file is invalid as a texture address. */
	assert(0);
	sprintf(name, "RESERVED");
}
 
/*
 * Print a one-source ALU instruction occupying dwords i..i+2 of data.
 * The first dword carries the disassembly "<prefix>: <op> <dst>, <src0>";
 * the remaining two are printed with the prefix only.
 */
static void
gen3_decode_alu1(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst_name[100];
	char src0_name[100];

	gen3_get_instruction_dst(data, i, dst_name, 1);
	gen3_get_instruction_src0(data, i, src0_name);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s\n", instr_prefix,
			 op_name, dst_name, src0_name);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
 
/*
 * Print a two-source ALU instruction occupying dwords i..i+2 of data.
 * The first dword carries "<prefix>: <op> <dst>, <src0>, <src1>"; the
 * remaining two are printed with the prefix only.
 */
static void
gen3_decode_alu2(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst_name[100];
	char src0_name[100];
	char src1_name[100];

	gen3_get_instruction_dst(data, i, dst_name, 1);
	gen3_get_instruction_src0(data, i, src0_name);
	gen3_get_instruction_src1(data, i, src1_name);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s, %s\n", instr_prefix,
			 op_name, dst_name, src0_name, src1_name);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
 
/*
 * Print a three-source ALU instruction occupying dwords i..i+2 of data.
 * The first dword carries "<prefix>: <op> <dst>, <src0>, <src1>, <src2>";
 * the remaining two are printed with the prefix only.
 */
static void
gen3_decode_alu3(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, const char *op_name)
{
	char dst_name[100];
	char src0_name[100];
	char src1_name[100];
	char src2_name[100];

	gen3_get_instruction_dst(data, i, dst_name, 1);
	gen3_get_instruction_src0(data, i, src0_name);
	gen3_get_instruction_src1(data, i, src1_name);
	gen3_get_instruction_src2(data, i, src2_name);

	kgem_debug_print(data, offset, i, "%s: %s %s, %s, %s, %s\n", instr_prefix,
			 op_name, dst_name, src0_name, src1_name, src2_name);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
 
/*
 * Print a texture instruction occupying dwords i..i+2 of data as
 * "<prefix>: <tex_name> <dst>, S<sampler>, <addr>".
 *
 * The sampler number is the low nibble of the first dword; the address
 * register type and number are taken from bits 24-26 and 17-20 of the
 * second dword.
 */
static void
gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
		const char *tex_name)
{
	const uint32_t dw0 = data[i];
	const uint32_t dw1 = data[i + 1];
	const int sampler_nr = dw0 & 0xf;
	char dst_name[100];
	char addr_name[100];

	gen3_get_instruction_dst(data, i, dst_name, 0);
	gen3_get_instruction_addr((dw1 >> 24) & 0x7,
				  (dw1 >> 17) & 0xf,
				  addr_name);

	kgem_debug_print(data, offset, i, "%s: %s %s, S%d, %s\n", instr_prefix,
			 tex_name, dst_name, sampler_nr, addr_name);
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
 
/*
 * Print a DCL instruction occupying dwords i..i+2 of data.
 *
 * Bits 19-20 of the first dword select what is declared:
 *   1 - an input register (T0..T7, DIFFUSE, SPECULAR or FOG) together
 *       with a channel mask built from bits 10-13;
 *   3 - a sampler S<n> with its dimensionality from bits 22-23;
 *   anything else is printed as "DCL RESERVED<n>".
 * Unexpected masks and register numbers trip assertions.
 */
static void
gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
{
	static const char *const sample_types[4] = {
		"2D", "CUBE", "3D", "RESERVED"
	};
	const uint32_t d0 = data[i];
	const uint32_t dcl_kind = (d0 >> 19) & 0x3;
	const int dcl_nr = (d0 >> 14) & 0xf;
	char dcl_mask[10];

	if (dcl_kind == 1) {
		sprintf(dcl_mask, ".%s%s%s%s",
			d0 & (1 << 10) ? "x" : "",
			d0 & (1 << 11) ? "y" : "",
			d0 & (1 << 12) ? "z" : "",
			d0 & (1 << 13) ? "w" : "");
		assert (strcmp(dcl_mask, "."));

		assert(dcl_nr <= 10);
		if (dcl_nr < 8) {
			/* Only these masks are accepted for texcoord inputs. */
			int mask_ok = strcmp(dcl_mask, ".x") == 0 ||
				      strcmp(dcl_mask, ".xy") == 0 ||
				      strcmp(dcl_mask, ".xz") == 0 ||
				      strcmp(dcl_mask, ".w") == 0 ||
				      strcmp(dcl_mask, ".xyzw") == 0;

			if (!mask_ok) {
				assert(0);
			}
			kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
					 dcl_nr, dcl_mask);
		} else {
			/* These masks are rejected for the colour/fog inputs. */
			if (strcmp(dcl_mask, ".xz") == 0 ||
			    strcmp(dcl_mask, ".xw") == 0 ||
			    strcmp(dcl_mask, ".xzw") == 0) {
				assert(0);
			}

			switch (dcl_nr) {
			case 8:
				kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
						 dcl_mask);
				break;
			case 9:
				kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
						 dcl_mask);
				break;
			case 10:
				kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
						 dcl_mask);
				break;
			default:
				/* nothing printed, index not advanced */
				break;
			}
		}
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		return;
	}

	if (dcl_kind == 3) {
		const char *sampletype = sample_types[(d0 >> 22) & 0x3];

		assert(dcl_nr <= 15);
		kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
				 dcl_nr, sampletype);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		return;
	}

	kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}
 
/*
 * Decode the three-dword pixel shader instruction at data[i].
 *
 * The opcode is bits 24-28 of the first dword. It is looked up in a
 * table that says which operand layout to use (one/two/three-source
 * ALU, texture, declaration) and the mnemonic to print. NOP and
 * unrecognised opcodes are printed as three raw lines.
 */
static void
gen3_decode_instruction(uint32_t *data, uint32_t offset,
			int i, char *instr_prefix)
{
	enum {
		KIND_UNKNOWN = 0,	/* must be zero: unlisted table slots */
		KIND_NOP,
		KIND_ALU1,
		KIND_ALU2,
		KIND_ALU3,
		KIND_TEX,
		KIND_DCL
	};
	static const struct {
		int kind;
		const char *name;
	} ops[0x20] = {
		[0x00] = { KIND_NOP,  "NOP" },
		[0x01] = { KIND_ALU2, "ADD" },
		[0x02] = { KIND_ALU1, "MOV" },
		[0x03] = { KIND_ALU2, "MUL" },
		[0x04] = { KIND_ALU3, "MAD" },
		[0x05] = { KIND_ALU3, "DP2ADD" },
		[0x06] = { KIND_ALU2, "DP3" },
		[0x07] = { KIND_ALU2, "DP4" },
		[0x08] = { KIND_ALU1, "FRC" },
		[0x09] = { KIND_ALU1, "RCP" },
		[0x0a] = { KIND_ALU1, "RSQ" },
		[0x0b] = { KIND_ALU1, "EXP" },
		[0x0c] = { KIND_ALU1, "LOG" },
		[0x0d] = { KIND_ALU2, "CMP" },
		[0x0e] = { KIND_ALU2, "MIN" },
		[0x0f] = { KIND_ALU2, "MAX" },
		[0x10] = { KIND_ALU1, "FLR" },
		[0x11] = { KIND_ALU1, "MOD" },
		[0x12] = { KIND_ALU1, "TRC" },
		[0x13] = { KIND_ALU2, "SGE" },
		[0x14] = { KIND_ALU2, "SLT" },
		[0x15] = { KIND_TEX,  "TEXLD" },
		[0x16] = { KIND_TEX,  "TEXLDP" },
		[0x17] = { KIND_TEX,  "TEXLDB" },
		[0x19] = { KIND_DCL,  "DCL" },
	};
	const uint32_t opcode = (data[i] >> 24) & 0x1f;

	switch (ops[opcode].kind) {
	case KIND_ALU1:
		gen3_decode_alu1(data, offset, i, instr_prefix, ops[opcode].name);
		return;
	case KIND_ALU2:
		gen3_decode_alu2(data, offset, i, instr_prefix, ops[opcode].name);
		return;
	case KIND_ALU3:
		gen3_decode_alu3(data, offset, i, instr_prefix, ops[opcode].name);
		return;
	case KIND_TEX:
		gen3_decode_tex(data, offset, i, instr_prefix, ops[opcode].name);
		return;
	case KIND_DCL:
		gen3_decode_dcl(data, offset, i, instr_prefix);
		return;
	case KIND_NOP:
		kgem_debug_print(data, offset, i, "%s: NOP\n", instr_prefix);
		break;
	default:
		kgem_debug_print(data, offset, i, "%s: unknown\n", instr_prefix);
		break;
	}

	/* NOP and unknown: the two trailing dwords carry only the prefix. */
	kgem_debug_print(data, offset, i + 1, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i + 2, "%s\n", instr_prefix);
}
 
/*
 * Return the name of the compare function encoded in the low three
 * bits of op; higher bits are ignored.
 */
static const char *
gen3_decode_compare_func(uint32_t op)
{
	static const char *const names[8] = {
		"always",
		"never",
		"less",
		"equal",
		"lequal",
		"greater",
		"notequal",
		"gequal",
	};

	return names[op & 0x7];
}
 
/*
 * Return the name of the stencil operation encoded in the low three
 * bits of op; higher bits are ignored.
 *
 * The names follow the STENCILOP_* encodings (KEEP, ZERO, REPLACE,
 * INCRSAT, DECRSAT, INCR, DECR, INVERT). Value 5 used to be reported as
 * "greater", which is a compare function rather than a stencil op, and
 * that shifted the labels for 6 and 7 as well.
 */
static const char *
gen3_decode_stencil_op(uint32_t op)
{
	switch (op&0x7) {
	case 0: return "keep";
	case 1: return "zero";
	case 2: return "replace";
	case 3: return "incr_sat";
	case 4: return "decr_sat";
	case 5: return "incr";
	case 6: return "decr";
	case 7: return "invert";
	}
	return "";
}
 
#if 0
/* part of MODES_4 */
/*
 * Return the name of the logic op encoded in the low four bits of op.
 * Currently compiled out by the enclosing #if 0.
 */
static const char *
gen3_decode_logic_op(uint32_t op)
{
switch (op&0xf) {
case 0: return "clear";
case 1: return "nor";
case 2: return "and_inv";
case 3: return "copy_inv";
case 4: return "and_rvrse";
case 5: return "inv";
case 6: return "xor";
case 7: return "nand";
case 8: return "and";
case 9: return "equiv";
case 10: return "noop";
case 11: return "or_inv";
case 12: return "copy";
case 13: return "or_rvrse";
case 14: return "or";
case 15: return "set";
}
/* Not reached: the mask above limits the value to the 16 cases. */
return "";
}
#endif
 
/*
 * Return the name of the blend factor encoded in the low four bits of
 * op; higher bits are ignored. Value 0 has no name and yields "".
 */
static const char *
gen3_decode_blend_fact(uint32_t op)
{
	static const char *const names[16] = {
		"",
		"zero",
		"one",
		"src_colr",
		"inv_src_colr",
		"src_alpha",
		"inv_src_alpha",
		"dst_alpha",
		"inv_dst_alpha",
		"dst_colr",
		"inv_dst_colr",
		"src_alpha_sat",
		"cnst_colr",
		"inv_cnst_colr",
		"cnst_alpha",
		"inv_const_alpha",
	};

	return names[op & 0xf];
}
 
/*
 * Return the name of the texture coordinate mode encoded in the low
 * three bits of mode. Values 6 and 7 have no name and yield "".
 */
static const char *
decode_tex_coord_mode(uint32_t mode)
{
	static const char *const names[8] = {
		"wrap",
		"mirror",
		"clamp_edge",
		"cube",
		"clamp_border",
		"mirror_once",
		"",
		"",
	};

	return names[mode & 0x7];
}
 
/*
 * Return the name of the sampler filter encoded in the low three bits
 * of mode. Value 7 has no name and yields "".
 */
static const char *
gen3_decode_sample_filter(uint32_t mode)
{
	static const char *const names[8] = {
		"nearest",
		"linear",
		"anisotropic",
		"4x4_1",
		"4x4_2",
		"4x4_flat",
		"6x5_mono",
		"",
	};

	return names[mode & 0x7];
}
 
/*
 * Decode a 3DSTATE_LOAD_STATE_IMMEDIATE_1 packet at kgem->batch + offset.
 *
 * Bits 4..12 of the header dword select which state words follow; each
 * selected word consumes one dword and is pretty-printed through
 * kgem_debug_print. S0, S1, S2 and S4 are also forwarded to the vertex
 * tracking helpers so that later primitives can be dumped.
 *
 * Returns the packet length in dwords (low nibble of the header + 2) and
 * asserts that exactly that many dwords were consumed.
 */
static int
gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
{
	const uint32_t *data = kgem->batch + offset;
	int len, i, word;

	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
	len = (data[0] & 0x0000000f) + 2;
	i = 1;
	for (word = 0; word <= 8; word++) {
		if (data[0] & (1 << (4 + word))) {
			switch (word) {
			case 0:
				kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
						 data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
				gen3_update_vertex_buffer_addr(kgem, offset + i);
				break;
			case 1:
				kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
						 (data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
				gen3_update_vertex_buffer_pitch(kgem, offset + i);
				break;
			case 2:
				{
					char buf[200];
					int buf_len = 0;
					int tex_num;

					/*
					 * Formats 6..0xe append nothing, so start from an
					 * empty string rather than printing whatever was
					 * on the stack.
					 */
					buf[0] = '\0';
					for (tex_num = 0; tex_num < 8; tex_num++) {
						switch((data[i]>>tex_num*4)&0xf) {
						case 0: buf_len += sprintf(buf + buf_len, "%i=2D ", tex_num); break;
						case 1: buf_len += sprintf(buf + buf_len, "%i=3D ", tex_num); break;
						case 2: buf_len += sprintf(buf + buf_len, "%i=4D ", tex_num); break;
						case 3: buf_len += sprintf(buf + buf_len, "%i=1D ", tex_num); break;
						case 4: buf_len += sprintf(buf + buf_len, "%i=2D_16 ", tex_num); break;
						case 5: buf_len += sprintf(buf + buf_len, "%i=4D_16 ", tex_num); break;
						case 0xf: buf_len += sprintf(buf + buf_len, "%i=NP ", tex_num); break;
						}
					}
					kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
					gen3_update_vertex_texcoords(kgem, data[i]);
				}

				break;
			case 3:
				kgem_debug_print(data, offset, i, "S3: not documented\n");
				break;
			case 4:
				{
					const char *cullmode = "";
					const char *vfmt_xyzw = "";
					switch((data[i]>>13)&0x3) {
					case 0: cullmode = "both"; break;
					case 1: cullmode = "none"; break;
					case 2: cullmode = "cw"; break;
					case 3: cullmode = "ccw"; break;
					}
					/* Position format (bits 6-8) combined with bit 2. */
					switch(data[i] & (7<<6 | 1<<2)) {
					case 1<<6: vfmt_xyzw = "XYZ,"; break;
					case 2<<6: vfmt_xyzw = "XYZW,"; break;
					case 3<<6: vfmt_xyzw = "XY,"; break;
					case 4<<6: vfmt_xyzw = "XYW,"; break;
					case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
					case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
					case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
					case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
					}
					/*
					 * NOTE(review): "DepthOfs," and "FogParam," both test
					 * bit 9 below; one of them is probably meant to test a
					 * different bit -- confirm against the register spec.
					 */
					kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
							 "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s%s%s "
							 "%s%s%s\n",
							 (data[i]>>23)&0x1ff,
							 ((data[i]>>19)&0xf) / 2.0,
							 data[i]&(0xf<<15)?" flatshade=":"",
							 data[i]&(1<<18)?"Alpha,":"",
							 data[i]&(1<<17)?"Fog,":"",
							 data[i]&(1<<16)?"Specular,":"",
							 data[i]&(1<<15)?"Color,":"",
							 cullmode,
							 data[i]&(1<<12)?"PointWidth,":"",
							 data[i]&(1<<11)?"SpecFog,":"",
							 data[i]&(1<<10)?"Color,":"",
							 data[i]&(1<<9)?"DepthOfs,":"",
							 vfmt_xyzw,
							 data[i]&(1<<9)?"FogParam,":"",
							 data[i]&(1<<5)?"force default diffuse, ":"",
							 data[i]&(1<<4)?"force default specular, ":"",
							 data[i]&(1<<3)?"local depth ofs enable, ":"",
							 data[i]&(1<<1)?"point sprite enable, ":"",
							 data[i]&(1<<0)?"line AA enable, ":"");
					gen3_update_vertex_elements(kgem, data[i]);
					break;
				}
			case 5:
				{
					kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
							 "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
							 "stencil_fail=%s, stencil_pass_z_fail=%s, "
							 "stencil_pass_z_pass=%s, %s%s%s%s\n",
							 data[i]&(0xf<<28)?" write_disable=":"",
							 data[i]&(1<<31)?"Alpha,":"",
							 data[i]&(1<<30)?"Red,":"",
							 data[i]&(1<<29)?"Green,":"",
							 data[i]&(1<<28)?"Blue,":"",
							 data[i]&(1<<27)?" force default point size,":"",
							 data[i]&(1<<26)?" last pixel enable,":"",
							 data[i]&(1<<25)?" global depth ofs enable,":"",
							 data[i]&(1<<24)?" fog enable,":"",
							 (data[i]>>16)&0xff,
							 gen3_decode_compare_func(data[i]>>13),
							 gen3_decode_stencil_op(data[i]>>10),
							 gen3_decode_stencil_op(data[i]>>7),
							 gen3_decode_stencil_op(data[i]>>4),
							 data[i]&(1<<3)?"stencil write enable, ":"",
							 data[i]&(1<<2)?"stencil test enable, ":"",
							 data[i]&(1<<1)?"color dither enable, ":"",
							 data[i]&(1<<0)?"logicop enable, ":"");
				}
				break;
			case 6:
				/*
				 * alpha_ref is the 8-bit field at bits 20-27; shift it
				 * down so the printed value is the reference itself and
				 * not the field still in place inside the dword.
				 */
				kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
						 "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
						 "%s%stristrip_provoking_vertex=%i\n",
						 data[i]&(1<<31)?"alpha test enable, ":"",
						 gen3_decode_compare_func(data[i]>>28),
						 (data[i]>>20)&0xff,
						 gen3_decode_compare_func(data[i]>>16),
						 data[i]&(1<<15)?"cbuf blend enable, ":"",
						 gen3_decode_blend_fact(data[i]>>8),
						 gen3_decode_blend_fact(data[i]>>4),
						 data[i]&(1<<3)?"depth write enable, ":"",
						 data[i]&(1<<2)?"cbuf write enable, ":"",
						 data[i]&(0x3));
				break;
			case 7:
				kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
				break;
			}
			/* Every selected word occupies one dword, printed or not. */
			i++;
		}
	}

	assert(len == i);
	return len;
}
 
/*
 * Decode one gen3 3D command at kgem->batch + offset, dispatching on
 * the opcode in bits 16-23 of its first dword.
 *
 * Each dword of the command is printed through kgem_debug_print. Opcodes
 * with a dedicated case below are pretty-printed; the ones listed in
 * opcodes_3d_1d are dumped as raw "dword N" lines; anything else is
 * reported as unknown, asserts, and is treated as one dword long.
 *
 * Returns the command length in dwords.
 */
static int
gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
{
uint32_t *data = kgem->batch + offset;
unsigned int len, i, c, idx, word, map, sampler, instr;
const char *format, *zformat, *type;
uint32_t opcode;
/* Commands without a dedicated decoder; only opcode and name are used here. */
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes_3d_1d[] = {
{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
{ 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
}, *opcode_3d_1d;

opcode = (data[0] & 0x00ff0000) >> 16;

switch (opcode) {
case 0x07:
/* This instruction is unusual. A 0 length means just 1 DWORD instead of
* 2. The 0 length is specified in one place to be unsupported, but
* stated to be required in another, and 0 length LOAD_INDIRECTs appear
* to cause no harm at least.
*/
kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
len = (data[0] & 0x000000ff) + 1;
i = 1;
/* Bits 8..13 of the header select which blocks follow (DIS is one dword, the rest two). */
if (data[0] & (0x01 << 8)) {
kgem_debug_print(data, offset, i++, "SIS.0\n");
kgem_debug_print(data, offset, i++, "SIS.1\n");
}
if (data[0] & (0x02 << 8)) {
kgem_debug_print(data, offset, i++, "DIS.0\n");
}
if (data[0] & (0x04 << 8)) {
kgem_debug_print(data, offset, i++, "SSB.0\n");
kgem_debug_print(data, offset, i++, "SSB.1\n");
}
if (data[0] & (0x08 << 8)) {
kgem_debug_print(data, offset, i++, "MSB.0\n");
kgem_debug_print(data, offset, i++, "MSB.1\n");
}
if (data[0] & (0x10 << 8)) {
kgem_debug_print(data, offset, i++, "PSP.0\n");
kgem_debug_print(data, offset, i++, "PSP.1\n");
}
if (data[0] & (0x20 << 8)) {
kgem_debug_print(data, offset, i++, "PSC.0\n");
kgem_debug_print(data, offset, i++, "PSC.1\n");
}
assert(len == i);
return len;
case 0x04:
return gen3_decode_load_state_immediate_1(kgem, offset);
case 0x03:
kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
len = (data[0] & 0x0000000f) + 2;
i = 1;
/* Header bits 6..14: bit 6 is one dword, bits 7-10 two dwords each, bits 11-14 five dwords each. */
for (word = 6; word <= 14; word++) {
if (data[0] & (1 << word)) {
if (word == 6)
kgem_debug_print(data, offset, i++, "TBCF\n");
else if (word >= 7 && word <= 10) {
kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
} else if (word >= 11 && word <= 14) {
kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
word - 11,
data[i]&0xfffffffe,
data[i]&1?"use fence":"");
i++;
kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
word - 11,
data[i]>>21, (data[i]>>10)&0x3ff,
data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
i++;
kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
word - 11,
((data[i]>>21) + 1)*4);
i++;
kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
}
}
}
assert(len == i);
return len;
case 0x00:
kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
len = (data[0] & 0x0000003f) + 2;
kgem_debug_print(data, offset, 1, "mask\n");

i = 2;
/* data[1] is a 16-bit mask; every set bit contributes three dwords (MS2, MS3, MS4). */
for (map = 0; map <= 15; map++) {
if (data[1] & (1 << map)) {
int width, height, pitch, dword;
struct drm_i915_gem_relocation_entry *reloc;
const char *tiling;

reloc = kgem_debug_get_reloc_entry(kgem, &data[i] - kgem->batch);
assert(reloc->target_handle);

dword = data[i];
kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s, handle=%d\n", map,
dword&(1<<31)?"untrusted surface, ":"",
dword&(1<<1)?"vertical line stride enable, ":"",
dword&(1<<0)?"vertical ofs enable, ":"",
reloc->target_handle);

dword = data[i];
/* Both dimensions are 11-bit fields stored minus one. */
width = ((dword >> 10) & ((1 << 11) - 1))+1;
height = ((dword >> 21) & ((1 << 11) - 1))+1;

tiling = "none";
if (dword & (1 << 2))
tiling = "fenced";
else if (dword & (1 << 1))
tiling = dword & (1 << 0) ? "Y" : "X";
type = " BAD";
format = " (invalid)";
/* Bits 7-9 pick the surface type, bits 3-6 the format within that type. */
switch ((dword>>7) & 0x7) {
case 1:
type = "8";
switch ((dword>>3) & 0xf) {
case 0: format = "I"; break;
case 1: format = "L"; break;
case 4: format = "A"; break;
case 5: format = " mono"; break;
}
break;
case 2:
type = "16";
switch ((dword>>3) & 0xf) {
case 0: format = " rgb565"; break;
case 1: format = " argb1555"; break;
case 2: format = " argb4444"; break;
case 3: format = " ay88"; break;
case 5: format = " 88dvdu"; break;
case 6: format = " bump655"; break;
case 7: format = "I"; break;
case 8: format = "L"; break;
case 9: format = "A"; break;
}
break;
case 3:
type = "32";
switch ((dword>>3) & 0xf) {
case 0: format = " argb8888"; break;
case 1: format = " abgr8888"; break;
case 2: format = " xrgb8888"; break;
case 3: format = " xbgr8888"; break;
case 4: format = " qwvu8888"; break;
case 5: format = " axvu8888"; break;
case 6: format = " lxvu8888"; break;
case 7: format = " xlvu8888"; break;
case 8: format = " argb2101010"; break;
case 9: format = " abgr2101010"; break;
case 10: format = " awvu2101010"; break;
case 11: format = " gr1616"; break;
case 12: format = " vu1616"; break;
case 13: format = " xI824"; break;
case 14: format = " xA824"; break;
case 15: format = " xL824"; break;
}
break;
case 5:
type = "422";
switch ((dword>>3) & 0xf) {
case 0: format = " yuv_swapy"; break;
case 1: format = " yuv"; break;
case 2: format = " yuv_swapuv"; break;
case 3: format = " yuv_swapuvy"; break;
}
break;
case 6:
type = "compressed";
switch ((dword>>3) & 0x7) {
case 0: format = " dxt1"; break;
case 1: format = " dxt2_3"; break;
case 2: format = " dxt4_5"; break;
case 3: format = " fxt1"; break;
case 4: format = " dxt1_rb"; break;
}
break;
case 7:
type = "4b indexed";
switch ((dword>>3) & 0xf) {
case 7: format = " argb8888"; break;
}
break;
default:
format = "BAD";
break;
}
dword = data[i];
kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
map, width, height, type, format, tiling,
dword&(1<<9)?" palette select":"");

dword = data[i];
/* Pitch is an 11-bit field counted in dwords minus one; printed in bytes. */
pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
map, pitch,
(dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
dword&(1<<8)?"miplayout legacy":"miplayout right");
}
}
assert(len == i);
return len;
case 0x06:
kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
len = (data[0] & 0x000000ff) + 2;

i = 2;
/* data[1] is a 32-bit mask; every set bit contributes four dwords (X, Y, Z, W). */
for (c = 0; c <= 31; c++) {
if (data[1] & (1 << c)) {
kgem_debug_print(data, offset, i, "C%d.X = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
c, int_as_float(data[i]));
i++;
kgem_debug_print(data, offset, i, "C%d.W = %f\n",
c, int_as_float(data[i]));
i++;
}
}
assert(len == i);
return len;
case 0x05:
kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
len = (data[0] & 0x000000ff) + 2;
/* The payload after the header must be a whole number of 3-dword instructions. */
assert(((len-1) % 3) == 0);
assert(len <= 370);
i = 1;
for (instr = 0; instr < (len - 1) / 3; instr++) {
char instr_prefix[10];

sprintf(instr_prefix, "PS%03d", instr);
gen3_decode_instruction(data, offset, i, instr_prefix);
i += 3;
}
return len;
case 0x01:
kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
kgem_debug_print(data, offset, 1, "mask\n");
len = (data[0] & 0x0000003f) + 2;
i = 2;
/* data[1] is a 16-bit mask; every set bit contributes three dwords (SS2, SS3, SS4). */
for (sampler = 0; sampler <= 15; sampler++) {
if (data[1] & (1 << sampler)) {
uint32_t dword;
const char *mip_filter = "";
dword = data[i];
switch ((dword>>20)&0x3) {
case 0: mip_filter = "none"; break;
case 1: mip_filter = "nearest"; break;
case 3: mip_filter = "linear"; break;
}
kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
"base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
"lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
dword&(1<<31)?" reverse gamma,":"",
dword&(1<<30)?" packed2planar,":"",
dword&(1<<29)?" colorspace conversion,":"",
(dword>>22)&0x1f,
mip_filter,
gen3_decode_sample_filter(dword>>17),
gen3_decode_sample_filter(dword>>14),
((dword>>5)&0x1ff)/(0x10*1.0),
dword&(1<<4)?" shadow,":"",
dword&(1<<3)?4:2,
gen3_decode_compare_func(dword));
dword = data[i];
kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
"tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
sampler, ((dword>>24)&0xff)/(0x10*1.0),
dword&(1<<17)?" kill pixel enable,":"",
decode_tex_coord_mode(dword>>12),
decode_tex_coord_mode(dword>>9),
decode_tex_coord_mode(dword>>6),
dword&(1<<5)?" normalized coords,":"",
(dword>>1)&0xf,
dword&(1<<0)?" deinterlacer,":"");
kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
sampler);
}
}
assert(len == i);
return len;
case 0x85:
len = (data[0] & 0x0000000f) + 2;
assert(len == 2);

kgem_debug_print(data, offset, 0,
"3DSTATE_DEST_BUFFER_VARIABLES\n");

/* Colour format from bits 8-11 of data[1], depth format from bits 2-3. */
switch ((data[1] >> 8) & 0xf) {
case 0x0: format = "g8"; break;
case 0x1: format = "x1r5g5b5"; break;
case 0x2: format = "r5g6b5"; break;
case 0x3: format = "a8r8g8b8"; break;
case 0x4: format = "ycrcb_swapy"; break;
case 0x5: format = "ycrcb_normal"; break;
case 0x6: format = "ycrcb_swapuv"; break;
case 0x7: format = "ycrcb_swapuvy"; break;
case 0x8: format = "a4r4g4b4"; break;
case 0x9: format = "a1r5g5b5"; break;
case 0xa: format = "a2r10g10b10"; break;
default: format = "BAD"; break;
}
switch ((data[1] >> 2) & 0x3) {
case 0x0: zformat = "u16"; break;
case 0x1: zformat = "f16"; break;
case 0x2: zformat = "u24x8"; break;
default: zformat = "BAD"; break;
}
kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
format, zformat,
(data[1] & (1 << 31)) ? "en" : "dis");
return len;

case 0x8e:
{
const char *name, *tiling;

len = (data[0] & 0x0000000f) + 2;
assert(len == 3);

switch((data[1] >> 24) & 0x7) {
case 0x3: name = "color"; break;
case 0x7: name = "depth"; break;
default: name = "unknown"; break;
}

tiling = "none";
if (data[1] & (1 << 23))
tiling = "fenced";
else if (data[1] & (1 << 22))
tiling = data[1] & (1 << 21) ? "Y" : "X";

kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);

kgem_debug_print(data, offset, 2, "address\n");
return len;
}
case 0x81:
len = (data[0] & 0x0000000f) + 2;
assert(len == 3);

kgem_debug_print(data, offset, 0,
"3DSTATE_SCISSOR_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "(%d,%d)\n",
data[1] & 0xffff, data[1] >> 16);
kgem_debug_print(data, offset, 2, "(%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);

return len;
case 0x80:
len = (data[0] & 0x0000000f) + 2;
assert(len == 5);

kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "%s\n",
data[1]&(1<<30)?"depth ofs disabled ":"");
kgem_debug_print(data, offset, 2, "(%d,%d)\n",
data[2] & 0xffff, data[2] >> 16);
kgem_debug_print(data, offset, 3, "(%d,%d)\n",
data[3] & 0xffff, data[3] >> 16);
/* The last pair is printed as signed 16-bit values, unlike the two above. */
kgem_debug_print(data, offset, 4, "(%d,%d)\n",
(int16_t)(data[4] & 0xffff),
(int16_t)(data[4] >> 16));

return len;
case 0x9c:
len = (data[0] & 0x0000000f) + 2;
assert(len == 7);

kgem_debug_print(data, offset, 0,
"3DSTATE_CLEAR_PARAMETERS\n");
kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
data[1]&(1<<2)?"color,":"",
data[1]&(1<<1)?"depth,":"",
data[1]&(1<<0)?"stencil,":"");
kgem_debug_print(data, offset, 2, "clear color\n");
kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
kgem_debug_print(data, offset, 5, "depth value %f\n",
int_as_float(data[5]));
kgem_debug_print(data, offset, 6, "clear stencil\n");
return len;
}

/* No dedicated decoder: fall back to the name table and dump raw dwords. */
for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
opcode_3d_1d = &opcodes_3d_1d[idx];
if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
len = (data[0] & 0xf) + 2;
kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);

return len;
}
}

kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
assert(0);
return 1;
}
 
/*
 * Print one vertex component line for dword i and advance i.
 * Expands in place, so the caller must have data, offset, i and vertex
 * in scope; fmt must be a string literal because it is pasted between
 * two other literals.
 */
#define VERTEX_OUT(fmt, ...) do { \
kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
i++; \
} while (0)
 
/*
 * Decode a 3DPRIMITIVE command at kgem->batch + offset.
 *
 * Bit 23 of the header clear means the vertices are inline in the
 * batch: they are dumped one by one and the full command length is
 * returned. Otherwise the vertices are indirect: bit 17 set ("random"
 * access through an index list) is not supported and asserts, while
 * sequential access dumps the referenced vertices and returns 2.
 *
 * Returns the number of batch dwords the command occupies.
 */
static int
gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
{
	static const char *const prim_names[] = {
		"TRILIST",
		"TRISTRIP",
		"TRISTRIP_REVERSE",
		"TRIFAN",
		"POLYGON",
		"LINELIST",
		"LINESTRIP",
		"RECTLIST",
		"POINTLIST",
		"DIB",
		"CLEAR_RECT",
	};
	uint32_t *data = kgem->batch + offset;
	const unsigned int prim = (data[0] >> 18) & 0xf;
	const char *primtype = "unknown";
	unsigned int count, n;
	unsigned int vertex = 0;

	if (prim <= 0xa)
		primtype = prim_names[prim];
	if (prim == 0xa)
		assert(0);

	gen3_update_vertex_elements_offsets(kgem);

	/* XXX: 3DPRIM_DIB not supported */
	if ((data[0] & (1 << 23)) == 0) {
		/* inline vertices */
		count = (data[0] & 0x0003ffff) + 2;
		kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
		n = 1;
		while (n < count) {
			ErrorF(" [%d]: ", vertex);
			n += inline_vertex_out(kgem, data + n) / sizeof(uint32_t);
			ErrorF("\n");
			vertex++;
		}
		return count;
	}

	/* indirect vertices: the low 16 bits hold the index count */
	count = data[0] & 0x0000ffff;

	if (data[0] & (1 << 17)) {
		/*
		 * Random vertex access: a zero count means the indices run
		 * until 0xffff is found, otherwise the index buffer has a
		 * fixed size. Neither form is decoded.
		 */
		kgem_debug_print(data, offset, 0,
				 "3DPRIMITIVE random indirect %s (%d)\n", primtype, count);
		assert(0);
		return (count + 1) / 2 + 1;
	}

	/* sequential vertex access */
	vertex = data[1] & 0xffff;
	kgem_debug_print(data, offset, 0,
			 "3DPRIMITIVE sequential indirect %s, %d starting from "
			 "%d\n", primtype, count, vertex);
	kgem_debug_print(data, offset, 1, " start\n");
	for (n = 0; n < count; n++) {
		ErrorF(" [%d]: ", vertex);
		indirect_vertex_out(kgem, vertex++);
		ErrorF("\n");
	}
	return 2;
}
 
/*
 * Decode the gen3 3D command at kgem->batch + offset.
 *
 * The opcode is bits 24-28 of the first dword. 0x1f, 0x1d and 0x1c are
 * handed to their dedicated decoders; the commands listed in opcodes[]
 * are printed by name followed by raw "dword N" lines; anything else is
 * reported as unknown.
 *
 * Returns the command length in dwords (1 for unknown opcodes).
 */
int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
		{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
		{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
		{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
		{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
		{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
		{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
		{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
		{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
	};
	uint32_t *data = kgem->batch + offset;
	uint32_t opcode;
	unsigned int idx;

	opcode = (data[0] & 0x1f000000) >> 24;

	switch (opcode) {
	case 0x1f:
		return gen3_decode_3d_primitive(kgem, offset);
	case 0x1d:
		return gen3_decode_3d_1d(kgem, offset);
	case 0x1c:
		return gen3_decode_3d_1c(kgem, offset);
	}

	for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) {
		if (opcode == opcodes[idx].opcode) {
			unsigned int len = 1, i;

			kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name);
			if (opcodes[idx].max_len > 1) {
				len = (data[0] & 0xff) + 2;
				/*
				 * The length must lie inside [min_len, max_len].
				 * This used "||", which holds for every length
				 * and so checked nothing.
				 */
				assert((int)len >= opcodes[idx].min_len &&
				       (int)len <= opcodes[idx].max_len);
			}

			for (i = 1; i < len; i++)
				kgem_debug_print(data, offset, i, "dword %d\n", i);
			return len;
		}
	}

	kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
	return 1;
}
 
 
/*
 * Reset the decoder's file-scope tracking state to all zero bytes.
 * The kgem argument is not used.
 */
void kgem_gen3_finish_state(struct kgem *kgem)
{
	(void)kgem;

	memset(&state, 0, sizeof state);
}
/drivers/video/Intel-2D/sna/kgem_debug_gen4.c
0,0 → 1,687
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen4_render.h"
 
#include "kgem_debug.h"
 
/*
 * File-scope bookkeeping for the decoder: what has been learned so far
 * about vertex buffers and vertex elements while walking a batch.
 */
static struct state {
/* One slot per vertex buffer, filled in by gen4_update_vertex_buffer(). */
struct vertex_buffer {
int handle;
void *base; /* start of the mapping that backs the buffer */
const char *ptr; /* base plus the relocation delta */
int pitch; /* low 11 bits of the buffer-state dword */

struct kgem_bo *current; /* NULL when the buffer lives in the batch itself */
} vb[33];
/* One slot per vertex element, filled in by gen4_update_vertex_elements(). */
struct vertex_elements {
int buffer; /* index into vb[] */
int offset;
bool valid;
uint32_t type;
uint8_t swizzle[4]; /* per-component control, see get_ve_component() */
} ve[33];
int num_ve;

struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
/*
 * Record the vertex buffer described by data[0..1] in state.vb[].
 *
 * The relocation entry that covers data[1] is looked up to find the
 * target handle. Handle 0 means the vertices live in the batch itself;
 * otherwise the matching bo on the next request's buffer list is mapped.
 * The slot index is the top five bits of data[0] and the pitch its low
 * eleven bits.
 */
static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
	const uint32_t reloc_offset = sizeof(uint32_t) * (&data[1] - kgem->batch);
	struct kgem_bo *bo = NULL;
	uint32_t handle;
	void *base;
	int n, slot;

	/* Find the relocation that patches data[1]. */
	n = 0;
	while (n < kgem->nreloc && kgem->reloc[n].offset != reloc_offset)
		n++;
	assert(n < kgem->nreloc);
	handle = kgem->reloc[n].target_handle;

	if (handle != 0) {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == handle)
				break;
		/* Reaching the list head means no bo carried that handle. */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	} else {
		base = kgem->batch;
	}

	slot = data[0] >> 27;

	state.vb[slot].current = bo;
	state.vb[slot].base = base;
	state.vb[slot].ptr = (char *)base + kgem->reloc[n].delta;
	state.vb[slot].pitch = data[0] & 0x7ff;
}
 
/*
 * Extract the 3-bit component control for `component` (0..3) from the
 * second dword of a vertex element.  Component 0 sits at bit 28 and each
 * following component four bits lower.
 */
static uint32_t
get_ve_component(uint32_t data, int component)
{
	const int shift = 16 + 4 * (3 - component);

	return (data >> shift) & 0x7;
}
 
static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 27;
state.ve[id].valid = !!(data[0] & (1 << 26));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
/*
 * Print one vertex element made of 16-bit signed integers as "(a, b, c, d)".
 *
 * The first `max` components may take their value from v[]; the remaining
 * ones (up to four in total) print a constant chosen by the component
 * control, with "store source" shown as 1.0.
 */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	int comp = 0;

	ErrorF("(");

	/* Components that have data behind them. */
	while (comp < max) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1)
			ErrorF("%d", v[comp]);
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (comp < 3)
			ErrorF(", ");
		comp++;
	}

	/* Components beyond what the format provides. */
	while (comp < 4) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1 || ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (comp < 3)
			ErrorF(", ");
		comp++;
	}

	ErrorF(")");
}
 
/*
 * Print one vertex element made of floats as "(a, b, c, d)".
 *
 * Values are consumed from f[] in order, one per "store source" component,
 * until `max` of them have been used; the components left after that print
 * a constant chosen by the component control.
 */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int comp = 0;
	int used = 0;

	ErrorF("(");

	/* Components printed while source values remain. */
	while (comp < 4 && used < max) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1)
			ErrorF("%f", f[used++]);
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (comp < 3)
			ErrorF(", ");
		comp++;
	}

	/* Whatever is left gets a constant. */
	while (comp < 4) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1 || ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (comp < 3)
			ErrorF(", ");
		comp++;
	}

	ErrorF(")");
}
 
/*
 * Print the vertex element stored at ptr according to its surface format.
 * Formats not listed here print nothing.
 */
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
	int is_float;
	int count;

	switch (ve->type) {
	case GEN4_SURFACEFORMAT_R32_FLOAT:
		is_float = 1; count = 1;
		break;
	case GEN4_SURFACEFORMAT_R32G32_FLOAT:
		is_float = 1; count = 2;
		break;
	case GEN4_SURFACEFORMAT_R32G32B32_FLOAT:
		is_float = 1; count = 3;
		break;
	case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT:
		is_float = 1; count = 4;
		break;
	case GEN4_SURFACEFORMAT_R16_SINT:
	case GEN4_SURFACEFORMAT_R16_SSCALED:
		is_float = 0; count = 1;
		break;
	case GEN4_SURFACEFORMAT_R16G16_SINT:
	case GEN4_SURFACEFORMAT_R16G16_SSCALED:
		is_float = 0; count = 2;
		break;
	case GEN4_SURFACEFORMAT_R16G16B16A16_SINT:
	case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED:
		is_float = 0; count = 4;
		break;
	default:
		return;
	}

	if (is_float)
		vertices_float_out(ve, ptr, count);
	else
		vertices_sint16_out(ve, ptr, count);
}
 
/*
 * Print every valid vertex element of vertex number v, separated by ", ".
 *
 * The elements parsed from 3DSTATE_VERTEX_ELEMENTS occupy
 * state.ve[0 .. num_ve-1].  Earlier code used `continue` inside a do/while
 * to skip an invalid element, which re-tested the loop condition without
 * advancing the index and therefore never terminated when the first element
 * was invalid.  Invalid elements are now skipped explicitly, and the scan
 * stops at num_ve instead of looking at the slot one past the last element.
 */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	int i = 0;

	/* Move to the first valid element, if any. */
	while (i < state.num_ve && !state.ve[i].valid)
		i++;

	while (i < state.num_ve) {
		const struct vertex_elements *ve = &state.ve[i];
		const struct vertex_buffer *vb = &state.vb[ve->buffer];
		const void *ptr = vb->ptr + v * vb->pitch + ve->offset;

		ve_out(ve, ptr);

		/* Advance to the next valid element. */
		while (++i < state.num_ve && !state.ve[i].valid)
			;

		/* Only emit a separator when something follows. */
		if (i < state.num_ve)
			ErrorF(", ");
	}
}
 
/*
 * Dump the vertices referenced by a 3DPRIMITIVE command.
 *
 * data[1] is the vertex count and data[2] the start vertex; each vertex is
 * printed on its own line as " [n:index] = <elements>".  Only sequential
 * access (bit 15 of data[0] clear) is handled.
 */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	int n = 0;

	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	while (n < data[1]) {
		const int vertex = data[2] + n;

		ErrorF(" [%d:%d] = ", n, vertex);
		indirect_vertex_out(kgem, vertex);
		ErrorF("\n");
		n++;
	}
}
 
/*
 * Describe one base-address dword of STATE_BASE_ADDRESS.
 * Bit 0 set means the address is being modified; the remaining bits are
 * printed as the address.
 */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	const uint32_t dw = data[index];

	if (!(dw & 1)) {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state base address 0x%08x\n",
			 name, dw & ~1);
}
 
/*
 * Describe one upper-bound dword of STATE_BASE_ADDRESS.
 * A value of exactly 1 is reported as "disabled", any other value with
 * bit 0 set as a new bound, and everything else as "not updated".
 */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t dw = data[index];

	if (dw == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
		return;
	}

	if ((dw & 1) == 0) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state upper bound 0x%08x\n",
			 name, dw & ~1);
}
 
/*
 * Map a surface type code to a printable name; codes without a name
 * yield "unknown".
 */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	static const char * const names[] = {
		[0] = "1D",
		[1] = "2D",
		[2] = "3D",
		[3] = "CUBE",
		[4] = "BUFFER",
		[7] = "NULL",
	};

	if (surfacetype < sizeof(names) / sizeof(names[0]) &&
	    names[surfacetype] != 0)
		return names[surfacetype];

	return "unknown";
}
 
/*
 * Map a depth format code to a printable name; codes without a name
 * yield "unknown".
 */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	static const char * const names[] = {
		[0] = "s8_z24float",
		[1] = "z32float",
		[2] = "z24s8",
		[5] = "z16",
	};

	if (depthformat < sizeof(names) / sizeof(names[0]) &&
	    names[depthformat] != 0)
		return names[depthformat];

	return "unknown";
}
 
/*
 * Name the component control of `component` (0..3) found in the second
 * dword of a vertex element.  "Store source" is reported as the matching
 * X/Y/Z/W letter; unrecognised controls give "fail".
 */
static const char *
get_965_element_component(uint32_t data, int component)
{
	static const char * const source_names[4] = { "X", "Y", "Z", "W" };
	const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;

	if (control == 0)
		return "nostore";

	if (control == 1) {
		if (component >= 0 && component <= 3)
			return source_names[component];
		return "fail";
	}

	if (control == 2)
		return "0.0";
	if (control == 3)
		return "1.0";
	if (control == 4)
		return "0x1";
	if (control == 5)
		return "VID";

	return "fail";
}
 
/*
 * Name the primitive topology held in bits 14:10 of a 3DPRIMITIVE header
 * dword.  Codes 0x01..0x15 have names; everything else gives "fail".
 */
static const char *
get_965_prim_type(uint32_t data)
{
	/* Entry n describes primitive code n + 1. */
	static const char * const prim_names[] = {
		"point list",
		"line list",
		"line strip",
		"tri list",
		"tri strip",
		"tri fan",
		"quad list",
		"quad strip",
		"line list adj",
		"line strip adj",
		"tri list adj",
		"tri strip adj",
		"tri strip reverse",
		"polygon",
		"rect list",
		"line loop",
		"point list bf",
		"line strip cont",
		"line strip bf",
		"line strip cont bf",
		"tri fan no stipple",
	};
	const uint32_t primtype = (data >> 10) & 0x1f;

	if (primtype >= 0x01 &&
	    primtype <= sizeof(prim_names) / sizeof(prim_names[0]))
		return prim_names[primtype - 1];

	return "fail";
}
 
/*
 * Disabled helper: everything up to the matching #endif is compiled out.
 *
 * get_reloc() resolves the address stored in the batch dword `reloc`.
 * With a non-NULL base it returns base plus the dword's value.  With a
 * NULL base it looks up the relocation entry recorded for that dword,
 * uses the batch (target handle 0) or the mapped target bo as the base,
 * and returns that base plus the relocation delta.  *r is cleared first
 * and receives the bo and its mapping only when a bo was looked up.
 */
#if 0
struct reloc {
struct kgem_bo *bo;
void *base;
};

static void *
get_reloc(struct kgem *kgem,
void *base, const uint32_t *reloc,
struct reloc *r)
{
uint32_t delta = *reloc;

memset(r, 0, sizeof(*r));

if (base == 0) {
/* byte offset of the dword within the batch, used as the lookup key */
uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
struct kgem_bo *bo = NULL;
int i;

for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == handle)
break;
assert(i < kgem->nreloc);
/* from here on `handle` holds the target handle, not the offset */
handle = kgem->reloc[i].target_handle;
delta = kgem->reloc[i].delta;

if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map__debug(kgem, bo);
r->bo = bo;
r->base = base;
}
}

return (char *)base + delta;
}
#endif
 
/*
 * Decode and print the 3D command that starts at dword `offset` of the
 * batch.
 *
 * Commands with a dedicated case in the switch are described field by
 * field; every other opcode is looked up in the opcodes[] table and dumped
 * as raw dwords.  Vertex buffer and vertex element commands also update the
 * file-scope decoder state so that a later 3DPRIMITIVE can print its
 * vertices.
 *
 * Returns the length of the command in dwords.  Malformed or unknown
 * commands are rejected with assert() only.
 */
int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset)
{
/*
 * Consulted only by the generic dump after the switch; the min/max lengths
 * of opcodes that have their own case are never checked against it.
 * NOTE(review): 0x7909 is listed twice; the lookup stops at the first
 * match, so the second name is never reported.
 */
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6000, 3, 3, "URB_FENCE" },
{ 0x6001, 2, 2, "CS_URB_FENCE" },
{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2 , "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
{ 0x7805, 3, 3, "3DSTATE_URB" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *desc1 = NULL;

/* Length is the low byte of the header plus two; opcode is the top half. */
len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6000:
assert(len == 3);

kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
(data[0] >> 13) & 1 ? "cs " : "",
(data[0] >> 12) & 1 ? "vfe " : "",
(data[0] >> 11) & 1 ? "sf " : "",
(data[0] >> 10) & 1 ? "clip " : "",
(data[0] >> 9) & 1 ? "gs " : "",
(data[0] >> 8) & 1 ? "vs " : "");
kgem_debug_print(data, offset, 1,
"vs fence: %d, gs_fence: %d, clip_fence: %d\n",
data[1] & 0x3ff,
(data[1] >> 10) & 0x3ff,
(data[1] >> 20) & 0x3ff);
kgem_debug_print(data, offset, 2,
"sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
data[2] & 0x3ff,
(data[2] >> 10) & 0x3ff,
(data[2] >> 20) & 0x7ff);
return len;

/* NOTE(review): printed as CS_URB_STATE although the table above names it CS_URB_FENCE. */
case 0x6001:
kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
(data[1] >> 4) & 0x1f,
(((data[1] >> 4) & 0x1f) + 1) * 64,
data[1] & 0x7);
return len;
case 0x6002:
kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
(data[0] >> 8) & 1 ? "valid" : "invalid");
kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
return len;
case 0x6101:
/* i walks the dwords: header, three base addresses, two upper bounds */
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 6);

state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "media");

state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "media");

return len;

case 0x7801:
assert(len == 6);

kgem_debug_print(data, offset, 0,
"3DSTATE_BINDING_TABLE_POINTERS\n");
kgem_debug_print(data, offset, 1, "VS binding table\n");
kgem_debug_print(data, offset, 2, "GS binding table\n");
kgem_debug_print(data, offset, 3, "CLIP binding table\n");
kgem_debug_print(data, offset, 4, "SF binding table\n");
kgem_debug_print(data, offset, 5, "WM binding table\n");

return len;

case 0x7808:
/* header dword followed by four dwords per vertex buffer */
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");

for (i = 1; i < len;) {
/* remember the buffer so 3DPRIMITIVE can dump vertices from it */
gen4_update_vertex_buffer(kgem, data + i);

kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 27,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;

case 0x7809:
/* header dword followed by two dwords per vertex element */
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");

memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
for (i = 1; i < len;) {
/* element index is (i - 1)/2; nothing here bounds it by the size of state.ve[] */
gen4_update_vertex_elements(kgem, (i - 1)/2, data + i);

kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 27,
data[i] & (1 << 26) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
"dst offset 0x%02x bytes\n",
get_965_element_component(data[i], 0),
get_965_element_component(data[i], 1),
get_965_element_component(data[i], 2),
get_965_element_component(data[i], 3),
(data[i] & 0xff) * 4);
i++;
}
state.num_ve = (len - 1) / 2; /* XXX? */
return len;

case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;

case 0x7900:
assert(len == 4);
kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
data[1] & 0xffff,
(data[1] >> 16) & 0xffff);
kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
data[2] & 0xffff,
(data[2] >> 16) & 0xffff);
kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
(int)data[3] & 0xffff,
((int)data[3] >> 16) & 0xffff);
return len;

/* NOTE(review): only the 7-dword form is accepted here although the table allows 5..7. */
case 0x7905:
assert(len == 7);
kgem_debug_print(data, offset, 0,
"3DSTATE_DEPTH_BUFFER\n");
kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
get_965_surfacetype(data[1] >> 29),
get_965_depthformat((data[1] >> 18) & 0x7),
(data[1] & 0x0001ffff) + 1,
data[1] & (1 << 27) ? "" : "not ",
(data[1] & (1 << 22)) != 0,
(data[1] & (1 << 21)) != 0);
kgem_debug_print(data, offset, 2, "depth offset\n");
kgem_debug_print(data, offset, 3, "%dx%d\n",
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
kgem_debug_print(data, offset, 4, "volume depth\n");
kgem_debug_print(data, offset, 5, "\n");
kgem_debug_print(data, offset, 6, "\n");
return len;

case 0x7a00:
assert(len == 4 || len == 5);
/* bits 15:14 of dword 1 select what, if anything, is written back */
switch ((data[1] >> 14) & 0x3) {
case 0: desc1 = "no write"; break;
case 1: desc1 = "qword write"; break;
case 2: desc1 = "PS_DEPTH_COUNT write"; break;
case 3: desc1 = "TIMESTAMP write"; break;
}
kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
kgem_debug_print(data, offset, 1,
"%s, %scs stall, %stlb invalidate, "
"%ssync gfdt, %sdepth stall, %sRC write flush, "
"%sinst flush, %sTC flush\n",
desc1,
data[1] & (1 << 20) ? "" : "no ",
data[1] & (1 << 18) ? "" : "no ",
data[1] & (1 << 17) ? "" : "no ",
data[1] & (1 << 13) ? "" : "no ",
data[1] & (1 << 12) ? "" : "no ",
data[1] & (1 << 11) ? "" : "no ",
data[1] & (1 << 10) ? "" : "no ");
if (len == 5) {
kgem_debug_print(data, offset, 2, "destination address\n");
kgem_debug_print(data, offset, 3, "immediate dword low\n");
kgem_debug_print(data, offset, 4, "immediate dword high\n");
} else {
for (i = 2; i < len; i++) {
kgem_debug_print(data, offset, i, "\n");
}
}
return len;

case 0x7b00:
assert(len == 6);
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE: %s %s\n",
get_965_prim_type(data[0]),
(data[0] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 1, "vertex count\n");
kgem_debug_print(data, offset, 2, "start vertex\n");
kgem_debug_print(data, offset, 3, "instance count\n");
kgem_debug_print(data, offset, 4, "start instance\n");
kgem_debug_print(data, offset, 5, "index bias\n");
/* dump the vertices using the recorded buffers and elements */
primitive_out(kgem, data);
return len;
}

/* For the rest, just dump the bytes */
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode)
break;

/* an opcode missing from the table is caught by this assert only */
assert(i < ARRAY_SIZE(opcodes));

/* single-dword opcodes carry no length field; others are range-checked */
len = 1;
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
if (opcodes[i].max_len > 1) {
len = (data[0] & 0xff) + 2;
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}

/* i is reused as the dword index from here on */
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);

return len;
}
 
/*
 * Forget all vertex buffer / vertex element information gathered while
 * decoding; the whole file-scope decoder state is zeroed.
 */
void kgem_gen4_finish_state(struct kgem *kgem)
{
	(void)kgem;	/* not needed to reset the state */

	memset(&state, 0, sizeof state);
}
/drivers/video/Intel-2D/sna/kgem_debug_gen5.c
0,0 → 1,662
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
 
#include "gen5_render.h"
 
#include "kgem_debug.h"
 
/*
 * One vertex buffer slot, filled in by gen5_update_vertex_buffer().
 */
struct vertex_buffer {
	int handle;		/* target handle of the buffer's relocation */
	void *base;		/* start of the memory the buffer lives in */
	int size;		/* size in bytes of that memory */
	const char *ptr;	/* base plus the relocation delta */
	int pitch;		/* low 11 bits of the slot's first dword */

	struct kgem_bo *current;	/* backing bo, NULL when the batch itself is the target */
};

/*
 * One vertex element, filled in by gen5_update_vertex_elements().
 */
struct vertex_elements {
	int buffer;		/* index into state.vb[] */
	int offset;		/* byte offset of the element inside a vertex */
	bool valid;
	uint32_t type;		/* surface format, see ve_out() */
	uint8_t swizzle[4];	/* per-component control, see get_ve_component() */
};

/* Not touched by any of the gen5 decoder code visible in this file. */
struct dynamic_state {
	struct kgem_bo *current;
	void *base, *ptr;
};

/*
 * Decoder state carried between commands of one batch; it is cleared by
 * kgem_gen5_finish_state().
 */
static struct state {
	struct vertex_buffer vb[17];
	struct vertex_elements ve[17];
	int num_ve;		/* number of elements parsed from the last 3DSTATE_VERTEX_ELEMENTS */

	struct dynamic_state dynamic_state;
} state;
 
/*
 * Record one vertex buffer entry of 3DSTATE_VERTEX_BUFFERS in state.vb[].
 *
 * data points at the first dword of the entry; data[1] is the dword that
 * carries the buffer address and therefore has a relocation attached.
 *
 * The slot index is a five-bit field (0..31) while state.vb[] is smaller,
 * so the index is checked before anything is stored.
 */
static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct kgem_bo *bo = NULL;
	void *base, *ptr;
	int i, size;

	i = data[0] >> 27;
	assert((unsigned int)i < sizeof(state.vb) / sizeof(state.vb[0]));

	reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch);
	if (reloc->target_handle == -1) {
		/* A target handle of -1 denotes the batch itself. */
		base = kgem->batch;
		size = kgem->nbatch * sizeof(uint32_t);
	} else {
		bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc);
		base = kgem_bo_map__debug(kgem, bo);
		size = kgem_bo_size(bo);
	}
	ptr = (char *)base + reloc->delta;

	state.vb[i].handle = reloc->target_handle;
	state.vb[i].current = bo;
	state.vb[i].base = base;
	state.vb[i].ptr = ptr;
	state.vb[i].pitch = data[0] & 0x7ff;
	state.vb[i].size = size;
}
 
/*
 * Extract the 3-bit component control for `component` (0..3) from the
 * second dword of a vertex element.  Component 0 sits at bit 28 and each
 * following component four bits lower.
 */
static uint32_t
get_ve_component(uint32_t data, int component)
{
	const int shift = 16 + 4 * (3 - component);
	const uint32_t control = data >> shift;

	return control & 0x7;
}
 
static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 27;
state.ve[id].valid = !!(data[0] & (1 << 26));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
/*
 * Print one vertex element made of 16-bit signed integers.
 *
 * Components are printed in order until either four have been handled or
 * `max` values have been taken from v[]; a ", " follows each component
 * while source values remain.
 */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	int comp = 0;
	int used = 0;

	ErrorF("(");
	while (comp < 4 && used < max) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1)
			ErrorF("%d", v[used++]);
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (used < max)
			ErrorF(", ");
		comp++;
	}
	ErrorF(")");
}
 
/*
 * Print one vertex element made of floats.
 *
 * Components are printed in order until either four have been handled or
 * `max` values have been taken from f[]; a ", " follows each component
 * while source values remain.
 */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int comp = 0;
	int used = 0;

	ErrorF("(");
	while (comp < 4 && used < max) {
		const uint8_t ctrl = ve->swizzle[comp];

		if (ctrl == 0)
			ErrorF("#");
		else if (ctrl == 1)
			ErrorF("%f", f[used++]);
		else if (ctrl == 2)
			ErrorF("0.0");
		else if (ctrl == 3)
			ErrorF("1.0");
		else if (ctrl == 4)
			ErrorF("0x1");
		else if (ctrl != 5)
			ErrorF("?");

		if (used < max)
			ErrorF(", ");
		comp++;
	}
	ErrorF(")");
}
 
/*
 * Print the vertex element stored at ptr according to its surface format.
 * Formats not listed here print nothing.
 */
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
	int is_float;
	int count;

	switch (ve->type) {
	case GEN5_SURFACEFORMAT_R32_FLOAT:
		is_float = 1; count = 1;
		break;
	case GEN5_SURFACEFORMAT_R32G32_FLOAT:
		is_float = 1; count = 2;
		break;
	case GEN5_SURFACEFORMAT_R32G32B32_FLOAT:
		is_float = 1; count = 3;
		break;
	case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT:
		is_float = 1; count = 4;
		break;
	case GEN5_SURFACEFORMAT_R16_SINT:
	case GEN5_SURFACEFORMAT_R16_SSCALED:
		is_float = 0; count = 1;
		break;
	case GEN5_SURFACEFORMAT_R16G16_SINT:
	case GEN5_SURFACEFORMAT_R16G16_SSCALED:
		is_float = 0; count = 2;
		break;
	case GEN5_SURFACEFORMAT_R16G16B16A16_SINT:
	case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED:
		is_float = 0; count = 4;
		break;
	default:
		return;
	}

	if (is_float)
		vertices_float_out(ve, ptr, count);
	else
		vertices_sint16_out(ve, ptr, count);
}
 
/*
 * Print every valid vertex element of vertex number v, starting with
 * element 1 (element 0 is not printed), separated by ", ".
 *
 * The elements parsed from 3DSTATE_VERTEX_ELEMENTS occupy
 * state.ve[0 .. num_ve-1].  Earlier code used `continue` inside a do/while
 * to skip an invalid element, which re-tested the loop condition without
 * advancing the index; with a single parsed element that spun forever on
 * the empty slot 1.  Invalid elements are now skipped explicitly, and the
 * scan stops at num_ve instead of looking at the slot one past the last
 * element.
 */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	int i = 1;

	/* Move to the first valid element, if any. */
	while (i < state.num_ve && !state.ve[i].valid)
		i++;

	while (i < state.num_ve) {
		const struct vertex_elements *ve = &state.ve[i];
		const struct vertex_buffer *vb = &state.vb[ve->buffer];
		const void *ptr = vb->ptr + v * vb->pitch + ve->offset;

		/* The element must lie inside the memory recorded for its buffer. */
		assert(vb->pitch);
		assert(ve->offset + v*vb->pitch < vb->size);

		ve_out(ve, ptr);

		/* Advance to the next valid element. */
		while (++i < state.num_ve && !state.ve[i].valid)
			;

		/* Only emit a separator when something follows. */
		if (i < state.num_ve)
			ErrorF(", ");
	}
}
 
/*
 * Dump the vertices referenced by a 3DPRIMITIVE command.
 *
 * data[1] is the vertex count and data[2] the start vertex; each vertex is
 * printed on its own line as " [n:index] = <elements>".  Only sequential
 * access (bit 15 of data[0] clear) is handled.
 */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	int n = 0;

	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	while (n < data[1]) {
		const int vertex = data[2] + n;

		ErrorF(" [%d:%d] = ", n, vertex);
		indirect_vertex_out(kgem, vertex);
		ErrorF("\n");
		n++;
	}
}
 
/*
 * Describe one base-address dword of STATE_BASE_ADDRESS.
 * Bit 0 set means the address is being modified; the remaining bits are
 * printed as the address.
 */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	const uint32_t dw = data[index];

	if (!(dw & 1)) {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state base address 0x%08x\n",
			 name, dw & ~1);
}
 
/*
 * Describe one upper-bound dword of STATE_BASE_ADDRESS.
 * A value of exactly 1 is reported as "disabled", any other value with
 * bit 0 set as a new bound, and everything else as "not updated".
 */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t dw = data[index];

	if (dw == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
		return;
	}

	if ((dw & 1) == 0) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state upper bound 0x%08x\n",
			 name, dw & ~1);
}
 
/*
 * Map a surface type code to a printable name; codes without a name
 * yield "unknown".
 */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	if (surfacetype == 0)
		return "1D";
	if (surfacetype == 1)
		return "2D";
	if (surfacetype == 2)
		return "3D";
	if (surfacetype == 3)
		return "CUBE";
	if (surfacetype == 4)
		return "BUFFER";
	if (surfacetype == 7)
		return "NULL";

	return "unknown";
}
 
/*
 * Map a depth format code to a printable name; codes without a name
 * yield "unknown".
 */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	if (depthformat == 0)
		return "s8_z24float";
	if (depthformat == 1)
		return "z32float";
	if (depthformat == 2)
		return "z24s8";
	if (depthformat == 5)
		return "z16";

	return "unknown";
}
 
/*
 * Name the component control of `component` (0..3) found in the second
 * dword of a vertex element.  "Store source" is reported as the matching
 * X/Y/Z/W letter; unrecognised controls give "fail".
 */
static const char *
get_965_element_component(uint32_t data, int component)
{
	static const char * const source_names[4] = { "X", "Y", "Z", "W" };
	const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;

	if (control == 0)
		return "nostore";

	if (control == 1) {
		if (component >= 0 && component <= 3)
			return source_names[component];
		return "fail";
	}

	if (control == 2)
		return "0.0";
	if (control == 3)
		return "1.0";
	if (control == 4)
		return "0x1";
	if (control == 5)
		return "VID";

	return "fail";
}
 
/*
 * Name the primitive topology held in bits 14:10 of a 3DPRIMITIVE header
 * dword.  Codes 0x01..0x15 have names; everything else gives "fail".
 */
static const char *
get_965_prim_type(uint32_t data)
{
	/* Entry n describes primitive code n + 1. */
	static const char * const prim_names[] = {
		"point list",
		"line list",
		"line strip",
		"tri list",
		"tri strip",
		"tri fan",
		"quad list",
		"quad strip",
		"line list adj",
		"line strip adj",
		"tri list adj",
		"tri strip adj",
		"tri strip reverse",
		"polygon",
		"rect list",
		"line loop",
		"point list bf",
		"line strip cont",
		"line strip bf",
		"line strip cont bf",
		"tri fan no stipple",
	};
	const uint32_t primtype = (data >> 10) & 0x1f;

	if (primtype >= 0x01 &&
	    primtype <= sizeof(prim_names) / sizeof(prim_names[0]))
		return prim_names[primtype - 1];

	return "fail";
}
 
/*
 * Disabled helper: everything up to the matching #endif is compiled out.
 *
 * get_reloc() resolves the address stored in the batch dword `reloc`.
 * With a non-NULL base it returns base plus the dword's value.  With a
 * NULL base it looks up the relocation entry recorded for that dword,
 * uses the batch (target handle 0) or the mapped target bo as the base,
 * and returns that base plus the relocation delta.  *r is cleared first
 * and receives the bo and its mapping only when a bo was looked up.
 */
#if 0
struct reloc {
struct kgem_bo *bo;
void *base;
};

static void *
get_reloc(struct kgem *kgem,
void *base, const uint32_t *reloc,
struct reloc *r)
{
uint32_t delta = *reloc;

memset(r, 0, sizeof(*r));

if (base == 0) {
/* byte offset of the dword within the batch, used as the lookup key */
uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch);
struct kgem_bo *bo = NULL;
int i;

for (i = 0; i < kgem->nreloc; i++)
if (kgem->reloc[i].offset == handle)
break;
assert(i < kgem->nreloc);
/* from here on `handle` holds the target handle, not the offset */
handle = kgem->reloc[i].target_handle;
delta = kgem->reloc[i].delta;

if (handle == 0) {
base = kgem->batch;
} else {
list_for_each_entry(bo, &kgem->next_request->buffers, request)
if (bo->handle == handle)
break;
assert(&bo->request != &kgem->next_request->buffers);
base = kgem_bo_map(kgem, bo, PROT_READ);
r->bo = bo;
r->base = base;
}
}

return (char *)base + delta;
}
#endif
 
/*
 * Decode and print the 3D command that starts at dword `offset` of the
 * batch.
 *
 * Commands with a dedicated case in the switch are described field by
 * field; every other opcode is looked up in the opcodes[] table and dumped
 * as raw dwords.  Vertex buffer and vertex element commands also update the
 * file-scope decoder state so that a later 3DPRIMITIVE can print its
 * vertices.
 *
 * Returns the length of the command in dwords.  Malformed or unknown
 * commands are rejected with assert() only.
 */
int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset)
{
/*
 * Consulted only by the generic dump after the switch; the min/max lengths
 * of opcodes that have their own case are never checked against it.
 * NOTE(review): 0x7909 is listed twice; the lookup stops at the first
 * match, so the second name is never reported.
 */
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6000, 3, 3, "URB_FENCE" },
{ 0x6001, 2, 2, "CS_URB_FENCE" },
{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2 , "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
{ 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
{ 0x7b00, 6, 6, "3DPRIMITIVE" },
{ 0x7805, 3, 3, "3DSTATE_URB" },
{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
{ 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" },
{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *desc1 = NULL;

/* Length is the low byte of the header plus two; opcode is the top half. */
len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6000:
assert(len == 3);

kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n",
(data[0] >> 13) & 1 ? "cs " : "",
(data[0] >> 12) & 1 ? "vfe " : "",
(data[0] >> 11) & 1 ? "sf " : "",
(data[0] >> 10) & 1 ? "clip " : "",
(data[0] >> 9) & 1 ? "gs " : "",
(data[0] >> 8) & 1 ? "vs " : "");
kgem_debug_print(data, offset, 1,
"vs fence: %d, gs_fence: %d, clip_fence: %d\n",
data[1] & 0x3ff,
(data[1] >> 10) & 0x3ff,
(data[1] >> 20) & 0x3ff);
kgem_debug_print(data, offset, 2,
"sf fence: %d, vfe_fence: %d, cs_fence: %d\n",
data[2] & 0x3ff,
(data[2] >> 10) & 0x3ff,
(data[2] >> 20) & 0x7ff);
return len;

/* NOTE(review): printed as CS_URB_STATE although the table above names it CS_URB_FENCE. */
case 0x6001:
kgem_debug_print(data, offset, 0, "CS_URB_STATE\n");
kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n",
(data[1] >> 4) & 0x1f,
(((data[1] >> 4) & 0x1f) + 1) * 64,
data[1] & 0x7);
return len;
case 0x6002:
kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n",
(data[0] >> 8) & 1 ? "valid" : "invalid");
kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n",
data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64);
return len;
case 0x6101:
/*
 * i walks the dwords: header, four base addresses, three upper bounds.
 * NOTE(review): this case requires 8 dwords while the table above lists 6;
 * the table entry is never consulted because this case returns.
 */
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 8);

state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "media");
state_base_out(data, offset, i++, "instruction");

state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "media");
state_max_out(data, offset, i++, "instruction");

return len;

case 0x7801:
assert(len == 6);

kgem_debug_print(data, offset, 0,
"3DSTATE_BINDING_TABLE_POINTERS\n");
kgem_debug_print(data, offset, 1, "VS binding table\n");
kgem_debug_print(data, offset, 2, "GS binding table\n");
kgem_debug_print(data, offset, 3, "CLIP binding table\n");
kgem_debug_print(data, offset, 4, "SF binding table\n");
kgem_debug_print(data, offset, 5, "WM binding table\n");

return len;

case 0x7808:
/* header dword followed by four dwords per vertex buffer */
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");

for (i = 1; i < len;) {
/* remember the buffer so 3DPRIMITIVE can dump vertices from it */
gen5_update_vertex_buffer(kgem, data + i);

kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 27,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;

case 0x7809:
/* header dword followed by two dwords per vertex element */
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");

memset(state.ve, 0, sizeof(state.ve)); /* XXX? */
for (i = 1; i < len;) {
/* element index is (i - 1)/2; nothing here bounds it by the size of state.ve[] */
gen5_update_vertex_elements(kgem, (i - 1)/2, data + i);

kgem_debug_print(data, offset, i,
"buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 27,
data[i] & (1 << 26) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s)\n",
get_965_element_component(data[i], 0),
get_965_element_component(data[i], 1),
get_965_element_component(data[i], 2),
get_965_element_component(data[i], 3));
i++;
}
state.num_ve = (len - 1) / 2; /* XXX? */
return len;

case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;

case 0x7900:
assert(len == 4);
kgem_debug_print(data, offset, 0,
"3DSTATE_DRAWING_RECTANGLE\n");
kgem_debug_print(data, offset, 1, "top left: %d,%d\n",
data[1] & 0xffff,
(data[1] >> 16) & 0xffff);
kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n",
data[2] & 0xffff,
(data[2] >> 16) & 0xffff);
kgem_debug_print(data, offset, 3, "origin: %d,%d\n",
(int)data[3] & 0xffff,
((int)data[3] >> 16) & 0xffff);
return len;

/* NOTE(review): only the 7-dword form is accepted here although the table allows 5..7. */
case 0x7905:
assert(len == 7);
kgem_debug_print(data, offset, 0,
"3DSTATE_DEPTH_BUFFER\n");
kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n",
get_965_surfacetype(data[1] >> 29),
get_965_depthformat((data[1] >> 18) & 0x7),
(data[1] & 0x0001ffff) + 1,
data[1] & (1 << 27) ? "" : "not ",
(data[1] & (1 << 22)) != 0,
(data[1] & (1 << 21)) != 0);
kgem_debug_print(data, offset, 2, "depth offset\n");
kgem_debug_print(data, offset, 3, "%dx%d\n",
((data[3] & 0x0007ffc0) >> 6) + 1,
((data[3] & 0xfff80000) >> 19) + 1);
kgem_debug_print(data, offset, 4, "volume depth\n");
kgem_debug_print(data, offset, 5, "\n");
kgem_debug_print(data, offset, 6, "\n");
return len;

case 0x7a00:
assert(len == 4 || len == 5);
/* bits 15:14 of dword 1 select what, if anything, is written back */
switch ((data[1] >> 14) & 0x3) {
case 0: desc1 = "no write"; break;
case 1: desc1 = "qword write"; break;
case 2: desc1 = "PS_DEPTH_COUNT write"; break;
case 3: desc1 = "TIMESTAMP write"; break;
}
kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
kgem_debug_print(data, offset, 1,
"%s, %scs stall, %stlb invalidate, "
"%ssync gfdt, %sdepth stall, %sRC write flush, "
"%sinst flush, %sTC flush\n",
desc1,
data[1] & (1 << 20) ? "" : "no ",
data[1] & (1 << 18) ? "" : "no ",
data[1] & (1 << 17) ? "" : "no ",
data[1] & (1 << 13) ? "" : "no ",
data[1] & (1 << 12) ? "" : "no ",
data[1] & (1 << 11) ? "" : "no ",
data[1] & (1 << 10) ? "" : "no ");
if (len == 5) {
kgem_debug_print(data, offset, 2, "destination address\n");
kgem_debug_print(data, offset, 3, "immediate dword low\n");
kgem_debug_print(data, offset, 4, "immediate dword high\n");
} else {
for (i = 2; i < len; i++) {
kgem_debug_print(data, offset, i, "\n");
}
}
return len;

case 0x7b00:
assert(len == 6);
kgem_debug_print(data, offset, 0,
"3DPRIMITIVE: %s %s\n",
get_965_prim_type(data[0]),
(data[0] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 1, "vertex count\n");
kgem_debug_print(data, offset, 2, "start vertex\n");
kgem_debug_print(data, offset, 3, "instance count\n");
kgem_debug_print(data, offset, 4, "start instance\n");
kgem_debug_print(data, offset, 5, "index bias\n");
/* dump the vertices using the recorded buffers and elements */
primitive_out(kgem, data);
return len;
}

/* For the rest, just dump the bytes */
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode)
break;

/* an opcode missing from the table is caught by this assert only */
assert(i < ARRAY_SIZE(opcodes));

/* single-dword opcodes carry no length field; others are range-checked */
len = 1;
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
if (opcodes[i].max_len > 1) {
len = (data[0] & 0xff) + 2;
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}

/* i is reused as the dword index from here on */
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);

return len;
}
 
/*
 * Zero the decoder's `state` object.  The kgem argument is not used by
 * this function.
 */
void kgem_gen5_finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
/drivers/video/Intel-2D/sna/kgem_debug_gen6.c
0,0 → 1,1075
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
//#include <sys/mman.h>
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
#include "gen6_render.h"
 
#include "kgem_debug.h"
 
/*
 * Decoder-side shadow of the vertex and dynamic state seen so far while
 * walking a batch.  It is filled in by the gen6_update_*() helpers below
 * and read back when primitives and dynamic state are dumped.
 */
static struct state {
/* One slot per vertex buffer index; written by gen6_update_vertex_buffer(). */
struct vertex_buffer {
/* Not written by any of the helpers visible in this file. */
int handle;
/* Mapped buffer base plus the relocation delta. */
const char *ptr;
/* Low 11 bits of the buffer's first dword. */
int pitch;

/* bo resolved for the relocation; NULL when the batch itself is the target. */
struct kgem_bo *current;
} vb[33];
/* One slot per vertex element; written by gen6_update_vertex_elements(). */
struct vertex_elements {
/* Index into vb[] (bits 31:26 of the element's first dword). */
int buffer;
/* Low 11 bits of the element's first dword. */
int offset;
/* Bit 25 of the element's first dword. */
bool valid;
/* Bits 24:16 of the element's first dword; compared against GEN6_SURFACEFORMAT_* in ve_out(). */
uint32_t type;
/* Per-component control values extracted by get_ve_component(). */
uint8_t swizzle[4];
} ve[33];
/* Set by gen6_update_sf_state() to 1 + the 6-bit field at bits 27:22. */
int num_ve;

/* Written by gen6_update_dynamic_buffer(); all NULL when no relocation matched. */
struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
/*
 * Record where one vertex buffer lives so later vertex dumps can read it.
 *
 * data points at the first dword of a vertex-buffer entry inside
 * kgem->batch: data[0] carries the buffer index (bits 31:26) and the pitch
 * (bits 10:0); data[1] is the dword patched by a relocation.
 *
 * NOTE(review): this helper compares target_handle against -1 and matches
 * on bo->target_handle, whereas the dynamic-buffer and get_reloc() helpers
 * in this file compare against 0 and match on bo->handle -- confirm which
 * convention is current.
 */
static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
	uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
	struct kgem_bo *bo = NULL;
	unsigned int slot;
	void *base;
	int i;

	/* Find the relocation entry that patches data[1]. */
	for (i = 0; i < kgem->nreloc; i++)
		if (kgem->reloc[i].offset == reloc)
			break;
	assert(i < kgem->nreloc);
	reloc = kgem->reloc[i].target_handle;

	if (reloc == (uint32_t)-1) {
		/* The relocation targets the batch itself. */
		base = kgem->batch;
	} else {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->target_handle == reloc)
				break;
		/* The walk must have stopped on a real entry, not the list head. */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	}

	base = (char *)base + kgem->reloc[i].delta;

	/*
	 * The index field is 6 bits wide (0..63) but only 33 slots exist;
	 * ignore out-of-range indices instead of writing past the table.
	 */
	slot = data[0] >> 26;
	if (slot >= sizeof(state.vb) / sizeof(state.vb[0]))
		return;

	state.vb[slot].current = bo;
	state.vb[slot].ptr = base;
	state.vb[slot].pitch = data[0] & 0x7ff;
}
 
/*
 * Track the dynamic state base named by batch dword `offset`.
 *
 * Nothing happens unless bit 0 of that dword is set.  If a relocation
 * exists for the dword, the target is mapped and the tracked pointers are
 * pointed at it; otherwise all tracked pointers are cleared.
 */
static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
{
	const uint32_t byte_offset = sizeof(uint32_t) * offset;
	struct kgem_bo *bo = NULL;
	void *base = NULL;
	void *ptr = NULL;
	int idx = 0;

	if (!(kgem->batch[offset] & 1))
		return;

	/* Locate the relocation entry for this dword, if there is one. */
	while (idx < kgem->nreloc && kgem->reloc[idx].offset != byte_offset)
		idx++;

	if (idx < kgem->nreloc) {
		const uint32_t handle = kgem->reloc[idx].target_handle;

		if (handle != 0) {
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == handle)
					break;
			/* The walk must have stopped on a real entry. */
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map__debug(kgem, bo);
		} else {
			/* Handle 0 is treated as the batch itself. */
			base = kgem->batch;
		}
		/* Bit 0 of the delta is masked off before it is used as an offset. */
		ptr = (char *)base + (kgem->reloc[idx].delta & ~1);
	}

	state.dynamic_state.current = bo;
	state.dynamic_state.base = base;
	state.dynamic_state.ptr = ptr;
}
 
/*
 * Extract the 3-bit control value for `component` (0..3) from the second
 * dword of a vertex element.  Component 0 sits at bit 28, component 3 at
 * bit 16, four bits apart.
 */
static uint32_t
get_ve_component(uint32_t data, int component)
{
	const int shift = 16 + (3 - component) * 4;

	return (data >> shift) & 0x7;
}
 
static void gen6_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 26;
state.ve[id].valid = !!(data[0] & (1 << 25));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
/*
 * Derive the tracked vertex-element count from the second dword of the SF
 * state command: one more than the 6-bit field at bits 27:22.
 */
static void gen6_update_sf_state(struct kgem *kgem, uint32_t *data)
{
	const uint32_t attrib_out = (data[1] >> 22) & 0x3f;

	state.num_ve = 1 + attrib_out;
}
 
/*
 * Print one vertex element whose source data is `max` signed 16-bit
 * values.  Each of the four output components is printed according to its
 * swizzle control; a "store source" control (1) prints v[c] while c < max
 * and "1.0" afterwards.
 */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	/* Walk at least the four components, or `max` of them if that is more. */
	const int limit = max > 4 ? max : 4;
	int c;

	ErrorF("(");
	for (c = 0; c < limit; c++) {
		const int have_source = c < max;

		switch (ve->swizzle[c]) {
		case 0:
			ErrorF("#");
			break;
		case 1:
			if (have_source)
				ErrorF("%d", v[c]);
			else
				ErrorF("1.0");
			break;
		case 2:
			ErrorF("0.0");
			break;
		case 3:
			ErrorF("1.0");
			break;
		case 4:
			ErrorF("0x1");
			break;
		case 5:
			break;
		default:
			ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
 
/*
 * Print one vertex element whose source data is up to `max` floats.
 * Source values are consumed only by components whose swizzle control is
 * 1; once `max` values have been consumed, such components print "1.0".
 */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int consumed = 0;
	int c;

	ErrorF("(");
	for (c = 0; c < 4; c++) {
		/* Evaluated before the component is printed, like the loop guard it replaces. */
		const int have_source = consumed < max;

		switch (ve->swizzle[c]) {
		case 0:
			ErrorF("#");
			break;
		case 1:
			if (have_source)
				ErrorF("%f", f[consumed++]);
			else
				ErrorF("1.0");
			break;
		case 2:
			ErrorF("0.0");
			break;
		case 3:
			ErrorF("1.0");
			break;
		case 4:
			ErrorF("0x1");
			break;
		case 5:
			break;
		default:
			ErrorF("?");
		}
		if (c < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
 
/*
 * Print the vertex element at `ptr` according to its surface format.
 * Float formats go to vertices_float_out(), 16-bit SINT/SSCALED formats to
 * vertices_sint16_out(); any other format prints nothing.
 */
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
	int floats = 0;
	int shorts = 0;

	switch (ve->type) {
	case GEN6_SURFACEFORMAT_R32_FLOAT:
		floats = 1;
		break;
	case GEN6_SURFACEFORMAT_R32G32_FLOAT:
		floats = 2;
		break;
	case GEN6_SURFACEFORMAT_R32G32B32_FLOAT:
		floats = 3;
		break;
	case GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT:
		floats = 4;
		break;
	case GEN6_SURFACEFORMAT_R16_SINT:
	case GEN6_SURFACEFORMAT_R16_SSCALED:
		shorts = 1;
		break;
	case GEN6_SURFACEFORMAT_R16G16_SINT:
	case GEN6_SURFACEFORMAT_R16G16_SSCALED:
		shorts = 2;
		break;
	case GEN6_SURFACEFORMAT_R16G16B16A16_SINT:
	case GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED:
		shorts = 4;
		break;
	default:
		return;
	}

	if (floats)
		vertices_float_out(ve, ptr, floats);
	else
		vertices_sint16_out(ve, ptr, shorts);
}
 
/*
 * Print every valid tracked vertex element of vertex number `v`,
 * separated by ", ".  The walk starts at element 1 and runs through
 * element state.num_ve.
 *
 * NOTE(review): neither the element index nor ve->buffer is range-checked
 * against the 33-entry tables here, and vb->ptr is used without a NULL
 * check -- confirm the callers guarantee sane state.
 */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	int idx = 1;

	for (;;) {
		const struct vertex_elements *elem = &state.ve[idx];
		const struct vertex_buffer *buf = &state.vb[elem->buffer];
		const void *ptr = buf->ptr + v * buf->pitch + elem->offset;

		if (elem->valid)
			ve_out(elem, ptr);

		/* Advance to the next valid element, or one past the last. */
		do {
			idx++;
		} while (idx <= state.num_ve && !state.ve[idx].valid);

		if (idx > state.num_ve)
			break;

		ErrorF(", ");
	}
}
 
/*
 * Dump the vertices referenced by a 3DPRIMITIVE command: data[1] is the
 * vertex count and data[2] the first vertex.  Bit 15 of data[0] must be
 * clear; the original comment marks that case as unsupported index buffers.
 */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	const uint32_t count = data[1];
	const uint32_t first = data[2];
	int n = 0;

	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	while (n < count) {
		int v = first + n;

		ErrorF(" [%d:%d] = ", n, v);
		indirect_vertex_out(kgem, v);
		ErrorF("\n");
		n++;
	}
}
 
/* Zero the whole tracked decoder state.  The kgem argument is not used. */
static void finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
 
/*
 * Describe one base-address dword of STATE_BASE_ADDRESS.  Bit 0 selects
 * between "address given" (printed with bit 0 masked off) and
 * "not updated".
 */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	const uint32_t dw = data[index];

	if (!(dw & 1)) {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state base address 0x%08x\n",
			 name, dw & ~1);
}
 
/*
 * Describe one upper-bound dword of STATE_BASE_ADDRESS.  A value of
 * exactly 1 means the bound is disabled, any other value with bit 0 set
 * carries a bound (printed with bit 0 masked off), and bit 0 clear means
 * the bound is not updated.
 */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t dw = data[index];

	if (dw == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
		return;
	}

	if (dw & 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound 0x%08x\n",
				 name, dw & ~1);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state upper bound not updated\n",
			 name);
}
 
/* Map a surface-type code to its printable name, or "unknown". */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	/* Codes 5 and 6 have no name and stay NULL. */
	static const char *const names[8] = {
		[0] = "1D",
		[1] = "2D",
		[2] = "3D",
		[3] = "CUBE",
		[4] = "BUFFER",
		[7] = "NULL",
	};

	if (surfacetype < sizeof(names) / sizeof(names[0]) && names[surfacetype])
		return names[surfacetype];

	return "unknown";
}
 
/* Map a depth-format code to its printable name, or "unknown". */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	/* Codes 3 and 4 have no name and stay NULL. */
	static const char *const names[6] = {
		[0] = "s8_z24float",
		[1] = "z32float",
		[2] = "z24s8",
		[5] = "z16",
	};

	if (depthformat < sizeof(names) / sizeof(names[0]) && names[depthformat])
		return names[depthformat];

	return "unknown";
}
 
/*
 * Name the control applied to output component `component` (0..3) of a
 * vertex element, given the element's second dword.  Control 1 ("store
 * source") is reported as the axis letter of the component.
 */
static const char *
get_965_element_component(uint32_t data, int component)
{
	static const char *const axis[4] = { "X", "Y", "Z", "W" };
	static const char *const fixed[6] = {
		[0] = "nostore",
		[2] = "0.0",
		[3] = "1.0",
		[4] = "0x1",
		[5] = "VID",
	};
	const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;

	if (control == 1) {
		if (component >= 0 && component <= 3)
			return axis[component];
		return "fail";
	}

	if (control < sizeof(fixed) / sizeof(fixed[0]))
		return fixed[control];

	return "fail";
}
 
/*
 * Name the primitive topology encoded in bits 14:10 of a 3DPRIMITIVE
 * header dword; codes without a name yield "fail".
 */
static const char *
get_965_prim_type(uint32_t data)
{
	/* Index 0 has no name and stays NULL. */
	static const char *const names[] = {
		[0x01] = "point list",
		[0x02] = "line list",
		[0x03] = "line strip",
		[0x04] = "tri list",
		[0x05] = "tri strip",
		[0x06] = "tri fan",
		[0x07] = "quad list",
		[0x08] = "quad strip",
		[0x09] = "line list adj",
		[0x0a] = "line strip adj",
		[0x0b] = "tri list adj",
		[0x0c] = "tri strip adj",
		[0x0d] = "tri strip reverse",
		[0x0e] = "polygon",
		[0x0f] = "rect list",
		[0x10] = "line loop",
		[0x11] = "point list bf",
		[0x12] = "line strip cont",
		[0x13] = "line strip bf",
		[0x14] = "line strip cont bf",
		[0x15] = "tri fan no stipple",
	};
	const uint32_t primtype = (data >> 10) & 0x1f;

	if (primtype < sizeof(names) / sizeof(names[0]) && names[primtype])
		return names[primtype];

	return "fail";
}
 
/*
 * Result record filled in by get_reloc(): zeroed on entry, and set only
 * when the relocation resolved to a buffer object that was mapped.
 */
struct reloc {
/* Buffer object the relocation resolved to. */
struct kgem_bo *bo;
/* Address returned by mapping that buffer object. */
void *base;
};
 
/*
 * Turn a batch dword that refers to state into a CPU pointer.
 *
 * With a non-NULL `base`, the dword's own value (low two bits cleared) is
 * used as an offset from it.  With a NULL `base`, the relocation entry for
 * the dword is looked up and its target (the batch for handle 0, otherwise
 * a mapped buffer object) plus the entry's delta is returned; in the
 * buffer-object case *r records the bo and its mapping.  *r is always
 * zeroed first.
 */
static void *
get_reloc(struct kgem *kgem,
	  void *base, const uint32_t *reloc,
	  struct reloc *r)
{
	struct kgem_bo *bo = NULL;
	uint32_t delta = *reloc;
	uint32_t key;
	int n;

	memset(r, 0, sizeof(*r));

	if (base != 0)
		return (char *)base + (delta & ~3);

	/* Relocation entries are keyed by the dword's byte offset in the batch. */
	key = sizeof(uint32_t) * (reloc - kgem->batch);
	for (n = 0; n < kgem->nreloc; n++)
		if (kgem->reloc[n].offset == key)
			break;
	assert(n < kgem->nreloc);

	key = kgem->reloc[n].target_handle;
	delta = kgem->reloc[n].delta;

	if (key != 0) {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == key)
				break;
		/* The walk must have stopped on a real entry. */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
		r->bo = bo;
		r->base = base;
	} else {
		base = kgem->batch;
	}

	return (char *)base + (delta & ~3);
}
 
/* Name a sampler filter mode; anything that is not LINEAR reads as "nearest". */
static const char *
gen6_filter_to_string(uint32_t filter)
{
	if (filter == GEN6_MAPFILTER_LINEAR)
		return "linear";

	return "nearest";
}
 
/*
 * Name a texture-coordinate wrap mode; anything other than WRAP, CLAMP or
 * MIRROR reads as "border".
 */
static const char *
gen6_repeat_to_string(uint32_t repeat)
{
	if (repeat == GEN6_TEXCOORDMODE_WRAP)
		return "wrap";
	if (repeat == GEN6_TEXCOORDMODE_CLAMP)
		return "clamp";
	if (repeat == GEN6_TEXCOORDMODE_MIRROR)
		return "mirror";

	return "border";
}
 
static void
gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
{
const struct gen6_sampler_state *ss;
struct reloc r;
const char *min, *mag;
const char *s_wrap, *t_wrap, *r_wrap;
 
ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
 
min = gen6_filter_to_string(ss->ss0.min_filter);
mag = gen6_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode);
t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode);
r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode);
 
ErrorF(" Sampler 0:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
 
ss++;
min = gen6_filter_to_string(ss->ss0.min_filter);
mag = gen6_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode);
t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode);
r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode);
 
ErrorF(" Sampler 1:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
}
 
/*
 * Name a blend factor: the GEN6_BLENDFACTOR_* suffix for known values,
 * "???" otherwise.
 */
static const char *
gen6_blend_factor_to_string(uint32_t v)
{
	const char *name = "???";

	switch (v) {
#define FACTOR(x) case GEN6_BLENDFACTOR_##x: name = #x; break
	FACTOR(ONE);
	FACTOR(SRC_COLOR);
	FACTOR(SRC_ALPHA);
	FACTOR(DST_ALPHA);
	FACTOR(DST_COLOR);
	FACTOR(SRC_ALPHA_SATURATE);
	FACTOR(CONST_COLOR);
	FACTOR(CONST_ALPHA);
	FACTOR(SRC1_COLOR);
	FACTOR(SRC1_ALPHA);
	FACTOR(ZERO);
	FACTOR(INV_SRC_COLOR);
	FACTOR(INV_SRC_ALPHA);
	FACTOR(INV_DST_ALPHA);
	FACTOR(INV_DST_COLOR);
	FACTOR(INV_CONST_COLOR);
	FACTOR(INV_CONST_ALPHA);
	FACTOR(INV_SRC1_COLOR);
	FACTOR(INV_SRC1_ALPHA);
#undef FACTOR
	default:
		break;
	}

	return name;
}
 
/*
 * Name a blend function: the GEN6_BLENDFUNCTION_* suffix for known values,
 * "???" otherwise.
 */
static const char *
gen6_blend_function_to_string(uint32_t v)
{
	const char *name = "???";

	switch (v) {
#define FUNCTION(x) case GEN6_BLENDFUNCTION_##x: name = #x; break
	FUNCTION(ADD);
	FUNCTION(SUBTRACT);
	FUNCTION(REVERSE_SUBTRACT);
	FUNCTION(MIN);
	FUNCTION(MAX);
#undef FUNCTION
	default:
		break;
	}

	return name;
}
 
/* Reinterpret the 32 bits of `dw` as a float, punning through a union. */
static float unpack_float(uint32_t dw)
{
	const union {
		uint32_t bits;
		float value;
	} pun = { .bits = dw };

	return pun.value;
}
 
/*
 * Dump the blend state that the batch dword at `reloc` refers to, resolved
 * against the tracked dynamic state pointer: whether blending is enabled,
 * the blend function and the source and destination factors.
 */
static void
gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc)
{
	const struct gen6_blend_state *blend;
	struct reloc r;

	blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);

	ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
	       blend->blend0.blend_enable ? "enabled" : "disabled",
	       gen6_blend_function_to_string(blend->blend0.blend_func),
	       gen6_blend_factor_to_string(blend->blend0.source_blend_factor),
	       gen6_blend_factor_to_string(blend->blend0.dest_blend_factor));
}
 
/*
 * Decode and print the 3D/state command found at kgem->batch[offset].
 *
 * Commands with a dedicated case in the switch are printed field by
 * field, and some also update the tracked vertex/dynamic state.  Every
 * other command is looked up in the opcode table and dumped dword by
 * dword.
 *
 * Returns the length of the command in dwords so the caller can step to
 * the next one.  An opcode that is not in the table trips an assert; in
 * builds where asserts are compiled out it is reported and a length of 1
 * is returned instead of reading past the end of the table.
 */
int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset)
{
	static const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		const char *name;
	} opcodes[] = {
		{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
		{ 0x6102, 2, 2 , "STATE_SIP" },
		{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
		{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
		{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
		{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
		{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
		{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
		{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
		{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
		{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
		{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
		{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
		{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
		{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
		/*
		 * A second 0x7909 entry named 3DSTATE_CLEAR_PARAMS used to
		 * follow; the lookup stops at the first match, so it could
		 * never be selected.  CLEAR_PARAMS is listed under 0x7910.
		 */
		{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
		{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
		{ 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" },
		{ 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" },
		{ 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" },
		{ 0x7b00, 6, 6, "3DPRIMITIVE" },
		{ 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" },
		{ 0x7805, 3, 3, "3DSTATE_URB" },
		{ 0x780d, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" },
		{ 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" },
		{ 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" },
		{ 0x7810, 6, 6, "3DSTATE_VS_STATE" },
		{ 0x7811, 7, 7, "3DSTATE_GS_STATE" },
		{ 0x7812, 4, 4, "3DSTATE_CLIP_STATE" },
		{ 0x7813, 20, 20, "3DSTATE_SF_STATE" },
		{ 0x7814, 9, 9, "3DSTATE_WM_STATE" },
		{ 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" },
		{ 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" },
		{ 0x7817, 5, 5, "3DSTATE_CONSTANT_WM_STATE" },
		{ 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" },
	};
	uint32_t *data = kgem->batch + offset;
	uint32_t op;
	unsigned int len;
	int i, j;
	const char *desc1 = NULL;

	/* Low byte of the header is (length - 2); the top 16 bits are the opcode. */
	len = (data[0] & 0xff) + 2;
	op = (data[0] & 0xffff0000) >> 16;
	switch (op) {
	case 0x6101:
		i = 0;
		kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
		/* kgem->gen is compared against octal constants throughout. */
		if (kgem->gen >= 060) {
			assert(len == 10);

			state_base_out(data, offset, i++, "general");
			state_base_out(data, offset, i++, "surface");
			state_base_out(data, offset, i++, "dynamic");
			state_base_out(data, offset, i++, "indirect");
			state_base_out(data, offset, i++, "instruction");

			state_max_out(data, offset, i++, "general");
			state_max_out(data, offset, i++, "dynamic");
			state_max_out(data, offset, i++, "indirect");
			state_max_out(data, offset, i++, "instruction");

			/* Dword 3 is the dynamic state base printed above. */
			gen6_update_dynamic_buffer(kgem, offset + 3);
		} else if (kgem->gen >= 050) {
			assert(len == 8);

			state_base_out(data, offset, i++, "general");
			state_base_out(data, offset, i++, "surface");
			state_base_out(data, offset, i++, "media");
			state_base_out(data, offset, i++, "instruction");

			state_max_out(data, offset, i++, "general");
			state_max_out(data, offset, i++, "media");
			state_max_out(data, offset, i++, "instruction");
		}

		return len;

	case 0x7801:
		if (kgem->gen >= 060) {
			assert(len == 4);

			kgem_debug_print(data, offset, 0,
					 "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, "
					 "GS mod %d, WM mod %d\n",
					 (data[0] & (1 << 8)) != 0,
					 (data[0] & (1 << 9)) != 0,
					 (data[0] & (1 << 12)) != 0);
			kgem_debug_print(data, offset, 1, "VS binding table\n");
			kgem_debug_print(data, offset, 2, "GS binding table\n");
			kgem_debug_print(data, offset, 3, "WM binding table\n");
		} else if (kgem->gen >= 040) {
			assert(len == 6);

			kgem_debug_print(data, offset, 0,
					 "3DSTATE_BINDING_TABLE_POINTERS\n");
			kgem_debug_print(data, offset, 1, "VS binding table\n");
			kgem_debug_print(data, offset, 2, "GS binding table\n");
			kgem_debug_print(data, offset, 3, "CLIP binding table\n");
			kgem_debug_print(data, offset, 4, "SF binding table\n");
			kgem_debug_print(data, offset, 5, "WM binding table\n");
		}

		return len;

	case 0x7802:
		assert(len == 4);
		kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE_POINTERS: VS mod %d, "
				 "GS mod %d, WM mod %d\n",
				 (data[0] & (1 << 8)) != 0,
				 (data[0] & (1 << 9)) != 0,
				 (data[0] & (1 << 12)) != 0);
		kgem_debug_print(data, offset, 1, "VS sampler state\n");
		kgem_debug_print(data, offset, 2, "GS sampler state\n");
		kgem_debug_print(data, offset, 3, "WM sampler state\n");
		/* Only the WM sampler state is followed and dumped. */
		gen6_decode_sampler_state(kgem, &data[3]);
		return len;

	case 0x7808:
		assert((len - 1) % 4 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");

		/* Each buffer entry is four dwords. */
		for (i = 1; i < len;) {
			gen6_update_vertex_buffer(kgem, data + i);

			kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
					 data[i] >> 26,
					 data[i] & (1 << 20) ? "random" : "sequential",
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i++, "buffer address\n");
			kgem_debug_print(data, offset, i++, "max index\n");
			kgem_debug_print(data, offset, i++, "mbz\n");
		}
		return len;

	case 0x7809:
		assert((len + 1) % 2 == 0);
		kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");

		/* Each element is two dwords; its id is its position in the list. */
		for (i = 1; i < len;) {
			gen6_update_vertex_elements(kgem, (i - 1)/2, data + i);

			kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
					 "src offset 0x%04x bytes\n",
					 data[i] >> 26,
					 data[i] & (1 << 25) ? "" : "in",
					 (data[i] >> 16) & 0x1ff,
					 data[i] & 0x07ff);
			i++;
			kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
					 "dst offset 0x%02x bytes\n",
					 get_965_element_component(data[i], 0),
					 get_965_element_component(data[i], 1),
					 get_965_element_component(data[i], 2),
					 get_965_element_component(data[i], 3),
					 (data[i] & 0xff) * 4);
			i++;
		}
		return len;

	case 0x780d:
		assert(len == 4);
		kgem_debug_print(data, offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n");
		kgem_debug_print(data, offset, 1, "clip\n");
		kgem_debug_print(data, offset, 2, "sf\n");
		kgem_debug_print(data, offset, 3, "cc\n");
		return len;

	case 0x780a:
		assert(len == 3);
		kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
		kgem_debug_print(data, offset, 1, "beginning buffer address\n");
		kgem_debug_print(data, offset, 2, "ending buffer address\n");
		return len;

	case 0x780e:
		assert(len == 4);
		kgem_debug_print(data, offset, 0, "3DSTATE_CC_STATE_POINTERS\n");
		kgem_debug_print(data, offset, 1, "blend%s\n",
				 data[1] & 1 ? " update" : "");
		/* Only follow the blend pointer when it is being updated. */
		if (data[1] & 1)
			gen6_decode_blend(kgem, data+1);
		kgem_debug_print(data, offset, 2, "depth+stencil%s\n",
				 data[2] & 1 ? " update" : "");
		kgem_debug_print(data, offset, 3, "cc%s\n",
				 data[3] & 1 ? " update" : "");
		return len;

	case 0x780f:
		assert(len == 2);
		kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_POINTERS\n");
		kgem_debug_print(data, offset, 1, "scissor rect offset\n");
		return len;

	case 0x7810:
		assert(len == 6);
		kgem_debug_print(data, offset, 0, "3DSTATE_VS\n");
		kgem_debug_print(data, offset, 1, "kernel pointer\n");
		kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
				 "Binding table count %d\n",
				 (data[2] >> 31) & 1,
				 (data[2] >> 30) & 1,
				 (data[2] >> 27) & 7,
				 (data[2] >> 18) & 0xff);
		kgem_debug_print(data, offset, 3, "scratch offset\n");
		kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, "
				 "VUE read offset %d\n",
				 (data[4] >> 20) & 0x1f,
				 (data[4] >> 11) & 0x3f,
				 (data[4] >> 4) & 0x3f);
		kgem_debug_print(data, offset, 5, "Max Threads %d, Vertex Cache %sable, "
				 "VS func %sable\n",
				 ((data[5] >> 25) & 0x7f) + 1,
				 (data[5] & (1 << 1)) != 0 ? "dis" : "en",
				 (data[5] & 1) != 0 ? "en" : "dis");
		return len;

	case 0x7811:
		assert(len == 7);
		kgem_debug_print(data, offset, 0, "3DSTATE_GS\n");
		kgem_debug_print(data, offset, 1, "kernel pointer\n");
		kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
				 "Binding table count %d\n",
				 (data[2] >> 31) & 1,
				 (data[2] >> 30) & 1,
				 (data[2] >> 27) & 7,
				 (data[2] >> 18) & 0xff);
		kgem_debug_print(data, offset, 3, "scratch offset\n");
		kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, "
				 "VUE read offset %d\n",
				 (data[4] & 0xf),
				 (data[4] >> 11) & 0x3f,
				 (data[4] >> 4) & 0x3f);
		kgem_debug_print(data, offset, 5, "Max Threads %d, Rendering %sable\n",
				 ((data[5] >> 25) & 0x7f) + 1,
				 (data[5] & (1 << 8)) != 0 ? "en" : "dis");
		kgem_debug_print(data, offset, 6, "Reorder %sable, Discard Adjacency %sable, "
				 "GS %sable\n",
				 (data[6] & (1 << 30)) != 0 ? "en" : "dis",
				 (data[6] & (1 << 29)) != 0 ? "en" : "dis",
				 (data[6] & (1 << 15)) != 0 ? "en" : "dis");
		return len;

	case 0x7812:
		assert(len == 4);
		kgem_debug_print(data, offset, 0, "3DSTATE_CLIP\n");
		kgem_debug_print(data, offset, 1, "UserClip distance cull test mask 0x%x\n",
				 data[1] & 0xff);
		/* Bit 31 is tested with an unsigned constant: 1 << 31 overflows int. */
		kgem_debug_print(data, offset, 2, "Clip %sable, API mode %s, Viewport XY test %sable, "
				 "Viewport Z test %sable, Guardband test %sable, Clip mode %d, "
				 "Perspective Divide %sable, Non-Perspective Barycentric %sable, "
				 "Tri Provoking %d, Line Provoking %d, Trifan Provoking %d\n",
				 (data[2] & (1u << 31)) != 0 ? "en" : "dis",
				 (data[2] & (1 << 30)) != 0 ? "D3D" : "OGL",
				 (data[2] & (1 << 28)) != 0 ? "en" : "dis",
				 (data[2] & (1 << 27)) != 0 ? "en" : "dis",
				 (data[2] & (1 << 26)) != 0 ? "en" : "dis",
				 (data[2] >> 13) & 7,
				 (data[2] & (1 << 9)) != 0 ? "dis" : "en",
				 (data[2] & (1 << 8)) != 0 ? "en" : "dis",
				 (data[2] >> 4) & 3,
				 (data[2] >> 2) & 3,
				 (data[2] & 3));
		kgem_debug_print(data, offset, 3, "Min PointWidth %d, Max PointWidth %d, "
				 "Force Zero RTAIndex %sable, Max VPIndex %d\n",
				 (data[3] >> 17) & 0x7ff,
				 (data[3] >> 6) & 0x7ff,
				 (data[3] & (1 << 5)) != 0 ? "en" : "dis",
				 (data[3] & 0xf));
		return len;

	case 0x7813:
		/* Tracks how many vertex elements later vertex dumps walk. */
		gen6_update_sf_state(kgem, data);
		assert(len == 20);
		kgem_debug_print(data, offset, 0, "3DSTATE_SF\n");
		kgem_debug_print(data, offset, 1, "Attrib Out %d, Attrib Swizzle %sable, VUE read length %d, "
				 "VUE read offset %d\n",
				 (data[1] >> 22) & 0x3f,
				 (data[1] & (1 << 21)) != 0 ? "en" : "dis",
				 (data[1] >> 11) & 0x1f,
				 (data[1] >> 4) & 0x3f);
		kgem_debug_print(data, offset, 2, "Legacy Global DepthBias %sable, FrontFace fill %d, BF fill %d, "
				 "VP transform %sable, FrontWinding_%s\n",
				 (data[2] & (1 << 11)) != 0 ? "en" : "dis",
				 (data[2] >> 5) & 3,
				 (data[2] >> 3) & 3,
				 (data[2] & (1 << 1)) != 0 ? "en" : "dis",
				 (data[2] & 1) != 0 ? "CCW" : "CW");
		kgem_debug_print(data, offset, 3, "AA %sable, CullMode %d, Scissor %sable, Multisample mode %d\n",
				 (data[3] & (1u << 31)) != 0 ? "en" : "dis",
				 (data[3] >> 29) & 3,
				 (data[3] & (1 << 11)) != 0 ? "en" : "dis",
				 (data[3] >> 8) & 3);
		kgem_debug_print(data, offset, 4, "Last Pixel %sable, SubPixel Precision %d, Use PixelWidth %d\n",
				 (data[4] & (1u << 31)) != 0 ? "en" : "dis",
				 (data[4] & (1 << 12)) != 0 ? 4 : 8,
				 (data[4] & (1 << 11)) != 0);
		kgem_debug_print(data, offset, 5, "Global Depth Offset Constant %f\n", unpack_float(data[5]));
		kgem_debug_print(data, offset, 6, "Global Depth Offset Scale %f\n", unpack_float(data[6]));
		kgem_debug_print(data, offset, 7, "Global Depth Offset Clamp %f\n", unpack_float(data[7]));
		/* Each dword packs two attributes: odd one in the high half, even one in the low half. */
		for (i = 0, j = 0; i < 8; i++, j+=2)
			kgem_debug_print(data, offset, i+8, "Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, "
					 "Source %d); Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, Source %d)\n",
					 j+1,
					 (data[8+i] & (1u << 31)) != 0 ? "W":"",
					 (data[8+i] & (1 << 30)) != 0 ? "Z":"",
					 (data[8+i] & (1 << 29)) != 0 ? "Y":"",
					 (data[8+i] & (1 << 28)) != 0 ? "X":"",
					 (data[8+i] >> 25) & 3, (data[8+i] >> 22) & 3,
					 (data[8+i] >> 16) & 0x1f,
					 j,
					 (data[8+i] & (1 << 15)) != 0 ? "W":"",
					 (data[8+i] & (1 << 14)) != 0 ? "Z":"",
					 (data[8+i] & (1 << 13)) != 0 ? "Y":"",
					 (data[8+i] & (1 << 12)) != 0 ? "X":"",
					 (data[8+i] >> 9) & 3, (data[8+i] >> 6) & 3,
					 (data[8+i] & 0x1f));
		kgem_debug_print(data, offset, 16, "Point Sprite TexCoord Enable\n");
		kgem_debug_print(data, offset, 17, "Const Interp Enable\n");
		kgem_debug_print(data, offset, 18, "Attrib 7-0 WrapShortest Enable\n");
		kgem_debug_print(data, offset, 19, "Attrib 15-8 WrapShortest Enable\n");

		return len;

	case 0x7814:
		assert(len == 9);
		kgem_debug_print(data, offset, 0, "3DSTATE_WM\n");
		kgem_debug_print(data, offset, 1, "kernel start pointer 0\n");
		kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, "
				 "Binding table count %d\n",
				 (data[2] >> 31) & 1,
				 (data[2] >> 30) & 1,
				 (data[2] >> 27) & 7,
				 (data[2] >> 18) & 0xff);
		kgem_debug_print(data, offset, 3, "scratch offset\n");
		kgem_debug_print(data, offset, 4, "Depth Clear %d, Depth Resolve %d, HiZ Resolve %d, "
				 "Dispatch GRF start[0] %d, start[1] %d, start[2] %d\n",
				 (data[4] & (1 << 30)) != 0,
				 (data[4] & (1 << 28)) != 0,
				 (data[4] & (1 << 27)) != 0,
				 (data[4] >> 16) & 0x7f,
				 (data[4] >> 8) & 0x7f,
				 (data[4] & 0x7f));
		kgem_debug_print(data, offset, 5, "MaxThreads %d, PS KillPixel %d, PS computed Z %d, "
				 "PS use sourceZ %d, Thread Dispatch %d, PS use sourceW %d, Dispatch32 %d, "
				 "Dispatch16 %d, Dispatch8 %d\n",
				 ((data[5] >> 25) & 0x7f) + 1,
				 (data[5] & (1 << 22)) != 0,
				 (data[5] & (1 << 21)) != 0,
				 (data[5] & (1 << 20)) != 0,
				 (data[5] & (1 << 19)) != 0,
				 (data[5] & (1 << 8)) != 0,
				 (data[5] & (1 << 2)) != 0,
				 (data[5] & (1 << 1)) != 0,
				 (data[5] & (1 << 0)) != 0);
		kgem_debug_print(data, offset, 6, "Num SF output %d, Pos XY offset %d, ZW interp mode %d, "
				 "Barycentric interp mode 0x%x, Point raster rule %d, Multisample mode %d, "
				 "Multisample Dispatch mode %d\n",
				 (data[6] >> 20) & 0x3f,
				 (data[6] >> 18) & 3,
				 (data[6] >> 16) & 3,
				 (data[6] >> 10) & 0x3f,
				 (data[6] & (1 << 9)) != 0,
				 (data[6] >> 1) & 3,
				 (data[6] & 1));
		kgem_debug_print(data, offset, 7, "kernel start pointer 1\n");
		kgem_debug_print(data, offset, 8, "kernel start pointer 2\n");

		return len;

	case 0x7900:
		assert(len == 4);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DRAWING_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "top left: %d, %d\n",
				 (uint16_t)(data[1] & 0xffff),
				 (uint16_t)(data[1] >> 16));
		kgem_debug_print(data, offset, 2, "bottom right: %d, %d\n",
				 (uint16_t)(data[2] & 0xffff),
				 (uint16_t)(data[2] >> 16));
		/* The origin is printed as signed, unlike the two corners. */
		kgem_debug_print(data, offset, 3, "origin: %d, %d\n",
				 (int16_t)(data[3] & 0xffff),
				 (int16_t)(data[3] >> 16));
		return len;

	case 0x7905:
		assert(len == 7);
		kgem_debug_print(data, offset, 0,
				 "3DSTATE_DEPTH_BUFFER\n");
		kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n",
				 get_965_surfacetype(data[1] >> 29),
				 get_965_depthformat((data[1] >> 18) & 0x7),
				 (data[1] & 0x0001ffff) + 1,
				 data[1] & (1 << 27) ? "" : "not ",
				 (data[1] & (1 << 22)) != 0,
				 (data[1] & (1 << 21)) != 0);
		kgem_debug_print(data, offset, 2, "depth offset\n");
		kgem_debug_print(data, offset, 3, "%dx%d\n",
				 ((data[3] & 0x0007ffc0) >> 6) + 1,
				 ((data[3] & 0xfff80000) >> 19) + 1);
		kgem_debug_print(data, offset, 4, "volume depth\n");
		kgem_debug_print(data, offset, 5, "\n");
		kgem_debug_print(data, offset, 6, "\n");
		return len;

	case 0x7a00:
		assert(len == 4 || len == 5);
		/* The 2-bit field covers all four cases, so desc1 is always set. */
		switch ((data[1] >> 14) & 0x3) {
		case 0: desc1 = "no write"; break;
		case 1: desc1 = "qword write"; break;
		case 2: desc1 = "PS_DEPTH_COUNT write"; break;
		case 3: desc1 = "TIMESTAMP write"; break;
		}
		kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n");
		kgem_debug_print(data, offset, 1,
				 "%s, %scs stall, %stlb invalidate, "
				 "%ssync gfdt, %sdepth stall, %sRC write flush, "
				 "%sinst flush, %sTC flush\n",
				 desc1,
				 data[1] & (1 << 20) ? "" : "no ",
				 data[1] & (1 << 18) ? "" : "no ",
				 data[1] & (1 << 17) ? "" : "no ",
				 data[1] & (1 << 13) ? "" : "no ",
				 data[1] & (1 << 12) ? "" : "no ",
				 data[1] & (1 << 11) ? "" : "no ",
				 data[1] & (1 << 10) ? "" : "no ");
		if (len == 5) {
			kgem_debug_print(data, offset, 2, "destination address\n");
			kgem_debug_print(data, offset, 3, "immediate dword low\n");
			kgem_debug_print(data, offset, 4, "immediate dword high\n");
		} else {
			for (i = 2; i < len; i++) {
				kgem_debug_print(data, offset, i, "\n");
			}
		}
		return len;

	case 0x7b00:
		assert(len == 6);
		kgem_debug_print(data, offset, 0,
				 "3DPRIMITIVE: %s %s\n",
				 get_965_prim_type(data[0]),
				 (data[0] & (1 << 15)) ? "random" : "sequential");
		kgem_debug_print(data, offset, 1, "vertex count\n");
		kgem_debug_print(data, offset, 2, "start vertex\n");
		kgem_debug_print(data, offset, 3, "instance count\n");
		kgem_debug_print(data, offset, 4, "start instance\n");
		kgem_debug_print(data, offset, 5, "index bias\n");
		primitive_out(kgem, data);
		return len;
	}

	/* For the rest, just dump the bytes */
	for (i = 0; i < ARRAY_SIZE(opcodes); i++)
		if (op == opcodes[i].opcode)
			break;

	assert(i < ARRAY_SIZE(opcodes));
	if (i >= (int)ARRAY_SIZE(opcodes)) {
		/* Reached only when asserts are compiled out: never index past the table. */
		kgem_debug_print(data, offset, 0,
				 "unknown 3D opcode 0x%04x\n", op);
		return 1;
	}

	len = 1;
	kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
	if (opcodes[i].max_len > 1) {
		len = (data[0] & 0xff) + 2;
		assert(len >= opcodes[i].min_len &&
		       len <= opcodes[i].max_len);
	}

	for (i = 1; i < len; i++)
		kgem_debug_print(data, offset, i, "dword %d\n", i);

	return len;
}
 
/*
 * Public entry point for resetting the gen6 decoder: forwards to
 * finish_state(), which zeroes the tracked state.
 */
void kgem_gen6_finish_state(struct kgem *kgem)
{
finish_state(kgem);
}
/drivers/video/Intel-2D/sna/kgem_debug_gen7.c
0,0 → 1,715
/*
* Copyright © 2007-2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Eric Anholt <eric@anholt.net>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <assert.h>
 
#include "sna.h"
#include "sna_reg.h"
#include "gen7_render.h"
 
#include "kgem_debug.h"
 
/*
 * Decoder-side shadow of the vertex and dynamic state seen so far while
 * walking a batch, filled in by the gen7_update_*() helpers below.
 */
static struct state {
/* One slot per vertex buffer index; written by gen7_update_vertex_buffer(). */
struct vertex_buffer {
/* Not written by any of the helpers visible in this file section. */
int handle;
/* Start of the relocation target (the batch or a mapped bo). */
void *base;
/* base plus the relocation delta. */
const char *ptr;
/* Low 11 bits of the buffer's first dword. */
int pitch;

/* bo resolved for the relocation; NULL when the batch itself is the target. */
struct kgem_bo *current;
} vb[33];
/* One slot per vertex element; written by gen7_update_vertex_elements(). */
struct vertex_elements {
/* Index into vb[] (bits 31:26 of the element's first dword). */
int buffer;
/* Low 11 bits of the element's first dword. */
int offset;
/* Bit 25 of the element's first dword. */
bool valid;
/* Bits 24:16 of the element's first dword. */
uint32_t type;
/* Per-component control values extracted by get_ve_component(). */
uint8_t swizzle[4];
} ve[33];
/* NOTE(review): presumably set by gen7_update_sf_state(), whose body is outside this section. */
int num_ve;

/* Written by gen7_update_dynamic_buffer(); all NULL when no relocation matched. */
struct dynamic_state {
struct kgem_bo *current;
void *base, *ptr;
} dynamic_state;
} state;
 
/*
 * Record where one vertex buffer lives so later vertex dumps can read it.
 *
 * data points at the first dword of a vertex-buffer entry inside
 * kgem->batch: data[0] carries the buffer index (bits 31:26) and the pitch
 * (bits 10:0); data[1] is the dword patched by a relocation.
 */
static void gen7_update_vertex_buffer(struct kgem *kgem, const uint32_t *data)
{
	uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch);
	struct kgem_bo *bo = NULL;
	unsigned int slot;
	void *base, *ptr;
	int i;

	/* Find the relocation entry that patches data[1]. */
	for (i = 0; i < kgem->nreloc; i++)
		if (kgem->reloc[i].offset == reloc)
			break;
	assert(i < kgem->nreloc);
	reloc = kgem->reloc[i].target_handle;

	if (reloc == 0) {
		/* Handle 0 is treated as the batch itself. */
		base = kgem->batch;
	} else {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == reloc)
				break;
		/* The walk must have stopped on a real entry, not the list head. */
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
	}
	ptr = (char *)base + kgem->reloc[i].delta;

	/*
	 * The index field is 6 bits wide (0..63) but only 33 slots exist;
	 * ignore out-of-range indices instead of writing past the table.
	 */
	slot = data[0] >> 26;
	if (slot >= sizeof(state.vb) / sizeof(state.vb[0]))
		return;

	state.vb[slot].current = bo;
	state.vb[slot].base = base;
	state.vb[slot].ptr = ptr;
	state.vb[slot].pitch = data[0] & 0x7ff;
}
 
/*
 * Remember where the dynamic state buffer lives.
 *
 * offset is the dword index, inside the batch, of the dynamic state base
 * address. Nothing happens unless bit 0 of that dword is set. When a
 * relocation is attached to the dword, the target (the batch for handle 0,
 * otherwise the matching bo of the next request) is resolved and stored in
 * state.dynamic_state; without a relocation all three fields become NULL.
 */
static void gen7_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset)
{
	const uint32_t byte_offset = sizeof(uint32_t) * offset;
	struct kgem_bo *bo = NULL;
	void *base = NULL, *ptr = NULL;
	int n = 0;

	if (!(kgem->batch[offset] & 1))
		return;

	while (n < kgem->nreloc && kgem->reloc[n].offset != byte_offset)
		n++;

	if (n < kgem->nreloc) {
		const uint32_t handle = kgem->reloc[n].target_handle;

		if (handle != 0) {
			list_for_each_entry(bo, &kgem->next_request->buffers, request)
				if (bo->handle == handle)
					break;
			assert(&bo->request != &kgem->next_request->buffers);
			base = kgem_bo_map__debug(kgem, bo);
		} else {
			base = kgem->batch;
		}
		/* Bit 0 of the delta is masked off before forming the pointer. */
		ptr = (char *)base + (kgem->reloc[n].delta & ~1);
	}

	state.dynamic_state.current = bo;
	state.dynamic_state.base = base;
	state.dynamic_state.ptr = ptr;
}
 
/*
 * Extract the 3-bit control value of one component from the second dword
 * of a vertex element. Component 0 sits at bit 28, component 3 at bit 16.
 */
static uint32_t
get_ve_component(uint32_t data, int component)
{
	const int shift = 16 + 4 * (3 - component);
	const uint32_t field = data >> shift;

	return field & 0x7;
}
 
static void gen7_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data)
{
state.ve[id].buffer = data[0] >> 26;
state.ve[id].valid = !!(data[0] & (1 << 25));
state.ve[id].type = (data[0] >> 16) & 0x1ff;
state.ve[id].offset = data[0] & 0x7ff;
state.ve[id].swizzle[0] = get_ve_component(data[1], 0);
state.ve[id].swizzle[1] = get_ve_component(data[1], 1);
state.ve[id].swizzle[2] = get_ve_component(data[1], 2);
state.ve[id].swizzle[3] = get_ve_component(data[1], 3);
}
 
/*
 * Derive the number of vertex elements from bits 27:22 of data[1]:
 * the stored value is that 6-bit field plus one.
 */
static void gen7_update_sf_state(struct kgem *kgem, uint32_t *data)
{
	const uint32_t count_field = (data[1] >> 22) & 0x3f;

	state.num_ve = 1 + count_field;
}
 
/*
 * Print one vertex element made of signed 16-bit values as a 4-tuple.
 *
 * The first max components (callers pass at most 4) may print a stored
 * value from v; components beyond max print 1.0 where a stored value would
 * have been used. The swizzle control of each component selects what is
 * shown.
 */
static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max)
{
	int comp;

	ErrorF("(");
	for (comp = 0; comp < 4; comp++) {
		const int stored = comp < max;

		switch (ve->swizzle[comp]) {
		case 0:
			ErrorF("#");
			break;
		case 1:
			if (stored)
				ErrorF("%d", v[comp]);
			else
				ErrorF("1.0");
			break;
		case 2:
			ErrorF("0.0");
			break;
		case 3:
			ErrorF("1.0");
			break;
		case 4:
			ErrorF("0x1");
			break;
		case 5:
			break;
		default:
			ErrorF("?");
		}
		if (comp < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
 
/*
 * Print one vertex element made of floats as a 4-tuple.
 *
 * Stored values are consumed from f only by components whose swizzle
 * control is 1. Once max values have been consumed, every remaining
 * component is printed in padding mode, where control 1 shows 1.0.
 */
static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max)
{
	int comp;
	int used = 0;

	ErrorF("(");
	for (comp = 0; comp < 4; comp++) {
		/* used never decreases, so padding mode is permanent once entered */
		const int stored = used < max;

		switch (ve->swizzle[comp]) {
		case 0:
			ErrorF("#");
			break;
		case 1:
			if (stored)
				ErrorF("%f", f[used++]);
			else
				ErrorF("1.0");
			break;
		case 2:
			ErrorF("0.0");
			break;
		case 3:
			ErrorF("1.0");
			break;
		case 4:
			ErrorF("0x1");
			break;
		case 5:
			break;
		default:
			ErrorF("?");
		}
		if (comp < 3)
			ErrorF(", ");
	}
	ErrorF(")");
}
 
/*
 * Print the vertex element found at ptr according to its surface format.
 * Float formats go through vertices_float_out(), 16-bit SINT and SSCALED
 * formats through vertices_sint16_out(); any other format prints nothing.
 */
static void ve_out(const struct vertex_elements *ve, const void *ptr)
{
	switch (ve->type) {
	case GEN7_SURFACEFORMAT_R32_FLOAT:
		vertices_float_out(ve, ptr, 1);
		break;
	case GEN7_SURFACEFORMAT_R32G32_FLOAT:
		vertices_float_out(ve, ptr, 2);
		break;
	case GEN7_SURFACEFORMAT_R32G32B32_FLOAT:
		vertices_float_out(ve, ptr, 3);
		break;
	case GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT:
		vertices_float_out(ve, ptr, 4);
		break;

	case GEN7_SURFACEFORMAT_R16_SINT:
	case GEN7_SURFACEFORMAT_R16_SSCALED:
		vertices_sint16_out(ve, ptr, 1);
		break;
	case GEN7_SURFACEFORMAT_R16G16_SINT:
	case GEN7_SURFACEFORMAT_R16G16_SSCALED:
		vertices_sint16_out(ve, ptr, 2);
		break;
	case GEN7_SURFACEFORMAT_R16G16B16A16_SINT:
	case GEN7_SURFACEFORMAT_R16G16B16A16_SSCALED:
		vertices_sint16_out(ve, ptr, 4);
		break;
	}
}
 
/*
 * Print every valid vertex element of vertex number v, separated by ", ".
 *
 * Elements are walked from index 1 up to state.num_ve; element 1 is always
 * examined, even when num_ve is 0.
 *
 * Fixes over the previous version:
 *  - an invalid element 1 used to `continue` straight to the loop condition
 *    without advancing the index, looping forever whenever num_ve >= 1;
 *  - num_ve (up to 64) and the element's buffer index (up to 63) were used
 *    to index the smaller state.ve[] / state.vb[] tables unchecked;
 *  - an element referring to a buffer with no recorded base is now skipped
 *    instead of being read through a null-based pointer.
 */
static void indirect_vertex_out(struct kgem *kgem, uint32_t v)
{
	const int ve_slots = sizeof(state.ve) / sizeof(state.ve[0]);
	const int vb_slots = sizeof(state.vb) / sizeof(state.vb[0]);
	int last = state.num_ve;
	int i = 1;

	/* Never walk past the end of state.ve[]. */
	if (last > ve_slots - 1)
		last = ve_slots - 1;

	do {
		const struct vertex_elements *ve = &state.ve[i];

		if (ve->valid &&
		    ve->buffer >= 0 && ve->buffer < vb_slots &&
		    state.vb[ve->buffer].base != NULL) {
			const struct vertex_buffer *vb = &state.vb[ve->buffer];

			ve_out(ve, vb->ptr + v * vb->pitch + ve->offset);
		}

		/* Always advance, so an invalid element cannot stall the loop. */
		while (++i <= last && !state.ve[i].valid)
			;

		/* Separator only between two valid elements. */
		if (ve->valid && i <= last)
			ErrorF(", ");
	} while (i <= last);
}
 
/*
 * Dump the vertices referenced by a primitive command: data[2] is used as
 * the vertex count and data[3] as the first vertex. Bit 15 of data[0] must
 * be clear (see the XXX note on the assertion).
 */
static void primitive_out(struct kgem *kgem, uint32_t *data)
{
	int idx = 0;

	assert((data[0] & (1<<15)) == 0); /* XXX index buffers */

	while (idx < data[2]) {
		int vertex = data[3] + idx;

		ErrorF(" [%d:%d] = ", idx, vertex);
		indirect_vertex_out(kgem, vertex);
		ErrorF("\n");
		idx++;
	}
}
 
/*
 * Forget everything recorded about vertex buffers, vertex elements and the
 * dynamic state buffer by zeroing the file-scope decoder state.
 * The kgem argument is not used.
 */
static void finish_state(struct kgem *kgem)
{
memset(&state, 0, sizeof(state));
}
 
/*
 * Describe one "state base address" dword: bit 0 set means the address
 * (the dword with bit 0 cleared) is printed, otherwise the base is
 * reported as not updated.
 */
static void
state_base_out(uint32_t *data, uint32_t offset, unsigned int index,
	       const char *name)
{
	const uint32_t dword = data[index];

	if (!(dword & 1)) {
		kgem_debug_print(data, offset, index,
				 "%s state base not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state base address 0x%08x\n",
			 name, dword & ~1);
}
 
/*
 * Describe one "state upper bound" dword: exactly 1 means the bound is
 * disabled, any other value with bit 0 set is printed as the bound (bit 0
 * cleared), and a clear bit 0 means the bound is not updated.
 */
static void
state_max_out(uint32_t *data, uint32_t offset, unsigned int index,
	      const char *name)
{
	const uint32_t dword = data[index];

	if (dword == 1) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound disabled\n", name);
		return;
	}

	if (!(dword & 1)) {
		kgem_debug_print(data, offset, index,
				 "%s state upper bound not updated\n",
				 name);
		return;
	}

	kgem_debug_print(data, offset, index,
			 "%s state upper bound 0x%08x\n",
			 name, dword & ~1);
}
 
/*
 * Map a surface type code to a printable name.
 * Codes without a name (5, 6 and anything above 7) yield "unknown".
 */
static const char *
get_965_surfacetype(unsigned int surfacetype)
{
	static const char *const names[] = {
		[0] = "1D",
		[1] = "2D",
		[2] = "3D",
		[3] = "CUBE",
		[4] = "BUFFER",
		[7] = "NULL",
	};

	if (surfacetype < sizeof(names) / sizeof(names[0]) &&
	    names[surfacetype] != NULL)
		return names[surfacetype];

	return "unknown";
}
 
/*
 * Map a depth format code to a printable name.
 * Only codes 0, 1, 2 and 5 are named; everything else yields "unknown".
 */
static const char *
get_965_depthformat(unsigned int depthformat)
{
	if (depthformat == 0)
		return "s8_z24float";
	if (depthformat == 1)
		return "z32float";
	if (depthformat == 2)
		return "z24s8";
	if (depthformat == 5)
		return "z16";

	return "unknown";
}
 
/*
 * Describe the control value of one component of a vertex element.
 *
 * The 3-bit control is read from data (component 0 at bit 28, component 3
 * at bit 16). Control 1 names the component itself (X/Y/Z/W); the other
 * controls map to fixed strings, with 6 and 7 reported as "fail".
 */
static const char *
get_element_component(uint32_t data, int component)
{
	static const char *const axis[4] = { "X", "Y", "Z", "W" };
	static const char *const fixed[8] = {
		"nostore", NULL, "0.0", "1.0", "0x1", "VID", "fail", "fail",
	};
	const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;

	if (control != 1)
		return fixed[control];

	if (component >= 0 && component <= 3)
		return axis[component];

	return "fail";
}
 
/*
 * Name the primitive topology held in the low five bits of data.
 * Codes 0x01..0x15 are named; 0 and anything above 0x15 yield "fail".
 */
static const char *
get_prim_type(uint32_t data)
{
	static const char *const names[] = {
		[0x01] = "point list",
		[0x02] = "line list",
		[0x03] = "line strip",
		[0x04] = "tri list",
		[0x05] = "tri strip",
		[0x06] = "tri fan",
		[0x07] = "quad list",
		[0x08] = "quad strip",
		[0x09] = "line list adj",
		[0x0a] = "line strip adj",
		[0x0b] = "tri list adj",
		[0x0c] = "tri strip adj",
		[0x0d] = "tri strip reverse",
		[0x0e] = "polygon",
		[0x0f] = "rect list",
		[0x10] = "line loop",
		[0x11] = "point list bf",
		[0x12] = "line strip cont",
		[0x13] = "line strip bf",
		[0x14] = "line strip cont bf",
		[0x15] = "tri fan no stipple",
	};
	const uint32_t code = data & 0x1f;

	if (code < sizeof(names) / sizeof(names[0]) && names[code] != NULL)
		return names[code];

	return "fail";
}
 
/*
 * Result record filled in by get_reloc(): the buffer object a relocation
 * resolved to and the address it was mapped at. Both stay zero when
 * get_reloc() did not have to map a buffer object.
 */
struct reloc {
struct kgem_bo *bo;
void *base;
};
 
/*
 * Turn a batch dword into a CPU pointer.
 *
 * With a non-null base, the dword value itself (low two bits cleared) is
 * used as a byte offset from base. With a null base, the relocation
 * attached to the dword is looked up: handle 0 means the batch itself,
 * any other handle is matched against the buffers of the next request and
 * mapped, in which case *r records the bo and its mapping. *r is always
 * zeroed first.
 */
static void *
get_reloc(struct kgem *kgem,
	  void *base, const uint32_t *reloc,
	  struct reloc *r)
{
	uint32_t delta = *reloc;
	uint32_t target;
	struct kgem_bo *bo = NULL;
	int n;

	memset(r, 0, sizeof(*r));

	if (base != 0)
		return (char *)base + (delta & ~3);

	/* Byte offset of the dword inside the batch identifies its relocation. */
	target = sizeof(uint32_t) * (reloc - kgem->batch);
	n = 0;
	while (n < kgem->nreloc && kgem->reloc[n].offset != target)
		n++;
	assert(n < kgem->nreloc);

	target = kgem->reloc[n].target_handle;
	delta = kgem->reloc[n].delta;

	if (target == 0) {
		base = kgem->batch;
	} else {
		list_for_each_entry(bo, &kgem->next_request->buffers, request)
			if (bo->handle == target)
				break;
		assert(&bo->request != &kgem->next_request->buffers);
		base = kgem_bo_map__debug(kgem, bo);
		r->bo = bo;
		r->base = base;
	}

	return (char *)base + (delta & ~3);
}
 
/*
 * Name a sampler filter mode. Only the linear filter is singled out;
 * every other value, including the nearest filter, prints as "nearest".
 */
static const char *
gen7_filter_to_string(uint32_t filter)
{
	if (filter == GEN7_MAPFILTER_LINEAR)
		return "linear";

	return "nearest";
}
 
/*
 * Name a texture coordinate wrap mode. Wrap, clamp and mirror are named
 * explicitly; every other value, including clamp-to-border, prints as
 * "border".
 */
static const char *
gen7_repeat_to_string(uint32_t repeat)
{
	if (repeat == GEN7_TEXCOORDMODE_WRAP)
		return "wrap";
	if (repeat == GEN7_TEXCOORDMODE_CLAMP)
		return "clamp";
	if (repeat == GEN7_TEXCOORDMODE_MIRROR)
		return "mirror";

	return "border";
}
 
static void
gen7_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc)
{
const struct gen7_sampler_state *ss;
struct reloc r;
const char *min, *mag;
const char *s_wrap, *t_wrap, *r_wrap;
 
ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);
 
min = gen7_filter_to_string(ss->ss0.min_filter);
mag = gen7_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode);
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode);
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode);
 
ErrorF(" Sampler 0:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
 
ss++;
min = gen7_filter_to_string(ss->ss0.min_filter);
mag = gen7_filter_to_string(ss->ss0.mag_filter);
 
s_wrap = gen7_repeat_to_string(ss->ss3.s_wrap_mode);
t_wrap = gen7_repeat_to_string(ss->ss3.t_wrap_mode);
r_wrap = gen7_repeat_to_string(ss->ss3.r_wrap_mode);
 
ErrorF(" Sampler 1:\n");
ErrorF(" filter: min=%s, mag=%s\n", min, mag);
ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap);
}
 
/*
 * Return the name of a blend factor: the GEN7_BLENDFACTOR_* constant name
 * without its prefix, or "???" for a value that matches none of them.
 */
static const char *
gen7_blend_factor_to_string(uint32_t v)
{
#define BLENDFACTOR_NAME(x) case GEN7_BLENDFACTOR_##x: return #x
	switch (v) {
	BLENDFACTOR_NAME(ONE);
	BLENDFACTOR_NAME(SRC_COLOR);
	BLENDFACTOR_NAME(SRC_ALPHA);
	BLENDFACTOR_NAME(DST_ALPHA);
	BLENDFACTOR_NAME(DST_COLOR);
	BLENDFACTOR_NAME(SRC_ALPHA_SATURATE);
	BLENDFACTOR_NAME(CONST_COLOR);
	BLENDFACTOR_NAME(CONST_ALPHA);
	BLENDFACTOR_NAME(SRC1_COLOR);
	BLENDFACTOR_NAME(SRC1_ALPHA);
	BLENDFACTOR_NAME(ZERO);
	BLENDFACTOR_NAME(INV_SRC_COLOR);
	BLENDFACTOR_NAME(INV_SRC_ALPHA);
	BLENDFACTOR_NAME(INV_DST_ALPHA);
	BLENDFACTOR_NAME(INV_DST_COLOR);
	BLENDFACTOR_NAME(INV_CONST_COLOR);
	BLENDFACTOR_NAME(INV_CONST_ALPHA);
	BLENDFACTOR_NAME(INV_SRC1_COLOR);
	BLENDFACTOR_NAME(INV_SRC1_ALPHA);
	default:
		break;
	}
#undef BLENDFACTOR_NAME

	return "???";
}
 
static const char *
gen7_blend_function_to_string(uint32_t v)
{
switch (v) {
#define C(x) case GEN7_BLENDFUNCTION_##x: return #x;
C(ADD);
C(SUBTRACT);
C(REVERSE_SUBTRACT);
C(MIN);
C(MAX);
#undef C
default: return "???";
}
}
 
/*
 * Dump the blend state that the dword at reloc points to, resolved through
 * get_reloc() against the dynamic state buffer: whether blending is
 * enabled, the blend function and the source/destination factors.
 */
static void
gen7_decode_blend(struct kgem *kgem, const uint32_t *reloc)
{
	const struct gen7_blend_state *bs;
	struct reloc r;

	bs = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r);

	ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n",
	       bs->blend0.blend_enable ? "enabled" : "disabled",
	       gen7_blend_function_to_string(bs->blend0.blend_func),
	       gen7_blend_factor_to_string(bs->blend0.source_blend_factor),
	       gen7_blend_factor_to_string(bs->blend0.dest_blend_factor));
}
 
/*
 * Decode and print the command that starts at dword index offset of the
 * batch.
 *
 * The command length in dwords is taken as (data[0] & 0xff) + 2 and the
 * opcode as the upper 16 bits of data[0]. A handful of opcodes are decoded
 * field by field (and update the file-scope decoder state); everything else
 * is printed as a named or "unknown" command followed by its raw dwords.
 *
 * Returns the command length in dwords.
 */
int kgem_gen7_decode_3d(struct kgem *kgem, uint32_t offset)
{
/*
 * Names and expected length ranges for commands that are dumped generically.
 * NOTE(review): 0x6101 and 0x780a are handled by the switch below, so their
 * entries here are never reached; the 6/6 length listed for
 * STATE_BASE_ADDRESS also disagrees with the assert(len == 10) in the switch.
 */
static const struct {
uint32_t opcode;
int min_len;
int max_len;
const char *name;
} opcodes[] = {
{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
{ 0x6102, 2, 2 , "STATE_SIP" },
{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
};
uint32_t *data = kgem->batch + offset;
uint32_t op;
unsigned int len;
int i;
const char *name;

len = (data[0] & 0xff) + 2;
op = (data[0] & 0xffff0000) >> 16;
switch (op) {
case 0x6101:
/* i walks the dwords: header, five base addresses, four upper bounds. */
i = 0;
kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n");
assert(len == 10);

state_base_out(data, offset, i++, "general");
state_base_out(data, offset, i++, "surface");
state_base_out(data, offset, i++, "dynamic");
state_base_out(data, offset, i++, "indirect");
state_base_out(data, offset, i++, "instruction");

state_max_out(data, offset, i++, "general");
state_max_out(data, offset, i++, "dynamic");
state_max_out(data, offset, i++, "indirect");
state_max_out(data, offset, i++, "instruction");

/* Dword 3 is the one printed above as the "dynamic" base address. */
gen7_update_dynamic_buffer(kgem, offset + 3);

return len;

case 0x7808:
/* One header dword followed by four dwords per vertex buffer. */
assert((len - 1) % 4 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n");

for (i = 1; i < len;) {
gen7_update_vertex_buffer(kgem, data + i);

kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n",
data[i] >> 26,
data[i] & (1 << 20) ? "random" : "sequential",
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i++, "buffer address\n");
kgem_debug_print(data, offset, i++, "max index\n");
kgem_debug_print(data, offset, i++, "mbz\n");
}
return len;

case 0x7809:
/* One header dword followed by two dwords per vertex element. */
assert((len + 1) % 2 == 0);
kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");

for (i = 1; i < len;) {
/* (i - 1)/2 is the zero-based element number. */
gen7_update_vertex_elements(kgem, (i - 1)/2, data + i);

kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, "
"src offset 0x%04x bytes\n",
data[i] >> 26,
data[i] & (1 << 25) ? "" : "in",
(data[i] >> 16) & 0x1ff,
data[i] & 0x07ff);
i++;
kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), "
"dst offset 0x%02x bytes\n",
get_element_component(data[i], 0),
get_element_component(data[i], 1),
get_element_component(data[i], 2),
get_element_component(data[i], 3),
(data[i] & 0xff) * 4);
i++;
}
return len;

case 0x780a:
assert(len == 3);
kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n");
kgem_debug_print(data, offset, 1, "beginning buffer address\n");
kgem_debug_print(data, offset, 2, "ending buffer address\n");
return len;

case 0x7b00:
assert(len == 7);
kgem_debug_print(data, offset, 0, "3DPRIMITIVE\n");
kgem_debug_print(data, offset, 1, "type %s, %s\n",
get_prim_type(data[1]),
(data[1] & (1 << 15)) ? "random" : "sequential");
kgem_debug_print(data, offset, 2, "vertex count\n");
kgem_debug_print(data, offset, 3, "start vertex\n");
kgem_debug_print(data, offset, 4, "instance count\n");
kgem_debug_print(data, offset, 5, "start instance\n");
kgem_debug_print(data, offset, 6, "index bias\n");
/* Dump the vertices using the recorded buffer/element state. */
primitive_out(kgem, data);
return len;
}

/* For the rest, just dump the bytes */
name = NULL;
for (i = 0; i < ARRAY_SIZE(opcodes); i++)
if (op == opcodes[i].opcode) {
name = opcodes[i].name;
break;
}

len = (data[0] & 0xff) + 2;
if (name == NULL) {
kgem_debug_print(data, offset, 0, "unknown\n");
} else {
/* i still indexes the matching table entry here. */
kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name);
/* Length is only checked for entries whose max_len exceeds 1. */
if (opcodes[i].max_len > 1) {
assert(len >= opcodes[i].min_len &&
len <= opcodes[i].max_len);
}
}
for (i = 1; i < len; i++)
kgem_debug_print(data, offset, i, "dword %d\n", i);

return len;
}
 
/*
 * Public entry point that resets the gen7 decoder state; it simply
 * forwards to the file-local finish_state().
 */
void kgem_gen7_finish_state(struct kgem *kgem)
{
finish_state(kgem);
}
/drivers/video/Intel-2D/sna/pciaccess.h
0,0 → 1,537
/*
* (C) Copyright IBM Corporation 2006
* Copyright 2009 Red Hat, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
/*
* Copyright (c) 2007 Paulo R. Zanoni, Tiago Vignatti
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
/**
* \file pciaccess.h
*
* \author Ian Romanick <idr@us.ibm.com>
*/
 
#ifndef PCIACCESS_H
#define PCIACCESS_H
 
#include <inttypes.h>
 
/*
 * __deprecated marks a declaration as deprecated when compiling with GCC 3
 * or later and expands to nothing otherwise.
 */
#if __GNUC__ >= 3
#define __deprecated __attribute__((deprecated))
#else
#define __deprecated
#endif

/* 64-bit integer used throughout this header for PCI addresses, offsets and sizes. */
typedef uint64_t pciaddr_t;

/* Forward declarations; the match and device structures are defined further down. */
struct pci_device;
struct pci_device_iterator;
struct pci_id_match;
struct pci_slot_match;
 
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Function prototypes only; no implementation is part of this header.
 * NOTE(review): the group descriptions below are inferred from names and
 * signatures and should be confirmed against the implementation.
 */

int pci_device_has_kernel_driver(struct pci_device *dev);

int pci_device_is_boot_vga(struct pci_device *dev);

int pci_device_read_rom(struct pci_device *dev, void *buffer);

/*
 * Region-based mapping, marked deprecated. The pci_mem_region documentation
 * in this header says to use pci_device_map_range instead.
 */
int __deprecated pci_device_map_region(struct pci_device *dev,
unsigned region, int write_enable);

int __deprecated pci_device_unmap_region(struct pci_device *dev,
unsigned region);

/* Range-based mapping; map_flags takes the PCI_DEV_MAP_FLAG_* values defined below. */
int pci_device_map_range(struct pci_device *dev, pciaddr_t base,
pciaddr_t size, unsigned map_flags, void **addr);

int pci_device_unmap_range(struct pci_device *dev, void *memory,
pciaddr_t size);

/* Older range-mapping entry points, marked deprecated. */
int __deprecated pci_device_map_memory_range(struct pci_device *dev,
pciaddr_t base, pciaddr_t size, int write_enable, void **addr);

int __deprecated pci_device_unmap_memory_range(struct pci_device *dev,
void *memory, pciaddr_t size);

int pci_device_probe(struct pci_device *dev);

/* Accessors returning the capability / bridge descriptions defined below. */
const struct pci_agp_info *pci_device_get_agp_info(struct pci_device *dev);

const struct pci_bridge_info *pci_device_get_bridge_info(
struct pci_device *dev);

const struct pci_pcmcia_bridge_info *pci_device_get_pcmcia_bridge_info(
struct pci_device *dev);

int pci_device_get_bridge_buses(struct pci_device *dev, int *primary_bus,
int *secondary_bus, int *subordinate_bus);

/* Library set-up and tear-down. */
int pci_system_init(void);

void pci_system_init_dev_mem(int fd);

void pci_system_cleanup(void);

/* Device enumeration by slot or by ID match. */
struct pci_device_iterator *pci_slot_match_iterator_create(
const struct pci_slot_match *match);

struct pci_device_iterator *pci_id_match_iterator_create(
const struct pci_id_match *match);

void pci_iterator_destroy(struct pci_device_iterator *iter);

struct pci_device *pci_device_next(struct pci_device_iterator *iter);

struct pci_device *pci_device_find_by_slot(uint32_t domain, uint32_t bus,
uint32_t dev, uint32_t func);

struct pci_device *pci_device_get_parent_bridge(struct pci_device *dev);

/* Name lookups for devices, vendors and their sub-IDs. */
void pci_get_strings(const struct pci_id_match *m,
const char **device_name, const char **vendor_name,
const char **subdevice_name, const char **subvendor_name);
const char *pci_device_get_device_name(const struct pci_device *dev);
const char *pci_device_get_subdevice_name(const struct pci_device *dev);
const char *pci_device_get_vendor_name(const struct pci_device *dev);
const char *pci_device_get_subvendor_name(const struct pci_device *dev);

void pci_device_enable(struct pci_device *dev);

/*
 * Configuration space reads: a generic form taking a size and a
 * bytes_read out-parameter, plus fixed-width u8/u16/u32 variants.
 */
int pci_device_cfg_read (struct pci_device *dev, void *data,
pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_read);
int pci_device_cfg_read_u8 (struct pci_device *dev, uint8_t *data,
pciaddr_t offset);
int pci_device_cfg_read_u16(struct pci_device *dev, uint16_t *data,
pciaddr_t offset);
int pci_device_cfg_read_u32(struct pci_device *dev, uint32_t *data,
pciaddr_t offset);

/*
 * Configuration space writes: a generic form taking a size and a
 * bytes_written out-parameter, fixed-width variants, and a masked form.
 */
int pci_device_cfg_write (struct pci_device *dev, const void *data,
pciaddr_t offset, pciaddr_t size, pciaddr_t *bytes_written);
int pci_device_cfg_write_u8 (struct pci_device *dev, uint8_t data,
pciaddr_t offset);
int pci_device_cfg_write_u16(struct pci_device *dev, uint16_t data,
pciaddr_t offset);
int pci_device_cfg_write_u32(struct pci_device *dev, uint32_t data,
pciaddr_t offset);
int pci_device_cfg_write_bits(struct pci_device *dev, uint32_t mask,
uint32_t data, pciaddr_t offset);

#ifdef __cplusplus
}
#endif
 
/**
* \name Mapping flags passed to \c pci_device_map_range
*
* Each flag is a distinct bit, so flags can be OR-ed together.
*/
/*@{*/
#define PCI_DEV_MAP_FLAG_WRITABLE (1U<<0)
#define PCI_DEV_MAP_FLAG_WRITE_COMBINE (1U<<1)
#define PCI_DEV_MAP_FLAG_CACHABLE (1U<<2)
/*@}*/


/**
* Wildcard value for the fields of \c pci_id_match and \c pci_slot_match:
* all bits set.
*/
#define PCI_MATCH_ANY (~0)

/**
* Compare two PCI ID values (either vendor or device). This is used
* internally to compare the fields of \c pci_id_match to the fields of
* \c pci_device.
*
* Evaluates to true when \c a is \c PCI_MATCH_ANY or when \c a equals \c b.
* Note that \c a is evaluated twice.
*/
#define PCI_ID_COMPARE(a, b) \
(((a) == PCI_MATCH_ANY) || ((a) == (b)))
 
/**
* Search criteria for selecting devices by vendor / device IDs and by
* device class. Passed to \c pci_id_match_iterator_create and
* \c pci_get_strings.
*/
struct pci_id_match {
/**
* \name Device / vendor matching controls
*
* Control the search based on the device, vendor, subdevice, or subvendor
* IDs. Setting any of these fields to \c PCI_MATCH_ANY will cause the
* field to not be used in the comparison.
*/
/*@{*/
uint32_t vendor_id;
uint32_t device_id;
uint32_t subvendor_id;
uint32_t subdevice_id;
/*@}*/


/**
* \name Device class matching controls
*
* NOTE(review): presumably a device matches when its class, masked with
* \c device_class_mask, equals \c device_class -- confirm against the
* implementation.
*/
/*@{*/
uint32_t device_class;
uint32_t device_class_mask;
/*@}*/

/** Caller-defined value carried alongside the match criteria. */
intptr_t match_data;
};
 
 
/**
* Search criteria for selecting devices by their position on the bus.
* Passed to \c pci_slot_match_iterator_create.
*/
struct pci_slot_match {
/**
* \name Device slot matching controls
*
* Control the search based on the domain, bus, slot, and function of
* the device. Setting any of these fields to \c PCI_MATCH_ANY will cause
* the field to not be used in the comparison.
*/
/*@{*/
uint32_t domain;
uint32_t bus;
uint32_t dev;
uint32_t func;
/*@}*/

/** Caller-defined value carried alongside the match criteria. */
intptr_t match_data;
};
 
/**
* BAR descriptor for a PCI device.
*
* \c struct \c pci_device holds an array of six of these.
*/
struct pci_mem_region {
/**
* When the region is mapped, this is the pointer to the memory.
*
* This field is \b only set when the deprecated \c pci_device_map_region
* interface is used. Use \c pci_device_map_range instead.
*
* \deprecated
*/
void *memory;


/**
* Base physical address of the region within its bus / domain.
*
* \warning
* This address is really only useful to other devices in the same
* domain. It's probably \b not the address applications will ever
* use.
*
* \warning
* Most (all?) platform back-ends leave this field unset.
*/
pciaddr_t bus_addr;


/**
* Base physical address of the region from the CPU's point of view.
*
* This address is typically passed to \c pci_device_map_range to create
* a mapping of the region to the CPU's virtual address space.
*/
pciaddr_t base_addr;


/**
* Size, in bytes, of the region.
*/
pciaddr_t size;


/**
* Is the region I/O ports or memory?
*
* NOTE(review): presumably 1 means I/O ports and 0 means memory, as the
* field name suggests -- confirm.
*/
unsigned is_IO:1;

/**
* Is the memory region prefetchable?
*
* \note
* This can only be set if \c is_IO is not set.
*/
unsigned is_prefetchable:1;


/**
* Is the memory at a 64-bit address?
*
* \note
* This can only be set if \c is_IO is not set.
*/
unsigned is_64:1;
};
 
 
/**
* PCI device.
*
* Contains all of the information about a particular PCI device.
*/
struct pci_device {
/**
* \name Device bus identification.
*
* Complete bus identification, including domain, of the device. On
* platforms that do not support PCI domains (e.g., 32-bit x86 hardware),
* the domain will always be zero.
*/
/*@{*/
uint16_t domain;
uint8_t bus;
uint8_t dev;
uint8_t func;
/*@}*/


/**
* \name Vendor / device ID
*
* The vendor ID, device ID, and sub-IDs for the device.
*/
/*@{*/
uint16_t vendor_id;
uint16_t device_id;
uint16_t subvendor_id;
uint16_t subdevice_id;
/*@}*/

/**
* Device's class, subclass, and programming interface packed into a
* single 32-bit value. The class is at bits [23:16], subclass is at
* bits [15:8], and programming interface is at [7:0].
*/
uint32_t device_class;


/**
* Device revision number, as read from the configuration header.
*/
uint8_t revision;


/**
* BAR descriptors for the device.
*/
struct pci_mem_region regions[6];


/**
* Size, in bytes, of the device's expansion ROM.
*/
pciaddr_t rom_size;


/**
* IRQ associated with the device. If there is no IRQ, this value will
* be -1.
*/
int irq;


/**
* Storage for user data. Users of the library can store arbitrary
* data in this pointer. The library will not use it for any purpose.
* It is the user's responsibility to free this memory before destroying
* the \c pci_device structure.
*/
intptr_t user_data;

/**
* Used by the VGA arbiter. Type of resource decoded by the device and
* the file descriptor (/dev/vga_arbiter). */
int vgaarb_rsrc;
};
 
 
/**
* Description of the AGP capability of the device.
*
* \sa pci_device_get_agp_info
*/
struct pci_agp_info {
/**
* Offset of the AGP registers in the devices configuration register
* space. This is generally used so that the offset of the AGP command
* register can be determined.
*/
unsigned config_offset;


/**
* \name AGP major / minor version.
*/
/*@{*/
uint8_t major_version;
uint8_t minor_version;
/*@}*/

/**
* Logical OR of the supported AGP rates. For example, a value of 0x07
* means that the device can support 1x, 2x, and 4x. A value of 0x0c
* means that the device can support 8x and 4x.
*/
uint8_t rates;

/*
 * Single-bit capability flags.
 * NOTE(review): the flags without a trailing comment are undocumented
 * here; their meaning presumably follows the AGP status register --
 * confirm before relying on them.
 */
unsigned int fast_writes:1; /**< Are fast-writes supported? */
unsigned int addr64:1;
unsigned int htrans:1;
unsigned int gart64:1;
unsigned int coherent:1;
unsigned int sideband:1; /**< Is side-band addressing supported? */
unsigned int isochronus:1;

uint8_t async_req_size;
uint8_t calibration_cycle_timing;
uint8_t max_requests;
};
 
/**
* Description of a PCI-to-PCI bridge device.
*
* NOTE(review): the fields carry no documentation of their own; they
* presumably mirror the bridge's configuration header registers -- confirm.
*
* \sa pci_device_get_bridge_info
*/
struct pci_bridge_info {
/* Bus numbers and latency timer. */
uint8_t primary_bus;
uint8_t secondary_bus;
uint8_t subordinate_bus;
uint8_t secondary_latency_timer;

/* Type codes for the I/O, memory and prefetchable memory windows. */
uint8_t io_type;
uint8_t mem_type;
uint8_t prefetch_mem_type;

uint16_t secondary_status;
uint16_t bridge_control;

/* I/O window. */
uint32_t io_base;
uint32_t io_limit;

/* Memory window. */
uint32_t mem_base;
uint32_t mem_limit;

/* Prefetchable memory window (64-bit base and limit). */
uint64_t prefetch_mem_base;
uint64_t prefetch_mem_limit;
};
 
/**
* Description of a PCI-to-PCMCIA bridge device.
*
* NOTE(review): the fields carry no documentation of their own; they
* presumably mirror the bridge's configuration header registers -- confirm.
*
* \sa pci_device_get_pcmcia_bridge_info
*/
struct pci_pcmcia_bridge_info {
/* Bus numbers and latency timer. */
uint8_t primary_bus;
uint8_t card_bus;
uint8_t subordinate_bus;
uint8_t cardbus_latency_timer;

uint16_t secondary_status;
uint16_t bridge_control;

/* Two I/O windows, each a base/limit pair. */
struct {
uint32_t base;
uint32_t limit;
} io[2];

/* Two memory windows, each a base/limit pair. */
struct {
uint32_t base;
uint32_t limit;
} mem[2];

};
 
 
/**
* VGA Arbiter definitions, functions and related.
*/

/*
 * Resource codes. Each is a distinct bit, so they can be OR-ed together;
 * VGA_ARB_RSRC_NONE is zero.
 */
/* Legacy VGA regions */
#define VGA_ARB_RSRC_NONE 0x00
#define VGA_ARB_RSRC_LEGACY_IO 0x01
#define VGA_ARB_RSRC_LEGACY_MEM 0x02
/* Non-legacy access */
#define VGA_ARB_RSRC_NORMAL_IO 0x04
#define VGA_ARB_RSRC_NORMAL_MEM 0x08

int pci_device_vgaarb_init (void);
void pci_device_vgaarb_fini (void);
int pci_device_vgaarb_set_target (struct pci_device *dev);
/* use the targeted device */
int pci_device_vgaarb_decodes (int new_vga_rsrc);
int pci_device_vgaarb_lock (void);
int pci_device_vgaarb_trylock (void);
int pci_device_vgaarb_unlock (void);
/* return the current device count + resource decodes for the device */
int pci_device_vgaarb_get_info (struct pci_device *dev, int *vga_count, int *rsrc_decodes);
 
/*
* I/O space access.
*
* An I/O range is opened to obtain an opaque pci_io_handle; the read and
* write helpers then take that handle plus a register value, in 8-, 16-
* and 32-bit widths. The handle is released with pci_device_close_io.
*/

struct pci_io_handle;

struct pci_io_handle *pci_device_open_io(struct pci_device *dev, pciaddr_t base,
pciaddr_t size);
struct pci_io_handle *pci_legacy_open_io(struct pci_device *dev, pciaddr_t base,
pciaddr_t size);
void pci_device_close_io(struct pci_device *dev, struct pci_io_handle *handle);
uint32_t pci_io_read32(struct pci_io_handle *handle, uint32_t reg);
uint16_t pci_io_read16(struct pci_io_handle *handle, uint32_t reg);
uint8_t pci_io_read8(struct pci_io_handle *handle, uint32_t reg);
void pci_io_write32(struct pci_io_handle *handle, uint32_t reg, uint32_t data);
void pci_io_write16(struct pci_io_handle *handle, uint32_t reg, uint16_t data);
void pci_io_write8(struct pci_io_handle *handle, uint32_t reg, uint8_t data);
 
/*
* Legacy memory access
*
* Same parameter shape as pci_device_map_range / pci_device_unmap_range.
* NOTE(review): map_flags presumably takes the PCI_DEV_MAP_FLAG_* values
* as well -- confirm.
*/

int pci_device_map_legacy(struct pci_device *dev, pciaddr_t base,
pciaddr_t size, unsigned map_flags, void **addr);
int pci_device_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size);
 
#endif /* PCIACCESS_H */
/drivers/video/Intel-2D/sna/render/exa_sf.g4b
0,0 → 1,15
{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
{ 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
{ 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
{ 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
{ 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
{ 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/sna/render/exa_sf.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00400001, 0x206003be, 0x00690060, 0x00000000 },
{ 0x00400040, 0x20e077bd, 0x00690080, 0x006940a0 },
{ 0x00400041, 0x202077be, 0x006900e0, 0x000000c0 },
{ 0x00400040, 0x20e077bd, 0x006900a0, 0x00694060 },
{ 0x00400041, 0x204077be, 0x006900e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
/drivers/video/Intel-2D/sna/render/exa_sf_mask.g4b
0,0 → 1,15
{ 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
{ 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/sna/render/exa_sf_mask.g5b
0,0 → 1,7
{ 0x00400031, 0x20c01fbd, 0x1069002c, 0x02100001 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
{ 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
{ 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
{ 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
{ 0x00600031, 0x20001fbc, 0x648d0000, 0x8808c800 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00800041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00800041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca_srcalpha.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca_srcalpha.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/sna/render/exa_wm_ca_srcalpha.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00800041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00800041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00800041, 0x228077bd, 0x008d0380, 0x008d0280 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_affine.g4b
0,0 → 1,8
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x000000a0, 0x008d0100 },
{ 0x00802048, 0x210077be, 0x000000a4, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x000000b0, 0x008d0100 },
{ 0x00802048, 0x214077be, 0x000000b4, 0x008d0140 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_affine.g6b
0,0 → 1,4
{ 0x0060005a, 0x210077be, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x212077be, 0x00000100, 0x008d0080 },
{ 0x0060005a, 0x214077be, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x216077be, 0x00000110, 0x008d0080 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x290077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x292077bd, 0x00000100, 0x008d0080 },
{ 0x0060005a, 0x294077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x296077bd, 0x00000110, 0x008d0080 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_projective.g4b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_projective.g5b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
{ 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
{ 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_projective.g6b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
{ 0x00600041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x212077be, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
{ 0x00600041, 0x214077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x216077be, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_projective.g7b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x00000120, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000120, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x00000100, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000100, 0x008d0080 },
{ 0x00600041, 0x290077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x292077bd, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x00000110, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x00000110, 0x008d0080 },
{ 0x00600041, 0x294077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x296077bd, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_a.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x23801c09, 0x00000000, 0x02520102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x23801c09, 0x20000000, 0x0a2a0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_a.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x23801cc9, 0x000000e0, 0x0a2a0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x23801ca9, 0x000008e0, 0x0a2c0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_argb.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x22c01c09, 0x00000000, 0x02580102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_argb.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x07800031, 0x22c01c09, 0x20000000, 0x0a8a0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_argb.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20e00022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22c01cc9, 0x000000e0, 0x0a8a0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_mask_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28e00021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22c01ca9, 0x000008e0, 0x0a8c0102 },
/drivers/video/Intel-2D/sna/render/exa_wm_noca.g4b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_noca.g5b
0,0 → 1,4
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_noca.g6b
0,0 → 1,4
{ 0x00800041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00800041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00800041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00800041, 0x228077bd, 0x008d0280, 0x008d0380 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_affine.g4b
0,0 → 1,8
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
/drivers/video/Intel-2D/sna/render/exa_wm_src_affine.g5b
0,0 → 1,4
{ 0x00802059, 0x200077bc, 0x00000060, 0x008d0100 },
{ 0x00802048, 0x204077be, 0x00000064, 0x008d0140 },
{ 0x00802059, 0x200077bc, 0x00000070, 0x008d0100 },
{ 0x00802048, 0x208077be, 0x00000074, 0x008d0140 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_affine.g6b
0,0 → 1,4
{ 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 },
{ 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_affine.g7b
0,0 → 1,4
{ 0x0060005a, 0x284077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x286077bd, 0x000000c0, 0x008d0080 },
{ 0x0060005a, 0x288077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x28a077bd, 0x000000d0, 0x008d0080 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_projective.g4b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_projective.g5b
0,0 → 1,16
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x108d03c0, 0x02100001 },
{ 0x00600031, 0x21a01fbd, 0x108d03e0, 0x02100001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_projective.g6b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
{ 0x00600041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x206077be, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
{ 0x00600041, 0x208077be, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x20a077be, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_projective.g7b
0,0 → 1,12
{ 0x0060005a, 0x23c077bd, 0x000000e0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000e0, 0x008d0080 },
{ 0x01600038, 0x218003bd, 0x008d03c0, 0x00000000 },
{ 0x01600038, 0x21a003bd, 0x008d03e0, 0x00000000 },
{ 0x0060005a, 0x23c077bd, 0x000000c0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000c0, 0x008d0080 },
{ 0x00600041, 0x284077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x286077bd, 0x008d03e0, 0x008d01a0 },
{ 0x0060005a, 0x23c077bd, 0x000000d0, 0x008d0040 },
{ 0x0060005a, 0x23e077bd, 0x000000d0, 0x008d0080 },
{ 0x00600041, 0x288077bd, 0x008d03c0, 0x008d0180 },
{ 0x00600041, 0x28a077bd, 0x008d03e0, 0x008d01a0 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_a.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22801c09, 0x00000000, 0x02520001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_a.g5b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22801c09, 0x20000000, 0x0a2a0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_a.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22801cc9, 0x00000020, 0x0a2a0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_a.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22801ca9, 0x00000820, 0x0a2c0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_argb.g4b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x21c01c09, 0x00000000, 0x02580001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_argb.g5b
0,0 → 1,2
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x21c01d29, 0x208d0000, 0x0a8a0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_argb.g6b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_argb.g7b
0,0 → 1,3
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a8c0001 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_planar.g4b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22001c09, 0x00000000, 0x02520001 },
{ 0x01800031, 0x21c01c09, 0x00000000, 0x02520003 },
{ 0x01800031, 0x22401c09, 0x00000000, 0x02520005 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_planar.g5b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x01800031, 0x22001c09, 0x20000000, 0x0a2a0001 },
{ 0x01800031, 0x21c01c09, 0x20000000, 0x0a2a0003 },
{ 0x01800031, 0x22401c09, 0x20000000, 0x0a2a0005 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_planar.g6b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x20200022, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22001cc9, 0x00000020, 0x0a2a0001 },
{ 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0003 },
{ 0x02800031, 0x22401cc9, 0x00000020, 0x0a2a0005 },
/drivers/video/Intel-2D/sna/render/exa_wm_src_sample_planar.g7b
0,0 → 1,5
{ 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
{ 0x00600001, 0x28200021, 0x008d0000, 0x00000000 },
{ 0x02800031, 0x22001ca9, 0x00000820, 0x0a2c0001 },
{ 0x02800031, 0x21c01ca9, 0x00000820, 0x0a2c0003 },
{ 0x02800031, 0x22401ca9, 0x00000820, 0x0a2c0005 },
/drivers/video/Intel-2D/sna/render/exa_wm_write.g4b
0,0 → 1,18
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
{ 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 },
{ 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 },
{ 0x00601001, 0x210003be, 0x008d0260, 0x00000000 },
{ 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/sna/render/exa_wm_write.g5b
0,0 → 1,6
{ 0x00802001, 0x304003be, 0x008d01c0, 0x00000000 },
{ 0x00802001, 0x306003be, 0x008d0200, 0x00000000 },
{ 0x00802001, 0x308003be, 0x008d0240, 0x00000000 },
{ 0x00802001, 0x30a003be, 0x008d0280, 0x00000000 },
{ 0x00600201, 0x202003be, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x548d0000, 0x94084800 },
/drivers/video/Intel-2D/sna/render/exa_wm_write.g6b
0,0 → 1,17
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/sna/render/exa_wm_write.g7b
0,0 → 1,17
{ 0x00600001, 0x284003bd, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x286003bd, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x288003bd, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x28a003bd, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x28c003bd, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x28e003bd, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x290003bd, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x292003bd, 0x008d02a0, 0x00000000 },
{ 0x05800031, 0x24001ca8, 0x00000840, 0x90031000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
/drivers/video/Intel-2D/sna/render/exa_wm_xy.g4b
0,0 → 1,4
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
/drivers/video/Intel-2D/sna/render/exa_wm_xy.g5b
0,0 → 1,4
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
/drivers/video/Intel-2D/sna/render/exa_wm_yuv_rgb.g4b
0,0 → 1,12
{ 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/sna/render/exa_wm_yuv_rgb.g5b
0,0 → 1,12
{ 0x00802040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/sna/render/exa_wm_yuv_rgb.g6b
0,0 → 1,12
{ 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/sna/render/exa_wm_yuv_rgb.g7b
0,0 → 1,12
{ 0x00800040, 0x23007fbd, 0x008d0200, 0xbd808081 },
{ 0x00800041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 },
{ 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbf008084 },
{ 0x00800040, 0x23407fbd, 0x008d0240, 0xbf008084 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x00800048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 },
{ 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
{ 0x00800001, 0x240003bc, 0x008d0300, 0x00000000 },
{ 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
{ 0x00800001, 0x228003fd, 0x00000000, 0x3f800000 },
/drivers/video/Intel-2D/sna/sna.c
0,0 → 1,991
/**************************************************************************
 
Copyright 2001 VA Linux Systems Inc., Fremont, California.
Copyright © 2002 by David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
 
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors: Jeff Hartmann <jhartmann@valinux.com>
* Abraham van der Merwe <abraham@2d3d.co.za>
* David Dawes <dawes@xfree86.org>
* Alan Hourihane <alanh@tungstengraphics.com>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include <memory.h>
#include <malloc.h>
#include <kos32sys.h>
#include <pixlib2.h>
#include "i915_pciids.h"
 
#include "compiler.h"
#include "sna.h"
#include "intel_driver.h"
 
/*
 * Reinterpret the `handle` member of *x as a surface_t pointer.
 * The whole expansion is parenthesized so that a postfix operator applied to
 * the macro result (e.g. to_surface(x)->field) binds to the cast pointer and
 * not to the `handle` member before the cast.
 */
#define to_surface(x) ((surface_t*)((x)->handle))
 
static struct sna_fb sna_fb;
static int tls_mask;
 
int tls_alloc(void);
 
/*
 * Load a 32-bit value from %fs:(key) and return it as a pointer.
 *
 * key is used directly as the offset inside the %fs segment; unlike
 * tls_set(), no check is made on its two low bits.
 * NOTE(review): presumably key is a slot offset obtained from tls_alloc() -
 * confirm against the callers.
 */
static inline void *tls_get(int key)
{
void *val;
__asm__ __volatile__(
"movl %%fs:(%1), %0"
:"=r"(val)
:"r"(key));
 
return val;
};
 
/*
 * Store ptr at %fs:(key).
 *
 * Returns 0 once the store has been issued, or -1 (without storing anything)
 * when either of the two low bits of key is set.
 */
static inline int
tls_set(int key, const void *ptr)
{
    if (key & 3)
        return -1;

    __asm__ __volatile__(
    "movl %0, %%fs:(%1)"
    ::"r"(ptr),"r"(key));

    return 0;
}
 
 
 
 
/*
 * Forward declarations of helpers used further down; their definitions are
 * not visible in this part of the file.
 */
int kgem_init_fb(struct kgem *kgem, struct sna_fb *fb);
int kgem_update_fb(struct kgem *kgem, struct sna_fb *fb);
uint32_t kgem_surface_size(struct kgem *kgem,bool relaxed_fencing,
unsigned flags, uint32_t width, uint32_t height,
uint32_t bpp, uint32_t tiling, uint32_t *pitch);
struct kgem_bo *kgem_bo_from_handle(struct kgem *kgem, int handle,
int pitch, int height);
 
void kgem_close_batches(struct kgem *kgem);
void sna_bo_destroy(struct kgem *kgem, struct kgem_bo *bo);
 
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci);
 
/*
 * NOTE(review): the only definition of this function visible here sits inside
 * an "#if 0" block - confirm whether this static prototype is still needed.
 */
static bool sna_solid_cache_init(struct sna *sna);
 
/* Driver instance; assigned in sna_accel_init() and cleared in sna_fini(). */
struct sna *sna_device;
 
/* Recursive lock taken by sna_init() and sna_fini(). */
__LOCK_INIT_RECURSIVE(, __sna_lock);
 
/* Reset hook installed by no_render_init(): does nothing. */
static void
no_render_reset(struct sna *sna)
{
    (void)sna; /* parameter intentionally unused */
}
 
/* Flush hook installed by no_render_init(): does nothing. */
static void
no_render_flush(struct sna *sna)
{
    (void)sna; /* parameter intentionally unused */
}
 
/*
 * Context-switch hook installed by no_render_init().
 *
 * When the batch holds commands (kgem->nbatch != 0) and the current ring is
 * reported idle, the batch is submitted; otherwise nothing happens.
 * new_mode is ignored.
 */
static void
no_render_context_switch(struct kgem *kgem,
                         int new_mode)
{
    (void)new_mode;

    /* The idle query is only made when there is something to submit. */
    if (kgem->nbatch && kgem_ring_is_idle(kgem, kgem->ring)) {
        DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
        _kgem_submit(kgem);
    }
}
 
/* Retire hook installed by no_render_init(): does nothing. */
static void no_render_retire(struct kgem *kgem)
{
    (void)kgem; /* parameter intentionally unused */
}
 
/* Expire hook installed by no_render_init(): does nothing. */
static void no_render_expire(struct kgem *kgem)
{
    (void)kgem; /* parameter intentionally unused */
}
 
/* Teardown hook installed by no_render_init(): does nothing. */
static void no_render_fini(struct sna *sna)
{
    (void)sna; /* parameter intentionally unused */
}
 
/*
 * Install the fallback ("generic") render backend on sna.
 *
 * Clears sna->render, points the vertex buffer at the embedded vertex_data
 * array, installs the no_render_* hooks on both the render and kgem
 * structures, selects KGEM_RENDER as mode and ring, and finally calls
 * sna_vertex_init().
 *
 * Returns the backend name, "generic".
 */
const char *no_render_init(struct sna *sna)
{
    struct sna_render *r = &sna->render;

    memset(r, 0, sizeof(*r));

    /* Vertex storage lives inside the render structure itself. */
    r->vertices = r->vertex_data;
    r->vertex_size = ARRAY_SIZE(r->vertex_data);
    r->prefer_gpu = PREFER_GPU_BLT;

    /* Render-side hooks. */
    r->reset = no_render_reset;
    r->flush = no_render_flush;
    r->fini = no_render_fini;

    /* kgem-side state and hooks. */
    sna->kgem.mode = KGEM_RENDER;
    sna->kgem.ring = KGEM_RENDER;
    sna->kgem.context_switch = no_render_context_switch;
    sna->kgem.retire = no_render_retire;
    sna->kgem.expire = no_render_expire;

    sna_vertex_init(sna);
    return "generic";
}
 
/*
 * Initialise the vertex bookkeeping of sna->render.
 *
 * Only the `active` counter is reset; the pthread mutex/condition
 * initialisation for render.lock and render.wait is disabled in this file.
 */
void sna_vertex_init(struct sna *sna)
{
    /* pthread_mutex_init(&sna->render.lock, NULL); -- disabled */
    /* pthread_cond_init(&sna->render.wait, NULL);  -- disabled */
    sna->render.active = 0;
}
 
/*
 * Select and initialise the render backend for sna, then bring up the
 * framebuffer.
 *
 * The generic backend is always installed first; a genN_render_init() is then
 * layered on top according to sna->info->gen (compared against octal
 * constants). Values below 030 and values of 0100 or above keep the generic
 * backend. Afterwards kgem is reset and sna is published through sna_device.
 *
 * Returns whatever kgem_init_fb() returns.
 */
int sna_accel_init(struct sna *sna)
{
    const char *backend = no_render_init(sna);

    if (sna->info->gen < 030) {
        /* nothing more specific available: keep the generic backend */
    } else if (sna->info->gen < 040) {
        backend = gen3_render_init(sna, backend);
    } else if (sna->info->gen < 050) {
        backend = gen4_render_init(sna, backend);
    } else if (sna->info->gen < 060) {
        backend = gen5_render_init(sna, backend);
    } else if (sna->info->gen < 070) {
        backend = gen6_render_init(sna, backend);
    } else if (sna->info->gen < 0100) {
        backend = gen7_render_init(sna, backend);
    } else {
        (void)backend;
    }

    DBG(("%s(backend=%s, prefer_gpu=%x)\n",
         __FUNCTION__, backend, sna->render.prefer_gpu));

    kgem_reset(&sna->kgem);

    sna_device = sna;

    return kgem_init_fb(&sna->kgem, &sna_fb);
}
 
/*
 * Create the driver instance on first use and report the render caps.
 *
 * service is the handle passed to call_service() and stored as sna->scrn.
 * The whole body runs under __sna_lock. If sna_device is already set, no
 * initialisation is performed and its render caps are returned directly.
 *
 * Returns sna_device->render.caps, or 0 when the SRV_GET_PCI_INFO request or
 * the allocation of the sna structure fails.
 */
int sna_init(uint32_t service)
{
    static struct pci_device device;
    struct sna *sna;
    ioctl_t io;
    int caps = 0;

    DBG(("%s\n", __FUNCTION__));

    __lock_acquire_recursive(__sna_lock);

    if (sna_device == NULL) {
        /* Ask the service to fill in the PCI description. */
        io.handle   = service;
        io.io_code  = SRV_GET_PCI_INFO;
        io.input    = &device;
        io.inp_size = sizeof(device);
        io.output   = NULL;
        io.out_size = 0;

        if (call_service(&io) != 0)
            goto out;

        sna = malloc(sizeof(*sna));
        if (sna == NULL)
            goto out;

        memset(sna, 0, sizeof(*sna));

        sna->cpu_features = sna_cpu_detect();

        sna->PciInfo = &device;
        sna->info = intel_detect_chipset(sna->PciInfo);
        sna->scrn = service;

        kgem_init(&sna->kgem, service, sna->PciInfo, sna->info->gen);

        /* Tiling is disabled by default. */
        sna->tiling = 0;
        sna->flags = 0;

        /* Publishes sna through sna_device. */
        sna_accel_init(sna);

        tls_mask = tls_alloc();
    }

    caps = sna_device->render.caps;

out:
    __lock_release_recursive(__sna_lock);

    return caps;
}
 
/*
 * Tear down the driver instance published in sna_device, if any.
 *
 * The sna_device test is made while holding __sna_lock (as sna_init() does),
 * so two concurrent callers cannot both pass the check and have the second
 * one dereference a pointer the first has already cleared.
 *
 * Under the lock: the backend's fini hook runs, the buffer object stored in
 * the calling thread's tls_mask slot (if any) is destroyed, batches are
 * closed, the kgem cache is cleaned up and sna_device is reset to NULL.
 *
 * NOTE(review): the struct sna allocated in sna_init() is not freed here -
 * confirm whether that is intentional.
 */
void sna_fini()
{
    __lock_acquire_recursive(__sna_lock);

    if (sna_device) {
        struct kgem_bo *mask = tls_get(tls_mask);

        sna_device->render.fini(sna_device);
        if (mask)
            kgem_bo_destroy(&sna_device->kgem, mask);
        kgem_close_batches(&sna_device->kgem);
        kgem_cleanup_cache(&sna_device->kgem);

        sna_device = NULL;
    }

    __lock_release_recursive(__sna_lock);
}
 
#if 0
 
/*
 * Set up the solid-colour cache of sna->render.
 *
 * Allocates the linear backing buffer object and a proxy for slot 0, which is
 * preloaded with opaque white (0xffffffff).
 *
 * Returns TRUE on success. Returns FALSE when either allocation fails; if the
 * proxy allocation fails, the backing buffer object is destroyed again and
 * cache_bo is reset to NULL so no half-initialised cache is left behind.
 */
static bool sna_solid_cache_init(struct sna *sna)
{
    struct sna_solid_cache *cache = &sna->render.solid_cache;

    DBG(("%s\n", __FUNCTION__));

    cache->cache_bo =
        kgem_create_linear(&sna->kgem, sizeof(cache->color));
    if (!cache->cache_bo)
        return FALSE;

    /*
     * Initialise [0] with white since it is very common and filling the
     * zeroth slot simplifies some of the checks.
     */
    cache->color[0] = 0xffffffff;
    cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
    if (!cache->bo[0]) {
        /* Previously dereferenced unchecked just below. */
        kgem_bo_destroy(&sna->kgem, cache->cache_bo);
        cache->cache_bo = NULL;
        return FALSE;
    }
    cache->bo[0]->pitch = 4;
    cache->dirty = 1;
    cache->size = 1;
    cache->last = 0;

    return TRUE;
}
 
/*
 * Upload the first cache->size colour values into the cache's backing
 * buffer, then clear the dirty flag and reset the last-hit index to 0.
 * The cache must be dirty and non-empty (asserted).
 */
void
sna_render_flush_solid(struct sna *sna)
{
    struct sna_solid_cache *solid = &sna->render.solid_cache;

    DBG(("sna_render_flush_solid(size=%d)\n", solid->size));
    assert(solid->dirty);
    assert(solid->size);

    kgem_bo_write(&sna->kgem, solid->cache_bo,
                  solid->color, solid->size*sizeof(uint32_t));

    solid->last = 0;
    solid->dirty = 0;
}
 
/*
 * Retire the current solid-colour cache buffer and start a fresh one.
 *
 * Unless `force` is set, nothing is done while the backing buffer's domain
 * is not DOMAIN_GPU.  Otherwise pending colours are flushed, every proxy bo
 * and the backing bo are released, and a new backing bo plus the slot-0
 * proxy are created.  cache->size is reset to 1 only when `force` is set.
 *
 * NOTE(review): the results of kgem_create_linear()/kgem_create_proxy() are
 * used without a NULL check.
 */
static void
sna_render_finish_solid(struct sna *sna, bool force)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
int i;

DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n",
force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty));

if (!force && cache->cache_bo->domain != DOMAIN_GPU)
return;

/* write out colours that were added since the last flush */
if (cache->dirty)
sna_render_flush_solid(sna);

/* drop every proxy that still points into the old backing buffer */
for (i = 0; i < cache->size; i++) {
if (cache->bo[i] == NULL)
continue;

kgem_bo_destroy(&sna->kgem, cache->bo[i]);
cache->bo[i] = NULL;
}
kgem_bo_destroy(&sna->kgem, cache->cache_bo);

DBG(("sna_render_finish_solid reset\n"));

/* fresh backing buffer; slot 0 gets its proxy back immediately */
cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color));
cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t));
cache->bo[0]->pitch = 4;
if (force)
cache->size = 1;
}
 
 
/*
 * Return a new reference to a 4-byte proxy bo for `color` in the
 * solid-colour cache.
 *
 * Lookup order: white (always slot 0), the most recently returned slot, then
 * a linear scan of slots 1..size-1.  A slot whose colour matches but whose
 * proxy is NULL gets its proxy recreated.  On a miss the colour is appended
 * after sna_render_finish_solid() has been called; that call is forced when
 * the scan index reached ARRAY_SIZE(cache->color).
 */
struct kgem_bo *
sna_render_get_solid(struct sna *sna, uint32_t color)
{
struct sna_solid_cache *cache = &sna->render.solid_cache;
int i;

DBG(("%s: %08x\n", __FUNCTION__, color));

// if ((color & 0xffffff) == 0) /* alpha only */
// return kgem_bo_reference(sna->render.alpha_cache.bo[color>>24]);

if (color == 0xffffffff) {
DBG(("%s(white)\n", __FUNCTION__));
return kgem_bo_reference(cache->bo[0]);
}

if (cache->color[cache->last] == color) {
DBG(("sna_render_get_solid(%d) = %x (last)\n",
cache->last, color));
return kgem_bo_reference(cache->bo[cache->last]);
}

for (i = 1; i < cache->size; i++) {
if (cache->color[i] == color) {
if (cache->bo[i] == NULL) {
DBG(("sna_render_get_solid(%d) = %x (recreate)\n",
i, color));
goto create;
} else {
DBG(("sna_render_get_solid(%d) = %x (old)\n",
i, color));
goto done;
}
}
}

/* miss: i has run up to cache->size at this point */
sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color));

i = cache->size++;
cache->color[i] = color;
cache->dirty = 1;
DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color));

create:
/* proxy covering the i-th uint32_t of the backing buffer */
cache->bo[i] = kgem_create_proxy(cache->cache_bo,
i*sizeof(uint32_t), sizeof(uint32_t));
cache->bo[i]->pitch = 4;

done:
cache->last = i;
return kgem_bo_reference(cache->bo[i]);
}
 
#endif
 
 
/*
 * Copy a w x h rectangle from a source bitmap to the framebuffer with
 * GXcopy, then submit the batch.
 *
 * src_bitmap:   source; its handle is used directly as a struct kgem_bo *.
 * dst_x, dst_y: destination position; the two 32-bit values read at byte
 *               offsets 34 and 38 of the get_proc_info() buffer are added
 *               to it (presumably the window origin - confirm).
 * w, h:         size of the rectangle.
 * src_x, src_y: position of the rectangle inside the source.
 *
 * Always returns 0, including when the render backend declines the copy.
 *
 * NOTE(review): the other bitmap entry points treat bitmap->handle as a
 * surface_t * (see to_surface()); here it is cast straight to a kgem_bo *.
 * Also, unlike sna_blit_tex(), no __sna_lock is taken.  Confirm both.
 */
int sna_blit_copy(bitmap_t *src_bitmap, int dst_x, int dst_y,
                  int w, int h, int src_x, int src_y)
{
    struct sna_copy_op copy;
    struct _Pixmap src, dst;
    struct kgem_bo *src_bo;

    char proc_info[1024];
    uint32_t win_left, win_top;
    int winx, winy;

    get_proc_info(proc_info);

    /*
     * Offsets 34/38 are not 4-byte aligned and proc_info is a char array,
     * so read the fields with memcpy instead of a uint32_t pointer cast.
     */
    memcpy(&win_left, proc_info + 34, sizeof(win_left));
    memcpy(&win_top, proc_info + 38, sizeof(win_top));
    winx = win_left;
    winy = win_top;

    memset(&src, 0, sizeof(src));
    memset(&dst, 0, sizeof(dst));

    src.drawable.bitsPerPixel = 32;
    src.drawable.width = src_bitmap->width;
    src.drawable.height = src_bitmap->height;

    dst.drawable.bitsPerPixel = 32;
    dst.drawable.width = sna_fb.width;
    dst.drawable.height = sna_fb.height;

    memset(&copy, 0, sizeof(copy));

    src_bo = (struct kgem_bo*)src_bitmap->handle;

    if (sna_device->render.copy(sna_device, GXcopy,
                                &src, src_bo,
                                &dst, sna_fb.fb_bo, &copy)) {
        copy.blt(sna_device, &copy, src_x, src_y, w, h, winx+dst_x, winy+dst_y);
        copy.done(sna_device, &copy);
    }

    kgem_submit(&sna_device->kgem);

    return 0;
}
 
/*
 * Driver-side state behind a bitmap_t.  A pointer to one of these is stored
 * in bitmap->handle by sna_create_bitmap() and sna_bitmap_from_handle().
 */
typedef struct
{
uint32_t width; /* copied from bitmap->width */
uint32_t height; /* copied from bitmap->height */
void *data; /* CPU mapping from kgem_bo_map(); NULL for imported handles */
uint32_t pitch; /* bo->pitch */
struct kgem_bo *bo; /* backing buffer object */
uint32_t bo_size; /* PAGE_SIZE * bo->size.pages.count */
uint32_t flags; /* copied from bitmap->flags */
}surface_t;
 
 
 
/*
 * Create a 32bpp, untiled, CPU-mappable buffer of bitmap->width x
 * bitmap->height and attach it to `bitmap` through a freshly allocated
 * surface_t whose address is stored in bitmap->handle.
 *
 * Returns 0 on success.  Returns -1 when the surface allocation, the buffer
 * creation or the mapping fails; everything acquired up to that point is
 * released again.
 */
int sna_create_bitmap(bitmap_t *bitmap)
{
    surface_t *surface = malloc(sizeof(*surface));
    struct kgem_bo *bo;
    void *pixels;

    if (surface == NULL)
        return -1;

    __lock_acquire_recursive(__sna_lock);

    bo = kgem_create_2d(&sna_device->kgem, bitmap->width, bitmap->height,
                        32,I915_TILING_NONE, CREATE_CPU_MAP);
    if (bo == NULL) {
        __lock_release_recursive(__sna_lock);
        free(surface);
        return -1;
    }

    pixels = kgem_bo_map(&sna_device->kgem, bo);
    if (pixels == NULL) {
        kgem_bo_destroy(&sna_device->kgem, bo);
        __lock_release_recursive(__sna_lock);
        free(surface);
        return -1;
    }

    surface->bo      = bo;
    surface->data    = pixels;
    surface->pitch   = bo->pitch;
    surface->bo_size = PAGE_SIZE * bo->size.pages.count;
    surface->width   = bitmap->width;
    surface->height  = bitmap->height;
    surface->flags   = bitmap->flags;

    bitmap->handle = (uint32_t)surface;
    __lock_release_recursive(__sna_lock);

    return 0;
}
 
/*
 * Wrap an existing buffer handle in a surface_t and attach it to `bitmap`.
 *
 * bitmap: supplies width, height, pitch and flags; receives the surface
 *         pointer in bitmap->handle.
 * handle: buffer handle passed on to kgem_bo_from_handle().
 *
 * The surface has no CPU mapping (data == NULL).
 *
 * Returns 0 on success, -1 when the surface cannot be allocated or
 * kgem_bo_from_handle() yields no buffer object.
 */
int sna_bitmap_from_handle(bitmap_t *bitmap, uint32_t handle)
{
    surface_t *sf;
    struct kgem_bo *bo;

    sf = malloc(sizeof(*sf));
    if (sf == NULL)
        goto err_1;

    __lock_acquire_recursive(__sna_lock);

    bo = kgem_bo_from_handle(&sna_device->kgem, handle, bitmap->pitch, bitmap->height);

    /* previously the result was dereferenced unchecked and err_2 was dead */
    if (bo == NULL)
        goto err_2;

    __lock_release_recursive(__sna_lock);

    sf->width = bitmap->width;
    sf->height = bitmap->height;
    sf->data = NULL;
    sf->pitch = bo->pitch;
    sf->bo = bo;
    sf->bo_size = PAGE_SIZE * bo->size.pages.count;
    sf->flags = bitmap->flags;

    bitmap->handle = (uint32_t)sf;

    return 0;

err_2:
    __lock_release_recursive(__sna_lock);
    free(sf);
err_1:
    return -1;
}
 
/* Overwrite the handle stored in the buffer object behind `bitmap`. */
void sna_set_bo_handle(bitmap_t *bitmap, int handle)
{
    surface_t *surface = to_surface(bitmap);

    surface->bo->handle = handle;
}
 
/*
 * Release the buffer object and surface behind `bitmap`, then set the
 * bitmap's handle, data and pitch to -1.  Always returns 0.
 */
int sna_destroy_bitmap(bitmap_t *bitmap)
{
    surface_t *surface = to_surface(bitmap);

    __lock_acquire_recursive(__sna_lock);
    kgem_bo_destroy(&sna_device->kgem, surface->bo);
    __lock_release_recursive(__sna_lock);

    free(surface);

    bitmap->pitch = -1;
    bitmap->data = (void*)-1;
    bitmap->handle = -1;

    return 0;
}
 
/*
 * Call kgem_bo_sync__cpu() on the bitmap's buffer object under __sna_lock,
 * then publish the surface's mapping and pitch through bitmap->data and
 * bitmap->pitch.  Always returns 0.
 */
int sna_lock_bitmap(bitmap_t *bitmap)
{
    surface_t *surface = to_surface(bitmap);

    __lock_acquire_recursive(__sna_lock);
    kgem_bo_sync__cpu(&sna_device->kgem, surface->bo);
    __lock_release_recursive(__sna_lock);

    bitmap->pitch = surface->pitch;
    bitmap->data = surface->data;

    return 0;
}
 
/*
 * Adapt the surface behind `bitmap` to bitmap->width x bitmap->height.
 *
 * bitmap->pitch and bitmap->data are set to -1 on entry.  If the existing
 * buffer is large enough only the recorded width, height and pitch change;
 * otherwise the buffer is destroyed and a new 32bpp, untiled, CPU-mappable
 * one is created and mapped.
 *
 * Returns 0 on success and -1 when the replacement buffer cannot be created
 * or mapped.  After a failure the surface owns no buffer: bo and data are
 * NULL and bo_size is 0, so a later resize allocates from scratch instead
 * of dereferencing the missing buffer.
 */
int sna_resize_bitmap(bitmap_t *bitmap)
{
    surface_t *sf = to_surface(bitmap);
    struct kgem *kgem = &sna_device->kgem;
    struct kgem_bo *bo = sf->bo;
    void *map;

    uint32_t size;
    uint32_t pitch;

    bitmap->pitch = -1;
    bitmap->data = (void *) -1;

    size = kgem_surface_size(kgem,kgem->has_relaxed_fencing, CREATE_CPU_MAP,
                 bitmap->width, bitmap->height, 32, I915_TILING_NONE, &pitch);
    assert(size && size <= kgem->max_object_size);

    /* existing allocation is big enough: just re-describe it */
    if (bo != NULL && sf->bo_size >= size) {
        sf->width = bitmap->width;
        sf->height = bitmap->height;
        sf->pitch = pitch;
        bo->pitch = pitch;

        return 0;
    }

    __lock_acquire_recursive(__sna_lock);

    /* bo is NULL only after an earlier failed resize */
    if (bo != NULL)
        sna_bo_destroy(kgem, bo);

    /* keep the surface self-consistent in case the re-allocation fails */
    sf->bo = NULL;
    sf->data = NULL;
    sf->bo_size = 0;

    bo = kgem_create_2d(kgem, bitmap->width, bitmap->height,
                        32, I915_TILING_NONE, CREATE_CPU_MAP);
    if (bo == NULL)
        goto err_unlock;

    map = kgem_bo_map(kgem, bo);
    if (map == NULL) {
        sna_bo_destroy(kgem, bo);
        goto err_unlock;
    }

    __lock_release_recursive(__sna_lock);

    sf->width = bitmap->width;
    sf->height = bitmap->height;
    sf->data = map;
    sf->pitch = bo->pitch;
    sf->bo = bo;
    sf->bo_size = PAGE_SIZE * bo->size.pages.count;

    return 0;

err_unlock:
    __lock_release_recursive(__sna_lock);
    return -1;
}
 
 
 
/*
 * Create an 8bpp, untiled, CPU-mappable buffer the size of the framebuffer,
 * zero it and store it in the calling thread's tls_mask slot.
 *
 * Returns 0 on success, -1 when the buffer cannot be created or mapped.
 */
int sna_create_mask()
{
    struct kgem_bo *mask_bo;
    int *pixels;

    __lock_acquire_recursive(__sna_lock);

    mask_bo = kgem_create_2d(&sna_device->kgem, sna_fb.width, sna_fb.height,
                             8,I915_TILING_NONE, CREATE_CPU_MAP);
    if (unlikely(mask_bo == NULL)) {
        __lock_release_recursive(__sna_lock);
        return -1;
    }

    pixels = kgem_bo_map(&sna_device->kgem, mask_bo);
    if (pixels == NULL) {
        kgem_bo_destroy(&sna_device->kgem, mask_bo);
        __lock_release_recursive(__sna_lock);
        return -1;
    }

    __lock_release_recursive(__sna_lock);

    /* clear pitch * height bytes of the new mask */
    memset(pixels, 0, mask_bo->pitch * sna_fb.height);

    tls_set(tls_mask, mask_bo);

    return 0;
}
 
 
/*
 * Forward declaration only; the definition is not part of this section of
 * the file.
 */
bool
gen6_composite(struct sna *sna,
uint8_t op,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);


/* Yields `ptr` with its two least-significant bits cleared, as a void *. */
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
 
/*
 * Composite a rectangle of `bitmap` onto the framebuffer through the calling
 * thread's 8bpp mask, using the render backend's blit_tex hook with
 * PictOpSrc, then submit the batch.
 *
 * bitmap:       source bitmap (its surface_t supplies size and bo).
 * scale:        passed through to render.blit_tex().
 * dst_x, dst_y: destination position; also used as the mask coordinates.
 *               The two 32-bit values read at byte offsets 34 and 38 of the
 *               get_proc_info() buffer are added for the framebuffer
 *               position (presumably the window origin - confirm).
 * w, h:         size of the rectangle.
 * src_x, src_y: position inside the source.
 *
 * The mask is created on first use, and re-created whenever
 * kgem_update_fb() returns non-zero.
 *
 * Returns 0 once the blit has been attempted (bitmap->data/pitch are then
 * set to -1), or -1 when no mask buffer could be obtained.
 *
 * NOTE(review): the result of the SRV_MASK_UPDATE ioctl is not checked.
 */
int sna_blit_tex(bitmap_t *bitmap, bool scale, int dst_x, int dst_y,
                  int w, int h, int src_x, int src_y)
{
    surface_t *sf = to_surface(bitmap);

    struct drm_i915_mask_update update;

    struct sna_composite_op composite;
    struct _Pixmap src, dst, mask;
    struct kgem_bo *src_bo, *mask_bo;
    uint32_t win_left, win_top;
    int winx, winy;

    char proc_info[1024];

    get_proc_info(proc_info);

    /*
     * Offsets 34/38 are not 4-byte aligned and proc_info is a char array,
     * so read the fields with memcpy instead of a uint32_t pointer cast.
     */
    memcpy(&win_left, proc_info + 34, sizeof(win_left));
    memcpy(&win_top, proc_info + 38, sizeof(win_top));
    winx = win_left;
    winy = win_top;

    mask_bo = tls_get(tls_mask);

    if (unlikely(mask_bo == NULL)) {
        sna_create_mask();
        mask_bo = tls_get(tls_mask);
        if (mask_bo == NULL)
            return -1;
    }

    if (kgem_update_fb(&sna_device->kgem, &sna_fb)) {
        __lock_acquire_recursive(__sna_lock);
        kgem_bo_destroy(&sna_device->kgem, mask_bo);
        __lock_release_recursive(__sna_lock);

        /*
         * Forget the released bo.  Otherwise a failing sna_create_mask()
         * leaves the stale pointer in the slot and it would be used below
         * (and on every later call).
         */
        tls_set(tls_mask, NULL);

        sna_create_mask();
        mask_bo = tls_get(tls_mask);
        if (mask_bo == NULL)
            return -1;
    }

    VG_CLEAR(update);
    update.handle = mask_bo->handle;
    update.bo_map = (int)kgem_bo_map__cpu(&sna_device->kgem, mask_bo);
    drmIoctl(sna_device->kgem.fd, SRV_MASK_UPDATE, &update);
    mask_bo->pitch = update.bo_pitch;

    memset(&src, 0, sizeof(src));
    memset(&dst, 0, sizeof(dst));
    memset(&mask, 0, sizeof(mask));

    src.drawable.bitsPerPixel = 32;

    src.drawable.width = sf->width;
    src.drawable.height = sf->height;

    dst.drawable.bitsPerPixel = 32;
    dst.drawable.width = sna_fb.width;
    dst.drawable.height = sna_fb.height;

    mask.drawable.bitsPerPixel = 8;
    mask.drawable.width = update.width;
    mask.drawable.height = update.height;

    memset(&composite, 0, sizeof(composite));

    src_bo = sf->bo;

    __lock_acquire_recursive(__sna_lock);

    if (sna_device->render.blit_tex(sna_device, PictOpSrc,scale,
                                    &src, src_bo,
                                    &mask, mask_bo,
                                    &dst, sna_fb.fb_bo,
                                    src_x, src_y,
                                    dst_x, dst_y,
                                    winx+dst_x, winy+dst_y,
                                    w, h,
                                    &composite)) {
        struct sna_composite_rectangles r;

        r.src.x = src_x;
        r.src.y = src_y;
        r.mask.x = dst_x;
        r.mask.y = dst_y;
        r.dst.x = winx+dst_x;
        r.dst.y = winy+dst_y;
        r.width = w;
        r.height = h;

        composite.blt(sna_device, &composite, &r);
        composite.done(sna_device, &composite);
    }

    kgem_submit(&sna_device->kgem);

    __lock_release_recursive(__sna_lock);

    bitmap->data = (void*)-1;
    bitmap->pitch = -1;

    return 0;
}
 
 
 
 
 
 
 
/*
 * Per-family device descriptions.  `gen` is written as an octal literal
 * (030, 045, 075, ...) and is compared against octal thresholds such as 070
 * when the render backend is selected.
 */

/* fallback for devices without a dedicated entry; gen is -1 */
static const struct intel_device_info intel_generic_info = {
.gen = -1,
};

static const struct intel_device_info intel_i915_info = {
.gen = 030,
};
static const struct intel_device_info intel_i945_info = {
.gen = 031,
};

static const struct intel_device_info intel_g33_info = {
.gen = 033,
};

static const struct intel_device_info intel_i965_info = {
.gen = 040,
};

static const struct intel_device_info intel_g4x_info = {
.gen = 045,
};

static const struct intel_device_info intel_ironlake_info = {
.gen = 050,
};

static const struct intel_device_info intel_sandybridge_info = {
.gen = 060,
};

static const struct intel_device_info intel_ivybridge_info = {
.gen = 070,
};

static const struct intel_device_info intel_valleyview_info = {
.gen = 071,
};

static const struct intel_device_info intel_haswell_info = {
.gen = 075,
};
 
/*
 * Build one pci_id_match initialiser: vendor 0x8086, device id `d`, any
 * subvendor/subdevice, class value 0x3 << 16 with mask 0xff << 16, and `i`
 * stored as the match data.
 */
#define INTEL_DEVICE_MATCH(d,i) \
{ 0x8086, (d), PCI_MATCH_ANY, PCI_MATCH_ANY, 0x3 << 16, 0xff << 16, (intptr_t)(i) }


/*
 * Device-id table searched by PciDevMatch(); each entry carries a pointer to
 * the matching intel_device_info as its match data.  The INTEL_*_IDS and
 * INTEL_VGA_DEVICE macros are defined elsewhere.  The all-zero entry
 * terminates the table (PciDevMatch stops at device_id == 0).
 */
static const struct pci_id_match intel_device_match[] = {

INTEL_I915G_IDS(&intel_i915_info),
INTEL_I915GM_IDS(&intel_i915_info),
INTEL_I945G_IDS(&intel_i945_info),
INTEL_I945GM_IDS(&intel_i945_info),

INTEL_G33_IDS(&intel_g33_info),
INTEL_PINEVIEW_IDS(&intel_g33_info),

INTEL_I965G_IDS(&intel_i965_info),
INTEL_I965GM_IDS(&intel_i965_info),

INTEL_G45_IDS(&intel_g4x_info),
INTEL_GM45_IDS(&intel_g4x_info),

INTEL_IRONLAKE_D_IDS(&intel_ironlake_info),
INTEL_IRONLAKE_M_IDS(&intel_ironlake_info),

INTEL_SNB_D_IDS(&intel_sandybridge_info),
INTEL_SNB_M_IDS(&intel_sandybridge_info),

INTEL_IVB_D_IDS(&intel_ivybridge_info),
INTEL_IVB_M_IDS(&intel_ivybridge_info),

INTEL_HSW_D_IDS(&intel_haswell_info),
INTEL_HSW_M_IDS(&intel_haswell_info),

INTEL_VLV_D_IDS(&intel_valleyview_info),
INTEL_VLV_M_IDS(&intel_valleyview_info),

INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),

{ 0, 0, 0 },
};
 
/*
 * Linear search of a pci_id_match table for the entry whose device_id equals
 * `dev`.  The table must end with an entry whose device_id is 0.
 * Returns the matching entry, or NULL if the terminator is reached first.
 */
const struct pci_id_match *PciDevMatch(uint16_t dev,const struct pci_id_match *list)
{
    const struct pci_id_match *entry;

    for (entry = list; entry->device_id; entry++) {
        if (entry->device_id == dev)
            return entry;
    }

    return NULL;
}
 
/*
 * Look up pci->device_id in intel_device_match and return the
 * intel_device_info stored as that entry's match data.  Unknown ids yield
 * &intel_generic_info, so the result is never NULL.
 */
const struct intel_device_info *
intel_detect_chipset(struct pci_device *pci)
{
    const struct pci_id_match *match =
        PciDevMatch(pci->device_id, intel_device_match);

    if (match == NULL)
        return &intel_generic_info;

    return (const struct intel_device_info*)match->match_data;

#if 0
    for (i = 0; intel_chipsets[i].name != NULL; i++) {
        if (DEVICE_ID(pci) == intel_chipsets[i].token) {
            name = intel_chipsets[i].name;
            break;
        }
    }
    if (name == NULL) {
        xf86DrvMsg(scrn->scrnIndex, X_WARNING, "unknown chipset\n");
        name = "unknown";
    } else {
        xf86DrvMsg(scrn->scrnIndex, from,
               "Integrated Graphics Chipset: Intel(R) %s\n",
               name);
    }

    scrn->chipset = name;
#endif

}
 
int intel_get_device_id(int fd)
{
struct drm_i915_getparam gp;
int devid = 0;
 
memset(&gp, 0, sizeof(gp));
gp.param = I915_PARAM_CHIPSET_ID;
gp.value = &devid;
 
if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
return 0;
 
return devid;
}
 
/*
 * Forward an ioctl-style request to call_service().
 *
 * fd:      used as the service handle.
 * request: used as the io code.
 * arg:     input buffer; its size is always reported as 64 bytes.
 *
 * No output buffer is supplied.  Returns whatever call_service() returns.
 */
int drmIoctl(int fd, unsigned long request, void *arg)
{
    ioctl_t call;

    call.handle   = fd;
    call.io_code  = request;

    call.input    = arg;
    call.inp_size = 64;

    call.output   = NULL;
    call.out_size = 0;

    return call_service(&call);
}
 
 
 
/drivers/video/Intel-2D/sna/sna.h
0,0 → 1,607
/**************************************************************************
 
Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
Copyright © 2002 David Dawes
 
All Rights Reserved.
 
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sub license, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial portions
of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**************************************************************************/
 
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* David Dawes <dawes@xfree86.org>
*
*/
 
#ifndef _SNA_H_
#define _SNA_H_
 
#include <stdint.h>
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "compiler.h"
 
 
#include <memory.h>
#include <malloc.h>
#include <errno.h>
#include <kos32sys.h>
 
#include "intel_driver.h"
#include "pciaccess.h"
 
#include <drm.h>
#include <i915_drm.h>
 
#ifdef HAVE_DRI2_H
#include <dri2.h>
#endif
 
#if HAVE_UDEV
#include <libudev.h>
#endif
 
#if 0
#include <xorg-server.h>
 
#include <xf86Crtc.h>
#if XF86_CRTC_VERSION >= 5
#define HAS_PIXMAP_SHARING 1
#endif
 
#include <xf86str.h>
#include <windowstr.h>
#include <glyphstr.h>
#include <picturestr.h>
#include <gcstruct.h>
#include <xvdix.h>
 
#include <pciaccess.h>
 
#include <xf86drmMode.h>
 
#include "../compat-api.h"
 
#endif
 
#include <assert.h>
 
/* Error output goes through printf. */
#define ErrorF printf

/*
 * DBG takes a fully parenthesised argument list, e.g. DBG(("%s\n", name));
 * it expands to an ErrorF call when HAS_DEBUG_FULL is non-zero and to
 * nothing otherwise.
 */
#if HAS_DEBUG_FULL
#define DBG(x) ErrorF x
#else
#define DBG(x)
#endif

#define DEBUG_NO_BLT 0

#define DEBUG_FLUSH_BATCH 0

/* Per-area test switches; TEST_ALL forces every one of them on. */
#define TEST_ALL 0
#define TEST_ACCEL (TEST_ALL || 0)
#define TEST_BATCH (TEST_ALL || 0)
#define TEST_BLT (TEST_ALL || 0)
#define TEST_COMPOSITE (TEST_ALL || 0)
#define TEST_DAMAGE (TEST_ALL || 0)
#define TEST_GRADIENT (TEST_ALL || 0)
#define TEST_GLYPHS (TEST_ALL || 0)
#define TEST_IO (TEST_ALL || 0)
#define TEST_KGEM (TEST_ALL || 0)
#define TEST_RENDER (TEST_ALL || 0)

/* ioctl-style entry point into the kernel-side service. */
int drmIoctl(int fd, unsigned long request, void *arg);
 
 
/*
 * Pack a pixel-format description into one integer:
 * bpp << 24 | type << 16 | a << 12 | r << 8 | g << 4 | b,
 * where a/r/g/b are the per-channel values given by the caller and `type`
 * is one of the PIXMAN_TYPE_* codes below.
 */
#define PIXMAN_FORMAT(bpp,type,a,r,g,b) (((bpp) << 24) | \
((type) << 16) | \
((a) << 12) | \
((r) << 8) | \
((g) << 4) | \
((b)))
/* Values for the `type` field of PIXMAN_FORMAT. */
#define PIXMAN_TYPE_OTHER 0
#define PIXMAN_TYPE_A 1
#define PIXMAN_TYPE_ARGB 2
#define PIXMAN_TYPE_ABGR 3
#define PIXMAN_TYPE_COLOR 4
#define PIXMAN_TYPE_GRAY 5
#define PIXMAN_TYPE_YUY2 6
#define PIXMAN_TYPE_YV12 7
#define PIXMAN_TYPE_BGRA 8
#define PIXMAN_TYPE_RGBA 9
#define PIXMAN_TYPE_ARGB_SRGB 10
 
/* 32bpp formats */
/*
 * Pixel format codes, each built with PIXMAN_FORMAT(bpp, type, a, r, g, b).
 * The enumerators are grouped by bits per pixel; the first group is 32bpp.
 */
typedef enum {
PIXMAN_a8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
PIXMAN_x8r8g8b8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_a8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
PIXMAN_x8b8g8r8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
PIXMAN_b8g8r8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
PIXMAN_b8g8r8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
PIXMAN_r8g8b8a8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
PIXMAN_r8g8b8x8 = PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
PIXMAN_x14r6g6b6 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),

/* sRGB formats */
PIXMAN_a8r8g8b8_sRGB = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB_SRGB,8,8,8,8),

/* 24bpp formats */
PIXMAN_r8g8b8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
PIXMAN_b8g8r8 = PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),

/* 16bpp formats */
PIXMAN_r5g6b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
PIXMAN_b5g6r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),

PIXMAN_a1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
PIXMAN_x1r5g5b5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
PIXMAN_a1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
PIXMAN_x1b5g5r5 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
PIXMAN_a4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
PIXMAN_x4r4g4b4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
PIXMAN_a4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
PIXMAN_x4b4g4r4 = PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),

/* 8bpp formats */
PIXMAN_a8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
PIXMAN_r3g3b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
PIXMAN_b2g3r3 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
PIXMAN_a2r2g2b2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
PIXMAN_a2b2g2r2 = PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),

PIXMAN_c8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g8 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),

PIXMAN_x4a4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),

/* x4c4/x4g4 are built from the same arguments as c8/g8 above */
PIXMAN_x4c4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_x4g4 = PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),

/* 4bpp formats */
PIXMAN_a4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
PIXMAN_r1g2b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
PIXMAN_b1g2r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
PIXMAN_a1r1g1b1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
PIXMAN_a1b1g1r1 = PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),

PIXMAN_c4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
PIXMAN_g4 = PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),

/* 1bpp formats */
PIXMAN_a1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),

PIXMAN_g1 = PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),

/* YUV formats */
PIXMAN_yuy2 = PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
PIXMAN_yv12 = PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)

} pixman_format_code_t;
 
/*
 * PICT_* format names.  Every enumerator is an alias for the PIXMAN_* code
 * of the same name, so the two sets of values are interchangeable.
 */
typedef enum _PictFormatShort {

PICT_a2r10g10b10 = PIXMAN_a2r10g10b10,
PICT_x2r10g10b10 = PIXMAN_x2r10g10b10,
PICT_a2b10g10r10 = PIXMAN_a2b10g10r10,
PICT_x2b10g10r10 = PIXMAN_x2b10g10r10,

PICT_a8r8g8b8 = PIXMAN_a8r8g8b8,
PICT_x8r8g8b8 = PIXMAN_x8r8g8b8,
PICT_a8b8g8r8 = PIXMAN_a8b8g8r8,
PICT_x8b8g8r8 = PIXMAN_x8b8g8r8,
PICT_b8g8r8a8 = PIXMAN_b8g8r8a8,
PICT_b8g8r8x8 = PIXMAN_b8g8r8x8,

/* 24bpp formats */
PICT_r8g8b8 = PIXMAN_r8g8b8,
PICT_b8g8r8 = PIXMAN_b8g8r8,

/* 16bpp formats */
PICT_r5g6b5 = PIXMAN_r5g6b5,
PICT_b5g6r5 = PIXMAN_b5g6r5,

PICT_a1r5g5b5 = PIXMAN_a1r5g5b5,
PICT_x1r5g5b5 = PIXMAN_x1r5g5b5,
PICT_a1b5g5r5 = PIXMAN_a1b5g5r5,
PICT_x1b5g5r5 = PIXMAN_x1b5g5r5,
PICT_a4r4g4b4 = PIXMAN_a4r4g4b4,
PICT_x4r4g4b4 = PIXMAN_x4r4g4b4,
PICT_a4b4g4r4 = PIXMAN_a4b4g4r4,
PICT_x4b4g4r4 = PIXMAN_x4b4g4r4,

/* 8bpp formats */
PICT_a8 = PIXMAN_a8,
PICT_r3g3b2 = PIXMAN_r3g3b2,
PICT_b2g3r3 = PIXMAN_b2g3r3,
PICT_a2r2g2b2 = PIXMAN_a2r2g2b2,
PICT_a2b2g2r2 = PIXMAN_a2b2g2r2,

PICT_c8 = PIXMAN_c8,
PICT_g8 = PIXMAN_g8,

PICT_x4a4 = PIXMAN_x4a4,

PICT_x4c4 = PIXMAN_x4c4,
PICT_x4g4 = PIXMAN_x4g4,

/* 4bpp formats */
PICT_a4 = PIXMAN_a4,
PICT_r1g2b1 = PIXMAN_r1g2b1,
PICT_b1g2r1 = PIXMAN_b1g2r1,
PICT_a1r1g1b1 = PIXMAN_a1r1g1b1,
PICT_a1b1g1r1 = PIXMAN_a1b1g1r1,

PICT_c4 = PIXMAN_c4,
PICT_g4 = PIXMAN_g4,

/* 1bpp formats */
PICT_a1 = PIXMAN_a1,

PICT_g1 = PIXMAN_g1
} PictFormatShort;
 
/* Extract the 4-bit `a` field (bits 12..15) of a PIXMAN_FORMAT code. */
#define PIXMAN_FORMAT_A(f) (((f) >> 12) & 0x0f)
/* Extract the combined r/g/b fields (low 12 bits) of a PIXMAN_FORMAT code. */
#define PIXMAN_FORMAT_RGB(f) (((f) ) & 0xfff)

#define PICT_FORMAT_A(f) PIXMAN_FORMAT_A(f)

/* Repeat modes */
#define RepeatNone 0
#define RepeatNormal 1
#define RepeatPad 2
#define RepeatReflect 3

/* Filter identifiers */
#define PictFilterNearest 0
#define PictFilterBilinear 1

#define PictFilterFast 2
#define PictFilterGood 3
#define PictFilterBest 4

#define PictFilterConvolution 5
 
/* 32-bit fixed-point value; the type name indicates a 16.16 split. */
typedef int32_t pixman_fixed_16_16_t;
typedef pixman_fixed_16_16_t pixman_fixed_t;

/* 3x3 matrix of fixed-point values, indexed as matrix[row][column]. */
struct pixman_transform
{
pixman_fixed_t matrix[3][3];
};

typedef unsigned long Picture;
typedef unsigned long PictFormat;

/* Pointer aliases for the structures declared further down. */
typedef struct _Pixmap *PixmapPtr;
typedef struct _Picture *PicturePtr;
typedef struct _Drawable *DrawablePtr;
typedef struct _PictFormat *PictFormatPtr;

typedef struct pixman_transform PictTransform, *PictTransformPtr;
 
 
 
/*
 * Header shared by drawable objects.  The blit entry points in this driver
 * fill in only bitsPerPixel, width and height of a zeroed instance.
 */
typedef struct _Drawable {
unsigned char type; /* DRAWABLE_<type> */
unsigned char class; /* specific to type */
unsigned char depth;
unsigned char bitsPerPixel;
unsigned int id; /* resource id */
short x; /* window: screen absolute, pixmap: 0 */
short y; /* window: screen absolute, pixmap: 0 */
unsigned short width;
unsigned short height;
} DrawableRec;
 
/*
* PIXMAP -- device dependent
*
* Trimmed-down pixmap record: the private-data members are commented out,
* leaving the drawable header plus bookkeeping fields.
*/

typedef struct _Pixmap {
DrawableRec drawable;
// PrivateRec *devPrivates;
int refcnt;
int devKind; /* This is the pitch of the pixmap, typically width*bpp/8. */
// DevUnion devPrivate; /* When !NULL, devPrivate.ptr points to the raw pixel data. */
#ifdef COMPOSITE
short screen_x;
short screen_y;
#endif
unsigned usage_hint; /* see CREATE_PIXMAP_USAGE_* */

PixmapPtr master_pixmap; /* pointer to master copy of pixmap for pixmap sharing */
} PixmapRec;
 
/* Picture format record; the direct/index sub-records are commented out. */
typedef struct _PictFormat {
uint32_t id;
uint32_t format; /* except bpp */
unsigned char type;
unsigned char depth;
// DirectFormatRec direct;
// IndexFormatRec index;
} PictFormatRec;
 
/*
 * Picture record reduced to its drawable pointer, format code and the
 * packed attribute bit-fields; alpha map, transform, source picture and
 * filter parameters are commented out.
 */
typedef struct _Picture {
DrawablePtr pDrawable;
// PictFormatPtr pFormat;
PictFormatShort format; /* PICT_FORMAT */
int refcnt;
uint32_t id;
unsigned int repeat:1;
unsigned int graphicsExposures:1;
unsigned int subWindowMode:1;
unsigned int polyEdge:1;
unsigned int polyMode:1;
unsigned int freeCompClip:1;
unsigned int clientClipType:2;
unsigned int componentAlpha:1;
unsigned int repeatType:2;
unsigned int filter:3;
// unsigned int stateChanges:CPLastBit;
// unsigned int unused:18 - CPLastBit;

// PicturePtr alphaMap;

// PictTransform *transform;

// SourcePictPtr pSourcePict;
// xFixed *filter_params;
// int filter_nparams;
} PictureRec;
 
#define PolyModePrecise 0
#define PolyModeImprecise 1


/*
 * Framebuffer description: geometry, pitch, tiling mode and the buffer
 * object that backs the framebuffer.
 */
struct sna_fb
{
uint32_t width;
uint32_t height;
uint32_t pitch;
uint32_t tiling;

struct kgem_bo *fb_bo;
};

/* Box given by two corner points with 16-bit coordinates. */
struct pixman_box16
{
int16_t x1, y1, x2, y2;
};

typedef struct pixman_box16 BoxRec;
typedef unsigned int CARD32;
typedef unsigned short CARD16;
 
#include "sna_render.h"
#include "kgem.h"
 
/* Raster operation codes (only the two used here are defined). */
#define GXclear 0x0
#define GXcopy 0x3

/* Compositing operators. */
#define PictOpClear 0
#define PictOpSrc 1
#define PictOpDst 2
#define PictOpOver 3
#define PictOpOverReverse 4
#define PictOpIn 5
#define PictOpInReverse 6
#define PictOpOut 7
#define PictOpOutReverse 8
#define PictOpAtop 9
#define PictOpAtopReverse 10
#define PictOpXor 11
#define PictOpAdd 12
#define PictOpSaturate 13
/* same value as PictOpSaturate, the highest operator defined above */
#define PictOpMaximum 13


/* Cursor dimensions; the height is defined to equal the width. */
#define SNA_CURSOR_X 64
#define SNA_CURSOR_Y SNA_CURSOR_X

struct sna_client {
int is_compositor; /* only 4 bits used */
};
 
 
//#define assert(x)
 
 
/*
 * Per-device driver state.  `kgem` is embedded so that to_sna_from_kgem()
 * can recover the enclosing struct sna from a struct kgem pointer.
 */
struct sna {
struct kgem kgem;

unsigned scrn; /* set to the service handle by sna_init() */

unsigned flags; /* SNA_* bits below; cleared by sna_init() */
#define SNA_NO_WAIT 0x1
#define SNA_NO_FLIP 0x2
#define SNA_TRIPLE_BUFFER 0x4
#define SNA_TEAR_FREE 0x10
#define SNA_FORCE_SHADOW 0x20
#define SNA_FLUSH_GTT 0x40
#define SNA_IS_HOSTED 0x80
#define SNA_PERFORMANCE 0x100
#define SNA_POWERSAVE 0x200
#define SNA_REPROBE 0x80000000

unsigned cpu_features; /* result of sna_cpu_detect(); bits below */
#define MMX 0x1
#define SSE 0x2
#define SSE2 0x4
#define SSE3 0x8
#define SSSE3 0x10
#define SSE4_1 0x20
#define SSE4_2 0x40
#define AVX 0x80
#define AVX2 0x100

struct list flush_pixmaps;
struct list active_pixmaps;





unsigned int tiling; /* SNA_TILING_* bits; sna_init() sets this to 0 */
#define SNA_TILING_FB 0x1
#define SNA_TILING_2D 0x2
#define SNA_TILING_ALL (~0)

struct pci_device *PciInfo;
const struct intel_device_info *info; /* from intel_detect_chipset() */

// PicturePtr clear;
struct {
uint32_t fill_bo;
uint32_t fill_pixel;
uint32_t fill_alu;
} blt_state;
/* one render-state variant per hardware generation */
union {
struct gen3_render_state gen3;
struct gen4_render_state gen4;
struct gen5_render_state gen5;
struct gen6_render_state gen6;
struct gen7_render_state gen7;
} render_state;


/* Broken-out options. */
// OptionInfoPtr Options;

/* Driver phase/state information */
// Bool suspended;

struct sna_render render;

#if DEBUG_MEMORY
struct {
int pixmap_allocs;
int cpu_bo_allocs;
size_t shadow_pixels_bytes;
size_t cpu_bo_bytes;
} debug_memory;
#endif
};
 
/*
 * Recover the struct sna that embeds `kgem` as its `kgem` member.  Only
 * valid for a kgem that really is part of a struct sna.
 */
static inline struct sna *
to_sna_from_kgem(struct kgem *kgem)
{
return container_of(kgem, struct sna, kgem);
}
 
/* Element count of a true array (not of a pointer). */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
#endif

/* Round i up to a multiple of m; the mask trick requires m to be a power of two. */
#ifndef ALIGN
#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
#endif

/* MIN and MAX evaluate their arguments more than once. */
#ifndef MIN
#define MIN(a,b) ((a) <= (b) ? (a) : (b))
#endif

#ifndef MAX
#define MAX(a,b) ((a) >= (b) ? (a) : (b))
#endif
static inline bool
_sna_transform_point(const PictTransform *transform,
int64_t x, int64_t y, int64_t result[3])
{
int j;
 
for (j = 0; j < 3; j++)
result[j] = (transform->matrix[j][0] * x +
transform->matrix[j][1] * y +
transform->matrix[j][2]);
 
return result[2] != 0;
}
 
/*
 * Transform (x, y) with `transform` and write the first two components,
 * each divided by the third, to *x_out and *y_out.  A zero third component
 * is not treated specially.
 */
static inline void
_sna_get_transformed_coordinates(int x, int y,
                                 const PictTransform *transform,
                                 float *x_out, float *y_out)
{
    int64_t v[3];

    _sna_transform_point(transform, x, y, v);

    *x_out = v[0] / (double)v[2];
    *y_out = v[1] / (double)v[2];
}
 
static inline void
_sna_get_transformed_scaled(int x, int y,
const PictTransform *transform, const float *sf,
float *x_out, float *y_out)
{
*x_out = sf[0] * (transform->matrix[0][0] * x +
transform->matrix[0][1] * y +
transform->matrix[0][2]);
 
*y_out = sf[1] * (transform->matrix[1][0] * x +
transform->matrix[1][1] * y +
transform->matrix[1][2]);
}
 
/*
 * Out-of-line transform helpers; their definitions are not part of this
 * header.
 */
void
sna_get_transformed_coordinates(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out);

void
sna_get_transformed_coordinates_3d(int x, int y,
const PictTransform *transform,
float *x_out, float *y_out, float *z_out);

/* Predicates on a transform; the last two also hand back the translation. */
bool sna_transform_is_affine(const PictTransform *t);
bool sna_transform_is_integer_translation(const PictTransform *t,
int16_t *tx, int16_t *ty);
bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
static inline bool
sna_affine_transform_is_rotation(const PictTransform *t)
{
assert(sna_transform_is_affine(t));
return t->matrix[0][1] | t->matrix[1][0];
}
 
/*
 * Compare two transforms.  The same pointer (including two NULLs) compares
 * equal; exactly one NULL compares unequal; otherwise the two structures
 * are compared bytewise.
 */
static inline bool
sna_transform_equal(const PictTransform *a, const PictTransform *b)
{
    if (a == b)
        return true;

    if (a != NULL && b != NULL)
        return memcmp(a, b, sizeof(*a)) == 0;

    return false;
}
#endif /* _SNA_H_ */
/drivers/video/Intel-2D/sna/sna_cpu.c
0,0 → 1,116
/*
* Copyright (c) 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_cpuid.h"
 
/*
 * Execute the xgetbv instruction with `index` in ECX; the EAX and EDX
 * results are stored in the lvalues passed as `eax` and `edx`.
 */
#define xgetbv(index,eax,edx) \
__asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))

/* Internal flag in sna_cpu_detect(): xgetbv(0) reported bits 1 and 2 set. */
#define has_YMM 0x1
 
/*
 * Probe the CPU with cpuid and return a mask of the MMX/SSE.../AVX2 feature
 * bits declared alongside struct sna.
 *
 * AVX and AVX2 are only reported when, in addition to the cpuid bit,
 * OSXSAVE is set and xgetbv(0) returns both bits 1 and 2 in its low word.
 */
unsigned sna_cpu_detect(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned max_leaf = __get_cpuid_max(BASIC_CPUID, NULL);
    unsigned detected = 0;
    unsigned os_state = 0;

    if (max_leaf >= 1) {
        __cpuid(1, eax, ebx, ecx, edx);

        /* leaf 1, EDX */
        if (edx & bit_MMX)
            detected |= MMX;
        if (edx & bit_SSE)
            detected |= SSE;
        if (edx & bit_SSE2)
            detected |= SSE2;

        /* leaf 1, ECX */
        if (ecx & bit_SSE3)
            detected |= SSE3;
        if (ecx & bit_SSSE3)
            detected |= SSSE3;
        if (ecx & bit_SSE4_1)
            detected |= SSE4_1;
        if (ecx & bit_SSE4_2)
            detected |= SSE4_2;

        if (ecx & bit_OSXSAVE) {
            unsigned int xcr_lo, xcr_hi;

            xgetbv(0, xcr_lo, xcr_hi);
            if ((xcr_lo & 6) == 6)
                os_state |= has_YMM;
        }

        if ((os_state & has_YMM) && (ecx & bit_AVX))
            detected |= AVX;
    }

    if (max_leaf >= 7) {
        /* leaf 7, sub-leaf 0, EBX */
        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        if ((os_state & has_YMM) && (ebx & bit_AVX2))
            detected |= AVX2;
    }

    return detected;
}
 
/*
 * Format a feature mask as text, e.g. "x86, sse2, sse3".
 *
 * features: mask as returned by sna_cpu_detect(); MMX and SSE are not
 *           printed.
 * line:     caller-provided buffer; no size is passed, so the caller must
 *           make it large enough for the longest possible string.
 *
 * Returns `line`.
 */
char *sna_cpu_features_to_string(unsigned features, char *line)
{
    static const struct {
        unsigned flag;
        const char *label;
    } names[] = {
        { SSE2,   ", sse2"   },
        { SSE3,   ", sse3"   },
        { SSSE3,  ", ssse3"  },
        { SSE4_1, ", sse4.1" },
        { SSE4_2, ", sse4.2" },
        { AVX,    ", avx"    },
        { AVX2,   ", avx2"   },
    };
    char *cursor = line;
    unsigned i;

#ifdef __x86_64__
    cursor += sprintf (cursor, "x86-64");
#else
    cursor += sprintf (cursor, "x86");
#endif

    for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
        if (features & names[i].flag)
            cursor += sprintf (cursor, "%s", names[i].label);
    }

    return line;
}
/drivers/video/Intel-2D/sna/sna_cpuid.h
0,0 → 1,86
/*
* Copyright (c) 2013 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
/* Small wrapper around compiler specific implementation details of cpuid */
 
#ifndef SNA_CPUID_H
#define SNA_CPUID_H
 
#include "compiler.h"
 
/*
 * When the HAS_GCC(4, 4) check passes, <cpuid.h> is included to provide the
 * cpuid helpers.  Otherwise they are stubbed out: __get_cpuid_max()
 * evaluates to 0, and __cpuid()/__cpuid_count() expand to nothing, which
 * leaves their output arguments unmodified.
 */
#if HAS_GCC(4, 4) /* for __cpuid_count() */
#include <cpuid.h>
#else
#define __get_cpuid_max(x, y) 0
#define __cpuid(level, a, b, c, d)
#define __cpuid_count(level, count, a, b, c, d)
#endif
 
/*
 * Base leaf numbers of the two cpuid ranges.
 * NOTE(review): presumably passed to __get_cpuid_max() - confirm in the
 * detection code.
 */
#define BASIC_CPUID 0x0
#define EXTENDED_CPUID 0x80000000

/*
 * Feature bit masks.  Each is only defined here when the name is not
 * already a macro (e.g. supplied by <cpuid.h>), so these act as fallbacks.
 * NOTE(review): which output register each mask applies to is not visible
 * in this header; the register notes below follow GCC's <cpuid.h> layout
 * and should be confirmed against the detection code.
 */

/* NOTE(review): MMX/SSE/SSE2 are expected in EDX of leaf 1 - confirm */
#ifndef bit_MMX
#define bit_MMX (1 << 23)
#endif

#ifndef bit_SSE
#define bit_SSE (1 << 25)
#endif

#ifndef bit_SSE2
#define bit_SSE2 (1 << 26)
#endif

/* NOTE(review): SSE3 through AVX are expected in ECX of leaf 1 - confirm */
#ifndef bit_SSE3
#define bit_SSE3 (1 << 0)
#endif

#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif

#ifndef bit_SSE4_1
#define bit_SSE4_1 (1 << 19)
#endif

#ifndef bit_SSE4_2
#define bit_SSE4_2 (1 << 20)
#endif

#ifndef bit_OSXSAVE
#define bit_OSXSAVE (1 << 27)
#endif

#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif

/* The detection code tests this mask against an ebx value. */
#ifndef bit_AVX2
#define bit_AVX2 (1<<5)
#endif
 
#endif /* SNA_CPUID_H */
/drivers/video/Intel-2D/sna/sna_reg.h
0,0 → 1,82
#ifndef SNA_REG_H
#define SNA_REG_H
 
/* Flush commands (opcode placed at bit 23) */
#define MI_FLUSH				(0x04 << 23)
#define MI_FLUSH_DW				(0x26 << 23)

/* Flush flag bits */
#define MI_WRITE_DIRTY_STATE			(1 << 4)
#define MI_END_SCENE				(1 << 3)
#define MI_GLOBAL_SNAPSHOT_COUNT_RESET		(1 << 3)
#define MI_INHIBIT_RENDER_CACHE_FLUSH		(1 << 2)
#define MI_STATE_INSTRUCTION_CACHE_FLUSH	(1 << 1)
#define MI_INVALIDATE_MAP_CACHE			(1 << 0)
/* broadwater flush bits */
#define BRW_MI_GLOBAL_SNAPSHOT_RESET		(1 << 3)

/* Batch buffer terminator */
#define MI_BATCH_BUFFER_END			(0xA << 23)
 
/* Noop */
#define MI_NOOP 0x00
#define MI_NOOP_WRITE_ID (1<<22)
/*
 * Mask covering every bit below MI_NOOP_WRITE_ID (bits 21:0).
 * The shift must be parenthesised: '-' binds tighter than '<<', so the
 * unparenthesised form "1<<22 - 1" evaluates to 1 << 21 instead.
 */
#define MI_NOOP_ID_MASK ((1<<22) - 1)
 
/* Wait-for-event command and its event selector bits */
#define MI_WAIT_FOR_EVENT			(0x03 << 23)
#define MI_WAIT_FOR_PIPEB_SVBLANK		(1 << 18)
#define MI_WAIT_FOR_PIPEA_SVBLANK		(1 << 17)
#define MI_WAIT_FOR_OVERLAY_FLIP		(1 << 16)
#define MI_WAIT_FOR_PIPEB_VBLANK		(1 << 7)
#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW	(1 << 5)
#define MI_WAIT_FOR_PIPEA_VBLANK		(1 << 3)
#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW	(1 << 1)

/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */
#define MI_LOAD_SCAN_LINES_INCL			(0x12 << 23)
#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA	(0)
#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB	(0x1 << 20)
 
/* BLT command modifier bits */
#define BLT_WRITE_ALPHA				(1 << 21)
#define BLT_WRITE_RGB				(1 << 20)
#define BLT_SRC_TILED				(1 << 15)
#define BLT_DST_TILED				(1 << 11)

/*
 * BLT command headers: (2 << 29) in the top bits, the opcode at bit 22,
 * and a low-order constant for most commands.
 */
#define COLOR_BLT_CMD				((2 << 29) | (0x40 << 22) | 0x3)
#define XY_COLOR_BLT				((2 << 29) | (0x50 << 22) | 0x4)
#define XY_SETUP_BLT				((2 << 29) | (1 << 22) | 6)
#define XY_SETUP_MONO_PATTERN_SL_BLT		((2 << 29) | (0x11 << 22) | 7)
#define XY_SETUP_CLIP				((2 << 29) | (3 << 22) | 1)
#define XY_SCANLINE_BLT				((2 << 29) | (0x25 << 22) | 1)
#define XY_TEXT_IMMEDIATE_BLT			((2 << 29) | (0x31 << 22) | (1 << 16))
#define XY_SRC_COPY_BLT_CMD			((2 << 29) | (0x53 << 22) | 6)
#define SRC_COPY_BLT_CMD			((2 << 29) | (0x43 << 22) | 0x4)
#define XY_PAT_BLT				((2 << 29) | (0x51 << 22) | 0x4)
#define XY_PAT_BLT_IMMEDIATE			((2 << 29) | (0x72 << 22))
#define XY_MONO_PAT				((0x2 << 29) | (0x52 << 22) | 0x7)
#define XY_MONO_SRC_COPY			((0x2 << 29) | (0x54 << 22) | 0x6)
#define XY_MONO_SRC_COPY_IMM			((0x2 << 29) | (0x71 << 22))
#define XY_FULL_MONO_PATTERN_BLT		((0x2 << 29) | (0x57 << 22) | 0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT	((0x2 << 29) | (0x58 << 22) | 0xa)
 
/*
 * FLUSH commands
 *
 * BRW_3D() builds a command header: (3 << 29) combined with the pipeline
 * selector at bit 27, the opcode at bit 24 and the sub-opcode at bit 16.
 */
#define BRW_3D(Pipeline,Opcode,Subopcode)	\
	((3 << 29) |				\
	 ((Pipeline) << 27) |			\
	 ((Opcode) << 24) |			\
	 ((Subopcode) << 16))

#define PIPE_CONTROL				BRW_3D(3, 2, 0)
/* PIPE_CONTROL write selection (two-bit field at bit 14) */
#define PIPE_CONTROL_NOWRITE			(0 << 14)
#define PIPE_CONTROL_WRITE_QWORD		(1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH		(2 << 14)
#define PIPE_CONTROL_WRITE_TIME			(3 << 14)
/* PIPE_CONTROL flag bits */
#define PIPE_CONTROL_DEPTH_STALL		(1 << 13)
#define PIPE_CONTROL_WC_FLUSH			(1 << 12)
#define PIPE_CONTROL_IS_FLUSH			(1 << 11)
#define PIPE_CONTROL_TC_FLUSH			(1 << 10)
#define PIPE_CONTROL_NOTIFY_ENABLE		(1 << 8)
#define PIPE_CONTROL_GLOBAL_GTT			(1 << 2)
#define PIPE_CONTROL_LOCAL_PGTT			(0 << 2)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1 << 0)
 
#endif
/drivers/video/Intel-2D/sna/sna_render.h
0,0 → 1,786
#ifndef SNA_RENDER_H
#define SNA_RENDER_H
 
#include "compiler.h"
 
#include <stdbool.h>
#include <stdint.h>
 
/* Number of entries in the (currently compiled-out) gradient cache array */
#define GRADIENT_CACHE_SIZE	16

#define GXinvalid		0xff

/* Hardware capability bits */
#define HW_BIT_BLIT		(1 << 0)	/* BGRX blitter */
#define HW_TEX_BLIT		(1 << 1)	/* stretch blit */
#define HW_VID_BLIT		(1 << 2)	/* planar and packed video */
 
struct sna;
struct sna_glyph;
struct sna_video;
struct sna_video_frame;
struct brw_compile;
 
/* A 2D position stored as two signed 16-bit values. */
struct sna_coordinate {
	int16_t x;
	int16_t y;
};

/*
 * One rectangle of a composite operation: the origin in the source, mask
 * and destination, followed by the shared extents.
 */
struct sna_composite_rectangles {
	struct sna_coordinate src;
	struct sna_coordinate mask;
	struct sna_coordinate dst;
	int16_t width;
	int16_t height;
};
 
/*
 * State of one composite operation.
 *
 * Holds the emit/finish callbacks, the destination description, the source
 * and mask channels, vertex layout information and a union of per-backend
 * private state (blt, gen3 .. gen7).
 */
struct sna_composite_op {
/* Emit one rectangle described by r. */
fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);

/* Called when the operation is finished. */
void (*done)(struct sna *sna, const struct sna_composite_op *op);

struct sna_damage **damage;

uint32_t op;

/* Destination: pixmap, format, backing buffer object, offset and size. */
struct {
PixmapPtr pixmap;
CARD32 format;
struct kgem_bo *bo;
int16_t x, y;
uint16_t width, height;
} dst;

/* Description of one input channel; instantiated twice (src and mask). */
struct sna_composite_channel {
struct kgem_bo *bo;
PictTransform *transform;
uint16_t width;
uint16_t height;
uint32_t pict_format;
uint32_t card_format;
uint32_t filter;
uint32_t repeat;
uint32_t is_affine : 1;
uint32_t is_solid : 1;
uint32_t is_linear : 1;
uint32_t is_opaque : 1;
uint32_t alpha_fixup : 1;
uint32_t rb_reversed : 1;
int16_t offset[2];
float scale[2];

// pixman_transform_t embedded_transform;

/* Channel data specific to the linear, gen2 and gen3 cases. */
union {
struct {
float dx, dy, offset;
} linear;
struct {
uint32_t pixel;
} gen2;
struct gen3_shader_channel {
int type;
uint32_t mode;
uint32_t constants;
} gen3;
} u;
} src, mask;
/* Flags describing the operation as a whole. */
uint32_t is_affine : 1;
uint32_t has_component_alpha : 1;
uint32_t need_magic_ca_pass : 1;
uint32_t rb_reversed : 1;

int16_t floats_per_vertex;
int16_t floats_per_rect;
/* Emit the vertex data for one rectangle. */
fastcall void (*prim_emit)(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r);

/*
 * Redirection state; real_bo and damage are reset to NULL by
 * sna_render_composite_redirect_init().
 */
struct sna_composite_redirect {
struct kgem_bo *real_bo;
struct sna_damage **real_damage, *damage;
BoxRec box;
} redirect;

/* Backend-private state; only one member is meaningful at a time. */
union {
struct sna_blt_state {
PixmapPtr src_pixmap;
int16_t sx, sy;

uint32_t inplace :1;
uint32_t overwrites:1;
uint32_t bpp : 6;

uint32_t cmd;
uint32_t br13;
uint32_t pitch[2];
uint32_t pixel;
struct kgem_bo *bo[2];
} blt;

struct {
float constants[8];
uint32_t num_constants;
} gen3;

struct {
int wm_kernel;
int ve_id;
} gen4;

struct {
int16_t wm_kernel;
int16_t ve_id;
} gen5;

struct {
uint32_t flags;
} gen6;

struct {
uint32_t flags;
} gen7;
} u;

void *priv;
};
 
/*
 * A copy operation: a composite op (embedded as the first member) plus
 * copy-specific callbacks.
 */
struct sna_copy_op {
struct sna_composite_op base;

/* Copy a w x h area from (sx, sy) to (dx, dy). */
void (*blt)(struct sna *sna, const struct sna_copy_op *op,
int16_t sx, int16_t sy,
int16_t w, int16_t h,
int16_t dx, int16_t dy);
/* Called when the copy operation is finished. */
void (*done)(struct sna *sna, const struct sna_copy_op *op);
};
 
/*
 * Render backend state and entry points.
 *
 * Contains the backend limits, the function pointers a backend provides,
 * and the vertex buffer bookkeeping.  The sections under "#if 0" are
 * compiled out in this build.
 */
struct sna_render {
/*
 * Counter incremented by sna_vertex_acquire__locked() and decremented by
 * sna_vertex_release__locked().
 */
int active;

int caps;

/* Compared against widths/heights by need_tiling() and need_redirect(). */
int max_3d_size;
int max_3d_pitch;

unsigned prefer_gpu;
#define PREFER_GPU_BLT 0x1
#define PREFER_GPU_RENDER 0x2
#define PREFER_GPU_SPANS 0x4

bool (*composite)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src, PicturePtr mask,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t w, int16_t h,
struct sna_composite_op *tmp);

/* Compiled out: spans, video, fill, clear and copy_boxes entry points. */
#if 0
bool (*check_composite_spans)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src,
int16_t w, int16_t h, unsigned flags);
bool (*composite_spans)(struct sna *sna, uint8_t op,
PicturePtr dst, PicturePtr src,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t w, int16_t h,
unsigned flags,
struct sna_composite_spans_op *tmp);
#define COMPOSITE_SPANS_RECTILINEAR 0x1
#define COMPOSITE_SPANS_INPLACE_HINT 0x2

bool (*video)(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
PixmapPtr pixmap);

bool (*fill_boxes)(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n);
bool (*fill)(struct sna *sna, uint8_t alu,
PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
struct sna_fill_op *tmp);
bool (*fill_one)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint8_t alu);
bool (*clear)(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo);

bool (*copy_boxes)(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags);
#define COPY_LAST 0x1
#define COPY_SYNC 0x2

#endif

/* Same signature as the genN_blit_tex() functions declared in this header. */
bool (*blit_tex)(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);

bool (*copy)(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
struct sna_copy_op *op);

void (*flush)(struct sna *sna);
void (*reset)(struct sna *sna);
void (*fini)(struct sna *sna);

/* Compiled out: alpha, solid, gradient and glyph caches. */
#if 0

struct sna_alpha_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[256+7];
} alpha_cache;

struct sna_solid_cache {
struct kgem_bo *cache_bo;
struct kgem_bo *bo[1024];
uint32_t color[1025];
int last;
int size;
int dirty;
} solid_cache;

struct {
struct sna_gradient_cache {
struct kgem_bo *bo;
int nstops;
PictGradientStop *stops;
} cache[GRADIENT_CACHE_SIZE];
int size;
} gradient_cache;

struct sna_glyph_cache{
PicturePtr picture;
struct sna_glyph **glyphs;
uint16_t count;
uint16_t evict;
} glyph[2];
pixman_image_t *white_image;
PicturePtr white_picture;
#if HAS_PIXMAN_GLYPHS
pixman_glyph_cache_t *glyph_cache;
#endif

#endif

/*
 * Vertex buffer bookkeeping.  vertex_used and vertex_size count floats:
 * vertex_emit() stores one float at vertices[vertex_used++] and
 * vertex_space() returns vertex_size - vertex_used.
 */
uint16_t vb_id;
uint16_t vertex_offset;
uint16_t vertex_start;
uint16_t vertex_index;
uint16_t vertex_used;
uint16_t vertex_size;
uint16_t vertex_reloc[16];
int nvertex_reloc;

struct kgem_bo *vbo;
float *vertices;

/* NOTE(review): presumably the initial backing store for vertices - confirm */
float vertex_data[1024];
};
 
/*
 * Per-device state for the gen2 backend.
 * NOTE(review): presumably tracks the most recently emitted hardware state
 * so redundant commands can be skipped - confirm in the gen2 backend.
 */
struct gen2_render_state {
uint32_t target;
bool need_invariant;
uint32_t logic_op_enabled;
uint32_t ls1, ls2, vft;
uint32_t diffuse;
uint32_t specular;
};
 
/*
 * Per-device state for the gen3 backend.
 * NOTE(review): the last_* members presumably remember the values most
 * recently emitted to the hardware - confirm in the gen3 backend.
 */
struct gen3_render_state {
uint32_t current_dst;
bool need_invariant;
uint32_t tex_count;
uint32_t last_drawrect_limit;
uint32_t last_target;
uint32_t last_blend;
uint32_t last_constants;
uint32_t last_sampler;
uint32_t last_shader;
uint32_t last_diffuse;
uint32_t last_specular;

uint16_t last_vertex_offset;
uint16_t floats_per_vertex;
uint16_t last_floats_per_vertex;

uint32_t tex_map[4];
uint32_t tex_handle[2];
uint32_t tex_delta[2];
};
 
/*
 * Per-device state for the gen4 backend.
 * NOTE(review): vs/sf/wm/cc are presumably offsets of the corresponding
 * state objects inside general_bo - confirm in the gen4 backend.
 */
struct gen4_render_state {
struct kgem_bo *general_bo;

uint32_t vs;
uint32_t sf;
uint32_t wm;
uint32_t cc;

int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;

bool needs_invariant;
bool needs_urb;
};
 
/*
 * Per-device state for the gen5 backend.  Compared with
 * struct gen4_render_state, sf is a two-element array and there is no
 * needs_urb flag.
 */
struct gen5_render_state {
struct kgem_bo *general_bo;

uint32_t vs;
uint32_t sf[2];
uint32_t wm;
uint32_t cc;

int ve_id;
uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t last_pipelined_pointers;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;

bool needs_invariant;
};
 
/*
 * Indices of the gen6 WM kernels; GEN6_KERNEL_COUNT sizes the first
 * dimension of gen6_render_state.wm_kernel[][].
 */
enum {
	GEN6_WM_KERNEL_NOMASK		= 0,
	GEN6_WM_KERNEL_NOMASK_P		= 1,

	GEN6_WM_KERNEL_MASK		= 2,
	GEN6_WM_KERNEL_MASK_P		= 3,

	GEN6_WM_KERNEL_MASKCA		= 4,
	GEN6_WM_KERNEL_MASKCA_P		= 5,

	GEN6_WM_KERNEL_MASKSA		= 6,
	GEN6_WM_KERNEL_MASKSA_P		= 7,

	GEN6_WM_KERNEL_OPACITY		= 8,
	GEN6_WM_KERNEL_OPACITY_P	= 9,

	GEN6_WM_KERNEL_VIDEO_PLANAR	= 10,
	GEN6_WM_KERNEL_VIDEO_PACKED	= 11,
	GEN6_KERNEL_COUNT		= 12
};
 
/*
 * Per-device state for the gen6 backend.
 * wm_kernel holds three values per kernel index.
 * NOTE(review): the three slots presumably correspond to different
 * dispatch widths - confirm in the gen6 backend.
 */
struct gen6_render_state {
const struct gt_info *info;
struct kgem_bo *general_bo;

uint32_t vs_state;
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
uint32_t wm_kernel[GEN6_KERNEL_COUNT][3];

uint32_t cc_blend;

uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t blend;
uint32_t samplers;
uint32_t kernel;

uint16_t num_sf_outputs;
uint16_t ve_id;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;

bool needs_invariant;
bool first_state_packet;
};
 
/*
 * Indices of the gen7 WM kernels; GEN7_WM_KERNEL_COUNT sizes the first
 * dimension of gen7_render_state.wm_kernel[][].
 */
enum {
	GEN7_WM_KERNEL_NOMASK		= 0,
	GEN7_WM_KERNEL_NOMASK_P		= 1,

	GEN7_WM_KERNEL_MASK		= 2,
	GEN7_WM_KERNEL_MASK_P		= 3,

	GEN7_WM_KERNEL_MASKCA		= 4,
	GEN7_WM_KERNEL_MASKCA_P		= 5,

	GEN7_WM_KERNEL_MASKSA		= 6,
	GEN7_WM_KERNEL_MASKSA_P		= 7,

	GEN7_WM_KERNEL_OPACITY		= 8,
	GEN7_WM_KERNEL_OPACITY_P	= 9,

	GEN7_WM_KERNEL_VIDEO_PLANAR	= 10,
	GEN7_WM_KERNEL_VIDEO_PACKED	= 11,
	GEN7_WM_KERNEL_COUNT		= 12
};
 
/*
 * Per-device state for the gen7 backend.  Mirrors struct gen6_render_state
 * except that the final flag is emit_flush instead of first_state_packet.
 */
struct gen7_render_state {
const struct gt_info *info;
struct kgem_bo *general_bo;

uint32_t vs_state;
uint32_t sf_state;
uint32_t sf_mask_state;
uint32_t wm_state;
uint32_t wm_kernel[GEN7_WM_KERNEL_COUNT][3];

uint32_t cc_blend;

uint32_t drawrect_offset;
uint32_t drawrect_limit;
uint32_t blend;
uint32_t samplers;
uint32_t kernel;

uint16_t num_sf_outputs;
uint16_t ve_id;
uint16_t last_primitive;
int16_t floats_per_vertex;
uint16_t surface_table;

bool needs_invariant;
bool emit_flush;
};
 
/*
 * Growable byte buffer used to assemble static state before upload:
 * data points at the storage, size is its capacity in bytes and used is
 * the number of bytes handed out so far.
 */
struct sna_static_stream {
	uint32_t size;
	uint32_t used;
	uint8_t *data;
};
 
int sna_static_stream_init(struct sna_static_stream *stream);
uint32_t sna_static_stream_add(struct sna_static_stream *stream,
const void *data, uint32_t len, uint32_t align);
void *sna_static_stream_map(struct sna_static_stream *stream,
uint32_t len, uint32_t align);
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream,
void *ptr);
unsigned sna_static_stream_compile_sf(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *));
 
unsigned sna_static_stream_compile_wm(struct sna *sna,
struct sna_static_stream *stream,
bool (*compile)(struct brw_compile *, int),
int width);
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
struct sna_static_stream *stream);
 
struct kgem_bo *
sna_render_get_solid(struct sna *sna,
uint32_t color);
 
void
sna_render_flush_solid(struct sna *sna);
 
 
uint32_t sna_rgba_for_color(uint32_t color, int depth);
uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format);
bool sna_get_rgba_from_pixel(uint32_t pixel,
uint16_t *red,
uint16_t *green,
uint16_t *blue,
uint16_t *alpha,
uint32_t format);
bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
const char *no_render_init(struct sna *sna);
const char *gen2_render_init(struct sna *sna, const char *backend);
const char *gen3_render_init(struct sna *sna, const char *backend);
const char *gen4_render_init(struct sna *sna, const char *backend);
const char *gen5_render_init(struct sna *sna, const char *backend);
const char *gen6_render_init(struct sna *sna, const char *backend);
const char *gen7_render_init(struct sna *sna, const char *backend);
 
#if 0
bool sna_tiling_composite(uint32_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t mask_x, int16_t mask_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp);
bool sna_tiling_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n);
 
bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n);
 
bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
int bpp, const BoxRec *box, int nbox);
 
bool sna_blt_composite(struct sna *sna,
uint32_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp,
bool fallback);
bool sna_blt_composite__convert(struct sna *sna,
int x, int y,
int width, int height,
struct sna_composite_op *tmp);
 
bool sna_blt_fill(struct sna *sna, uint8_t alu,
struct kgem_bo *bo,
int bpp,
uint32_t pixel,
struct sna_fill_op *fill);
 
bool sna_blt_copy(struct sna *sna, uint8_t alu,
struct kgem_bo *src,
struct kgem_bo *dst,
int bpp,
struct sna_copy_op *copy);
 
bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *bo,
int bpp,
uint32_t pixel,
const BoxRec *box, int n);
 
bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
int bpp,
const BoxRec *box, int n);
bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int nbox);
 
bool _sna_get_pixel_from_rgba(uint32_t *pixel,
uint16_t red,
uint16_t green,
uint16_t blue,
uint16_t alpha,
uint32_t format);
 
/*
 * Pack 16-bit-per-channel colour components into a pixel value for
 * @format.
 *
 * The x8r8g8b8, a8r8g8b8 and a8 formats are handled inline; every other
 * format is forwarded to _sna_get_pixel_from_rgba().  Returns TRUE on the
 * inline paths, otherwise whatever the out-of-line helper returns.
 */
static inline bool
sna_get_pixel_from_rgba(uint32_t * pixel,
			uint16_t red,
			uint16_t green,
			uint16_t blue,
			uint16_t alpha,
			uint32_t format)
{
	switch (format) {
	case PICT_x8r8g8b8:
		alpha = 0xffff;
		/* fall through to re-use a8r8g8b8 expansion */
	case PICT_a8r8g8b8:
		/*
		 * The uint16_t operands promote to int, so shift in uint32_t:
		 * (0xff << 24) as an int would shift into the sign bit.
		 */
		*pixel = (((uint32_t)(alpha >> 8) << 24) |
			  ((uint32_t)(red >> 8) << 16) |
			  (uint32_t)(green & 0xff00) |
			  (uint32_t)(blue >> 8));
		return TRUE;
	case PICT_a8:
		*pixel = alpha >> 8;
		return TRUE;
	default:
		break;
	}

	return _sna_get_pixel_from_rgba(pixel, red, green, blue, alpha, format);
}
 
struct kgem_bo *
__sna_render_pixmap_bo(struct sna *sna,
PixmapPtr pixmap,
const BoxRec *box,
bool blt);
 
int
sna_render_pixmap_bo(struct sna *sna,
struct sna_composite_channel *channel,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
bool
sna_render_pixmap_partial(struct sna *sna,
PixmapPtr pixmap,
struct kgem_bo *bo,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h);
 
int
sna_render_picture_extract(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_approximate_gradient(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_fixup(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y);
 
int
sna_render_picture_convert(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
PixmapPtr pixmap,
int16_t x, int16_t y,
int16_t w, int16_t h,
int16_t dst_x, int16_t dst_y,
bool fixup_alpha);
 
/* Reset the redirect state of @op: no real bo and no damage recorded. */
inline static void sna_render_composite_redirect_init(struct sna_composite_op *op)
{
	op->redirect.real_bo = NULL;
	op->redirect.damage = NULL;
}
 
bool
sna_render_composite_redirect(struct sna *sna,
struct sna_composite_op *op,
int x, int y, int width, int height,
bool partial);
 
void
sna_render_composite_redirect_done(struct sna *sna,
const struct sna_composite_op *op);
 
bool
sna_composite_mask_is_opaque(PicturePtr mask);
#endif
 
void sna_vertex_init(struct sna *sna);
 
/*
 * Take the vertex lock.  The pthread mutex call is commented out, so this
 * is currently a no-op and @r is unused.
 */
static inline void sna_vertex_lock(struct sna_render *r)
{
// pthread_mutex_lock(&r->lock);
}
 
/* Register one more active user of the vertex buffer. */
static inline void sna_vertex_acquire__locked(struct sna_render *r)
{
	r->active += 1;
}
 
/*
 * Release the vertex lock.  The pthread mutex call is commented out, so
 * this is currently a no-op and @r is unused.
 */
static inline void sna_vertex_unlock(struct sna_render *r)
{
// pthread_mutex_unlock(&r->lock);
}
 
/*
 * Drop one active user of the vertex buffer.  The count must be positive
 * on entry.  Nothing is signalled when the count reaches zero (the
 * pthread condition-variable wakeup is disabled in this build).
 */
static inline void sna_vertex_release__locked(struct sna_render *r)
{
	assert(r->active > 0);
	r->active -= 1;
}
 
/*
 * Report whether any user is currently active.  No actual waiting takes
 * place (the pthread condition-variable wait is disabled in this build),
 * so the count is simply sampled and returned as a boolean.
 */
static inline bool sna_vertex_wait__locked(struct sna_render *r)
{
	return r->active != 0;
}
 
/*
 * Rebuild @format with the same bpp, type and R/G/B components but with
 * 0 in the third PICT_FORMAT() argument (the alpha slot, going by the
 * macro name).
 */
#define alphaless(format) \
	PICT_FORMAT(PICT_FORMAT_BPP(format), \
		    PICT_FORMAT_TYPE(format), \
		    0, \
		    PICT_FORMAT_R(format), \
		    PICT_FORMAT_G(format), \
		    PICT_FORMAT_B(format))
static bool
gen3_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
static bool
gen4_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen5_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen6_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
static bool
gen7_blit_tex(struct sna *sna,
uint8_t op, bool scale,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr mask,struct kgem_bo *mask_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
int32_t src_x, int32_t src_y,
int32_t msk_x, int32_t msk_y,
int32_t dst_x, int32_t dst_y,
int32_t width, int32_t height,
struct sna_composite_op *tmp);
 
#endif /* SNA_RENDER_H */
/drivers/video/Intel-2D/sna/sna_render_inline.h
0,0 → 1,70
#ifndef SNA_RENDER_INLINE_H
#define SNA_RENDER_INLINE_H
 
/*
 * Decide whether an area of width x height exceeds what the 3D pipeline
 * can handle in one go, meaning the operation has to be split into tiles.
 */
static inline bool need_tiling(struct sna *sna, int16_t width, int16_t height)
{
	const int limit = sna->render.max_3d_size;

	if (width > limit)
		return true;

	return height > limit;
}
 
/* Report whether the pixmap is too large in either dimension to render to. */
static inline bool need_redirect(struct sna *sna, PixmapPtr dst)
{
	if (dst->drawable.width > sna->render.max_3d_size)
		return true;

	return dst->drawable.height > sna->render.max_3d_size;
}
 
/*
 * Pack two signed 16-bit coordinates into the storage of a single float
 * by type-punning through a union; the float is only a carrier for the
 * raw bits.
 */
static force_inline float pack_2s(int16_t x, int16_t y)
{
	union {
		struct sna_coordinate p;
		float f;
	} packed = { .p = { .x = x, .y = y } };

	return packed.f;
}
 
/* Number of float slots still free in the vertex buffer. */
static force_inline int vertex_space(struct sna *sna)
{
	const int capacity = sna->render.vertex_size;
	const int consumed = sna->render.vertex_used;

	return capacity - consumed;
}
/* Append one float to the vertex buffer; the buffer must not be full. */
static force_inline void vertex_emit(struct sna *sna, float v)
{
	assert(sna->render.vertex_used < sna->render.vertex_size);

	sna->render.vertices[sna->render.vertex_used] = v;
	sna->render.vertex_used++;
}
/* Append a packed pair of 16-bit coordinates as one vertex float. */
static force_inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y)
{
	const float packed = pack_2s(x, y);

	vertex_emit(sna, packed);
}
 
/*
 * Number of dwords that can still be emitted into the batch: the gap
 * between the current batch position and kgem.surface, less the
 * reserved tail.
 */
static force_inline int batch_space(struct sna *sna)
{
	int space;

	assert(sna->kgem.nbatch <= KGEM_BATCH_SIZE(&sna->kgem));
	assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED <= sna->kgem.surface);

	space = sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED;
	return space;
}
 
/*
 * Append one dword to the batch.  A mode must already be selected and
 * there must be room before the reserved tail.
 */
static force_inline void batch_emit(struct sna *sna, uint32_t dword)
{
	assert(sna->kgem.mode != KGEM_NONE);
	assert(sna->kgem.nbatch + KGEM_BATCH_RESERVED < sna->kgem.surface);

	sna->kgem.batch[sna->kgem.nbatch] = dword;
	sna->kgem.nbatch++;
}
 
/* Append the raw bit pattern of a float to the batch as one dword. */
static force_inline void batch_emit_float(struct sna *sna, float f)
{
	union {
		uint32_t dw;
		float f;
	} bits = { .f = f };

	batch_emit(sna, bits.dw);
}
 
 
#endif /* SNA_RENDER_INLINE_H */
/drivers/video/Intel-2D/sna/sna_stream.c
0,0 → 1,156
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#include "sna.h"
#include "sna_render.h"
#include "brw/brw.h"
#include <fcntl.h>
 
/*
 * Prepare an empty stream with 64 KiB of initial storage.
 * Returns non-zero on success, 0 if the allocation failed.
 */
int sna_static_stream_init(struct sna_static_stream *stream)
{
	const uint32_t initial_size = 64*1024;

	stream->used = 0;
	stream->size = initial_size;
	stream->data = malloc(initial_size);

	return stream->data != NULL;
}
 
/*
 * Reserve @len bytes at the next @align-aligned offset and return that
 * offset.  The backing store is doubled until the request fits.
 *
 * NOTE(review): the realloc() result is stored unchecked, so an
 * allocation failure leaves stream->data NULL for the callers.
 */
static uint32_t sna_static_stream_alloc(struct sna_static_stream *stream,
					uint32_t len, uint32_t align)
{
	const uint32_t start = ALIGN(stream->used, align);
	const uint32_t end = start + len;

	if (end > stream->size) {
		while (stream->size < end)
			stream->size *= 2;

		stream->data = realloc(stream->data, stream->size);
	}

	stream->used = end;
	return start;
}
 
/*
 * Copy @len bytes from @data into the stream at the next @align-aligned
 * position and return the offset at which they were stored.
 */
uint32_t sna_static_stream_add(struct sna_static_stream *stream,
			       const void *data, uint32_t len, uint32_t align)
{
	const uint32_t where = sna_static_stream_alloc(stream, len, align);
	uint8_t *dst = stream->data + where;

	memcpy(dst, data, len);
	return where;
}
 
/*
 * Reserve @len zero-filled bytes at the next @align-aligned position and
 * return a pointer to them.  The pointer refers into stream->data, which
 * may be reallocated by a later reservation.
 */
void *sna_static_stream_map(struct sna_static_stream *stream,
			    uint32_t len, uint32_t align)
{
	const uint32_t where = sna_static_stream_alloc(stream, len, align);
	void *region = stream->data + where;

	memset(region, 0, len);
	return region;
}
 
/* Convert a pointer into the stream's storage back into a byte offset. */
uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr)
{
	uint8_t *position = ptr;

	return position - stream->data;
}
 
/*
 * Upload the accumulated static state into a new linear buffer object
 * and release the stream's CPU-side storage.
 *
 * Returns the buffer object, or NULL if it could not be created or
 * written.  The storage is freed on every path; previously it leaked
 * when kgem_bo_write() failed.
 */
struct kgem_bo *sna_static_stream_fini(struct sna *sna,
				       struct sna_static_stream *stream)
{
	struct kgem_bo *bo;

	DBG(("uploaded %d bytes of static state\n", stream->used));

	if (DEBUG_DUMP)
	{
		/* O_CREAT requires the third (mode) argument to open(). */
		int fd = open("/tmp1/1/static.bin", O_CREAT|O_WRONLY|O_BINARY, 0644);
		if (fd != -1) {
			write(fd, stream->data, stream->used);
			close(fd);
		}
		else
		{
			printf("SNA: failed to write static stream\n");
			asm volatile("int3");
		}
	}

	bo = kgem_create_linear(&sna->kgem, stream->used, 0);
	if (bo && !kgem_bo_write(&sna->kgem, bo, stream->data, stream->used)) {
		kgem_bo_destroy(&sna->kgem, bo);
		bo = NULL;
	}

	/* The CPU copy is no longer needed whether or not the upload worked. */
	free(stream->data);
	stream->data = NULL;

	return bo;
}
 
/*
 * Compile an SF kernel straight into the stream.
 *
 * 64 dwords are reserved (64-byte aligned) as the instruction store.
 * If @compile fails the reservation is handed back and 0 is returned;
 * otherwise the unused tail is handed back and the kernel's offset in
 * the stream is returned.
 */
unsigned
sna_static_stream_compile_sf(struct sna *sna,
			     struct sna_static_stream *stream,
			     bool (*compile)(struct brw_compile *))
{
	const size_t reserved = 64*sizeof(uint32_t);
	struct brw_compile p;
	size_t emitted;
	void *store;

	store = sna_static_stream_map(stream, 64*sizeof(uint32_t), 64);
	brw_compile_init(&p, sna->kgem.gen, store);

	if (!compile(&p)) {
		stream->used -= reserved;
		return 0;
	}

	emitted = p.nr_insn*sizeof(struct brw_instruction);
	assert(emitted <= reserved);

	stream->used -= reserved - emitted;
	return sna_static_stream_offsetof(stream, p.store);
}
 
/*
 * Compile a WM kernel for @dispatch_width straight into the stream.
 *
 * 256 dwords are reserved (64-byte aligned) as the instruction store.
 * If @compile fails the reservation is handed back and 0 is returned;
 * otherwise the unused tail is handed back and the kernel's offset in
 * the stream is returned.
 */
unsigned
sna_static_stream_compile_wm(struct sna *sna,
			     struct sna_static_stream *stream,
			     bool (*compile)(struct brw_compile *, int),
			     int dispatch_width)
{
	const size_t reserved = 256*sizeof(uint32_t);
	struct brw_compile p;
	size_t emitted;
	void *store;

	store = sna_static_stream_map(stream, 256*sizeof(uint32_t), 64);
	brw_compile_init(&p, sna->kgem.gen, store);

	if (!compile(&p, dispatch_width)) {
		stream->used -= reserved;
		return 0;
	}

	emitted = p.nr_insn*sizeof(struct brw_instruction);
	assert(emitted <= reserved);

	stream->used -= reserved - emitted;
	return sna_static_stream_offsetof(stream, p.store);
}
/drivers/video/Intel-2D/sna/sna_transform.c
0,0 → 1,147
/*
* Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved.
* Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org>
* Copyright © 2010 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Jesse Barnes <jbarnes@virtuousgeek.org>
* Chris Wilson <chris@chris-wilson.co.uk>
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
 
/* 16.16 fixed-point helpers: conversions first, then derived constants. */
#define pixman_int_to_fixed(i)		((pixman_fixed_t) ((i) << 16))
#define pixman_fixed_to_int(f)		((int) ((f) >> 16))

#define pixman_fixed_e			((pixman_fixed_t) 1)
#define pixman_fixed_1			(pixman_int_to_fixed(1))
#define pixman_fixed_1_minus_e		(pixman_fixed_1 - pixman_fixed_e)

/* Keep only the fractional (low 16) bits of a fixed-point value. */
#define pixman_fixed_fraction(f)	((f) & pixman_fixed_1_minus_e)

#define IntToxFixed(i)			pixman_int_to_fixed(i)
 
/**
 * Tells whether @t is an affine transform, i.e. whether the first two
 * entries of its bottom row are zero.
 *
 * A NULL transform is accepted and counts as affine.
 */
bool sna_transform_is_affine(const PictTransform *t)
{
	return t == NULL ||
	       (t->matrix[2][0] == 0 && t->matrix[2][1] == 0);
}
 
/**
 * Tells whether @t is a pure translation and, if so, stores the
 * fixed-point offsets in @tx and @ty.
 *
 * A NULL transform counts as a translation by (0, 0).  When false is
 * returned, @tx and @ty are left untouched.
 */
bool
sna_transform_is_translation(const PictTransform *t,
			     pixman_fixed_t *tx,
			     pixman_fixed_t *ty)
{
	bool unit_diagonal, no_cross_terms;

	if (t == NULL) {
		*tx = 0;
		*ty = 0;
		return true;
	}

	unit_diagonal = t->matrix[0][0] == IntToxFixed(1) &&
			t->matrix[1][1] == IntToxFixed(1) &&
			t->matrix[2][2] == IntToxFixed(1);
	no_cross_terms = t->matrix[0][1] == 0 &&
			 t->matrix[1][0] == 0 &&
			 t->matrix[2][0] == 0 &&
			 t->matrix[2][1] == 0;
	if (!(unit_diagonal && no_cross_terms))
		return false;

	*tx = t->matrix[0][2];
	*ty = t->matrix[1][2];
	return true;
}
 
/**
 * Tells whether @t is a pure translation by a whole number of pixels and,
 * if so, stores the integer offsets in @tx and @ty.
 *
 * A NULL transform counts as a translation by (0, 0).  When false is
 * returned, @tx and @ty are left untouched.
 */
bool
sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty)
{
	bool unit_diagonal, no_cross_terms;

	if (t == NULL) {
		*tx = 0;
		*ty = 0;
		return true;
	}

	unit_diagonal = t->matrix[0][0] == IntToxFixed(1) &&
			t->matrix[1][1] == IntToxFixed(1) &&
			t->matrix[2][2] == IntToxFixed(1);
	no_cross_terms = t->matrix[0][1] == 0 &&
			 t->matrix[1][0] == 0 &&
			 t->matrix[2][0] == 0 &&
			 t->matrix[2][1] == 0;
	if (!(unit_diagonal && no_cross_terms))
		return false;

	/* Any fractional part in either offset disqualifies the transform. */
	if (pixman_fixed_fraction(t->matrix[0][2]) != 0)
		return false;
	if (pixman_fixed_fraction(t->matrix[1][2]) != 0)
		return false;

	*tx = pixman_fixed_to_int(t->matrix[0][2]);
	*ty = pixman_fixed_to_int(t->matrix[1][2]);
	return true;
}
 
/**
 * Computes the floating-point position of (x, y) after applying
 * @transform.  With a NULL transform the input coordinates are passed
 * through unchanged.
 */
void
sna_get_transformed_coordinates(int x, int y,
				const PictTransform *transform,
				float *x_out, float *y_out)
{
	if (transform != NULL) {
		_sna_get_transformed_coordinates(x, y, transform, x_out, y_out);
		return;
	}

	*x_out = x;
	*y_out = y;
}
 
/**
 * Computes the un-normalized (homogeneous) floating-point position of
 * (x, y) after applying @transform.
 *
 * With a NULL transform the result is (x, y, 1).  If the point cannot be
 * transformed the result is (0, 0, 1).
 */
void
sna_get_transformed_coordinates_3d(int x, int y,
				   const PictTransform *transform,
				   float *x_out, float *y_out, float *w_out)
{
	int64_t result[3];

	if (transform == NULL) {
		*x_out = x;
		*y_out = y;
		*w_out = 1;
		return;
	}

	if (!_sna_transform_point(transform, x, y, result)) {
		*x_out = 0;
		*y_out = 0;
		*w_out = 1.;
		return;
	}

	/* The results are 16.16 fixed point; scale them down to floats. */
	*x_out = result[0] / 65536.;
	*y_out = result[1] / 65536.;
	*w_out = result[2] / 65536.;
}
/drivers/video/Intel-2D/sna/utils.c
0,0 → 1,150
 
#include <stdint.h>
#include <stdio.h>
 
 
/*
 * Execute the cpuid instruction.
 *
 * On entry *eax and *ecx supply the input register values; on return all
 * four pointers receive the EAX, EBX, ECX and EDX outputs.  *ebx and *edx
 * are outputs only.
 */
static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
/* "0" and "2" tie the inputs to the registers of outputs 0 (eax) and 2 (ecx) */
: "0" (*eax), "2" (*ecx)
: "memory");
}
 
/*
 * Run cpuid for leaf @op with the sub-leaf index @count placed in ecx
 * (some CPUID leaves take their sub-leaf there).  The four outputs are
 * stored through @eax, @ebx, @ecx and @edx.
 */
static inline void cpuid_count(unsigned int op, int count,
			       unsigned int *eax, unsigned int *ebx,
			       unsigned int *ecx, unsigned int *edx)
{
	*eax = op;
	*ecx = count;

	native_cpuid(eax, ebx, ecx, edx);
}
 
 
/*
 * Cache type reported in the type field of union _cpuid4_leaf_eax.
 * CACHE_TYPE_NULL marks the end of the cache leaf list.
 */
enum _cache_type {
	CACHE_TYPE_NULL = 0,
	CACHE_TYPE_DATA,
	CACHE_TYPE_INST,
	CACHE_TYPE_UNIFIED
};
 
 
/*
 * EAX as returned by cpuid leaf 4, viewable either as the raw 32-bit
 * value (full) or as bit-fields (split).  The field widths add up to 32.
 */
union _cpuid4_leaf_eax {
struct {
enum _cache_type type:5;
unsigned int level:3;
unsigned int is_self_initializing:1;
unsigned int is_fully_associative:1;
unsigned int reserved:4;
unsigned int num_threads_sharing:12;
unsigned int num_cores_on_die:6;
} split;
uint32_t full;
};
 
/*
 * EBX as returned by CPUID leaf 4: raw 32-bit value ('full') or its three
 * fields ('split'). The size computation in this file adds one to each field
 * before multiplying them together.
 */
union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size:12;
		unsigned int physical_line_partition:10;
		unsigned int ways_of_associativity:10;
	} split;
	uint32_t full;
};
 
/*
 * ECX as returned by CPUID leaf 4. The whole register is a single field;
 * the size computation in this file uses number_of_sets + 1.
 */
union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets:32;
	} split;
	uint32_t full;
};
 
/*
 * Snapshot of one CPUID leaf 4 sub-leaf: the three decoded registers plus
 * the size derived from them by cpuid4_cache_lookup_regs().
 */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;	/* type, level, sharing info */
	union _cpuid4_leaf_ebx ebx;	/* line size, partitions, ways */
	union _cpuid4_leaf_ecx ecx;	/* number of sets */
	unsigned long size;		/* product of the (field + 1) terms */
};
 
static int
cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned edx;
 
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
 
if (eax.split.type == CACHE_TYPE_NULL)
return -1; /* better error ? */
 
this_leaf->eax = eax;
this_leaf->ebx = ebx;
this_leaf->ecx = ecx;
this_leaf->size = (ecx.split.number_of_sets + 1) *
(ebx.split.coherency_line_size + 1) *
(ebx.split.physical_line_partition + 1) *
(ebx.split.ways_of_associativity + 1);
return 0;
}
 
/*
 * Count the cache descriptors exposed by CPUID leaf 4.
 *
 * Sub-leaves are probed in order starting at 0 until one reports
 * CACHE_TYPE_NULL; the index of that terminating sub-leaf is the number of
 * valid descriptors.
 *
 * Returns 0 when the processor does not implement leaf 4 at all.
 */
static int find_num_cache_leaves(void)
{
	unsigned int eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	/*
	 * CPUID leaf 0 returns the highest supported basic leaf in EAX.
	 * Querying a leaf above that maximum makes the processor return the
	 * data of its highest basic leaf instead, whose low bits need not be
	 * zero -- the loop below would then never see CACHE_TYPE_NULL and
	 * would spin forever. Bail out early on such CPUs.
	 */
	cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
	if (eax < 4)
		return 0;

	do {
		++i;
		/* Do cpuid(op) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);

	return i;
}
 
/*
 * Report the size of the CPU's last-level cache as described by CPUID
 * leaf 4.
 *
 * Every cache descriptor is examined and the sizes of the level 2 and
 * level 3 caches are recorded (level 1 and any other level are ignored, as
 * their sizes were never used). Both sizes are printed to stdout.
 *
 * Returns the level 3 size if one was found, otherwise the level 2 size;
 * 0 if neither was found. The unit is whatever cpuid4_cache_lookup_regs()
 * stores in 'size'.
 */
unsigned int cpu_cache_size(void)
{
	unsigned int new_l2 = 0, new_l3 = 0; /* Cache sizes from cpuid(4) */
	int num_cache_leaves;
	int i;

	num_cache_leaves = find_num_cache_leaves();

	for (i = 0; i < num_cache_leaves; i++) {
		struct _cpuid4_info_regs this_leaf;

		if (cpuid4_cache_lookup_regs(i, &this_leaf) < 0)
			continue;

		switch (this_leaf.eax.split.level) {
		case 2:
			new_l2 = (unsigned int)this_leaf.size;
			break;
		case 3:
			new_l3 = (unsigned int)this_leaf.size;
			break;
		default:
			break;
		}
	}

	/* %u: both values are unsigned int. */
	printf("l2 cache %u l3 cache %u\n", new_l2, new_l3);

	return new_l3 != 0 ? new_l3 : new_l2;
}