Subversion Repositories Kolibri OS

Compare Revisions

Ignore whitespace Rev 3253 → Rev 3254

/drivers/video/Intel-2D/brw/brw.h
0,0 → 1,17
#include "brw_eu.h"
 
bool brw_sf_kernel__nomask(struct brw_compile *p);
bool brw_sf_kernel__mask(struct brw_compile *p);
 
bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);
 
bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);
 
bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width);
/drivers/video/Intel-2D/brw/brw_eu.c
0,0 → 1,150
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/* Returns the corresponding conditional mod for swapping src0 and
* src1 in e.g. CMP.
*/
uint32_t
brw_swap_cmod(uint32_t cmod)
{
switch (cmod) {
case BRW_CONDITIONAL_Z:
case BRW_CONDITIONAL_NZ:
return cmod;
case BRW_CONDITIONAL_G:
return BRW_CONDITIONAL_LE;
case BRW_CONDITIONAL_GE:
return BRW_CONDITIONAL_L;
case BRW_CONDITIONAL_L:
return BRW_CONDITIONAL_GE;
case BRW_CONDITIONAL_LE:
return BRW_CONDITIONAL_G;
default:
return ~0;
}
}
 
/* How does predicate control work when execution_size != 8? Do I
* need to test/set for 0xffff when execution_size is 16?
*/
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value )
{
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
if (value != 0xff) {
if (value != p->flag_value) {
brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
p->flag_value = value;
}
 
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
}
 
void brw_set_compression_control(struct brw_compile *p,
enum brw_compression compression_control)
{
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED);
 
if (p->gen >= 060) {
/* Since we don't use the 32-wide support in gen6, we translate
* the pre-gen6 compression control here.
*/
switch (compression_control) {
case BRW_COMPRESSION_NONE:
/* This is the "use the first set of bits of dmask/vmask/arf
* according to execsize" option.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1Q;
break;
case BRW_COMPRESSION_2NDHALF:
/* For 8-wide, this is "use the second set of 8 bits." */
p->current->header.compression_control = GEN6_COMPRESSION_2Q;
break;
case BRW_COMPRESSION_COMPRESSED:
/* For 16-wide instruction compression, use the first set of 16 bits
* since we don't do 32-wide dispatch.
*/
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
default:
assert(!"not reached");
p->current->header.compression_control = GEN6_COMPRESSION_1H;
break;
}
} else {
p->current->header.compression_control = compression_control;
}
}
 
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
p->compressed_stack[p->current - p->stack] = p->compressed;
p->current++;
}
 
void brw_pop_insn_state( struct brw_compile *p )
{
assert(p->current != p->stack);
p->current--;
p->compressed = p->compressed_stack[p->current - p->stack];
}
 
void brw_compile_init(struct brw_compile *p, int gen, void *store)
{
assert(gen);
 
p->gen = gen;
p->store = store;
 
p->nr_insn = 0;
p->current = p->stack;
p->compressed = false;
memset(p->current, 0, sizeof(p->current[0]));
 
/* Some defaults?
*/
brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
brw_set_saturate(p, 0);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_predicate_control_flag_value(p, 0xff);
 
p->if_stack_depth = 0;
p->if_stack_array_size = 0;
p->if_stack = NULL;
}
/drivers/video/Intel-2D/brw/brw_eu.h
0,0 → 1,2266
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
 
#ifndef BRW_EU_H
#define BRW_EU_H
 
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
 
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
 
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
 
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8
 
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
 
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128
 
/** Number of message register file registers */
#define BRW_MAX_MRF 16
 
 
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1
 
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
 
enum brw_compression {
BRW_COMPRESSION_NONE,
BRW_COMPRESSION_2NDHALF,
BRW_COMPRESSION_COMPRESSED,
};
 
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
 
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9
 
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1
 
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3
 
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5
 
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3
 
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1
 
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1
 
/** @{
*
* Gen6 has replaced "mask enable/disable" with WECtrl, which is
* effectively the same but much simpler to think about. Now, there
* are two contributors ANDed together to whether channels are
* executed: The predication on the instruction, and the channel write
* enable.
*/
/**
* This is the default value. It means that a channel's write enable is set
* if the per-channel IP is pointing at this instruction.
*/
#define BRW_WE_NORMAL 0
/**
* This is used like BRW_MASK_DISABLE, and causes all channels to have
* their write enable set. Note that predication still contributes to
* whether the channel actually gets written.
*/
#define BRW_WE_ALL 1
/** @} */
 
enum opcode {
/* These are the actual hardware opcodes. */
BRW_OPCODE_MOV = 1,
BRW_OPCODE_SEL = 2,
BRW_OPCODE_NOT = 4,
BRW_OPCODE_AND = 5,
BRW_OPCODE_OR = 6,
BRW_OPCODE_XOR = 7,
BRW_OPCODE_SHR = 8,
BRW_OPCODE_SHL = 9,
BRW_OPCODE_RSR = 10,
BRW_OPCODE_RSL = 11,
BRW_OPCODE_ASR = 12,
BRW_OPCODE_CMP = 16,
BRW_OPCODE_CMPN = 17,
BRW_OPCODE_JMPI = 32,
BRW_OPCODE_IF = 34,
BRW_OPCODE_IFF = 35,
BRW_OPCODE_ELSE = 36,
BRW_OPCODE_ENDIF = 37,
BRW_OPCODE_DO = 38,
BRW_OPCODE_WHILE = 39,
BRW_OPCODE_BREAK = 40,
BRW_OPCODE_CONTINUE = 41,
BRW_OPCODE_HALT = 42,
BRW_OPCODE_MSAVE = 44,
BRW_OPCODE_MRESTORE = 45,
BRW_OPCODE_PUSH = 46,
BRW_OPCODE_POP = 47,
BRW_OPCODE_WAIT = 48,
BRW_OPCODE_SEND = 49,
BRW_OPCODE_SENDC = 50,
BRW_OPCODE_MATH = 56,
BRW_OPCODE_ADD = 64,
BRW_OPCODE_MUL = 65,
BRW_OPCODE_AVG = 66,
BRW_OPCODE_FRC = 67,
BRW_OPCODE_RNDU = 68,
BRW_OPCODE_RNDD = 69,
BRW_OPCODE_RNDE = 70,
BRW_OPCODE_RNDZ = 71,
BRW_OPCODE_MAC = 72,
BRW_OPCODE_MACH = 73,
BRW_OPCODE_LZD = 74,
BRW_OPCODE_SAD2 = 80,
BRW_OPCODE_SADA2 = 81,
BRW_OPCODE_DP4 = 84,
BRW_OPCODE_DPH = 85,
BRW_OPCODE_DP3 = 86,
BRW_OPCODE_DP2 = 87,
BRW_OPCODE_DPA2 = 88,
BRW_OPCODE_LINE = 89,
BRW_OPCODE_PLN = 90,
BRW_OPCODE_NOP = 126,
 
/* These are compiler backend opcodes that get translated into other
* instructions.
*/
FS_OPCODE_FB_WRITE = 128,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
SHADER_OPCODE_EXP2,
SHADER_OPCODE_LOG2,
SHADER_OPCODE_POW,
SHADER_OPCODE_SIN,
SHADER_OPCODE_COS,
FS_OPCODE_DDX,
FS_OPCODE_DDY,
FS_OPCODE_PIXEL_X,
FS_OPCODE_PIXEL_Y,
FS_OPCODE_CINTERP,
FS_OPCODE_LINTERP,
FS_OPCODE_TEX,
FS_OPCODE_TXB,
FS_OPCODE_TXD,
FS_OPCODE_TXF,
FS_OPCODE_TXL,
FS_OPCODE_TXS,
FS_OPCODE_DISCARD,
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_PULL_CONSTANT_LOAD,
 
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
};
 
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7
 
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3
 
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7
 
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0
 
#define BRW_MRF_COMPR4 (1 << 7)
 
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3
 
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2
 
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4
 
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
 
#define BRW_MESSAGE_TARGET_NULL 0
#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9
 
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define BRW_SAMPLER_MESSAGE_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10
 
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3
 
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
 
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
/**
* Message target: Shared Function ID for where to SEND a message.
*
* These are enumerated in the ISA reference under "send - Send Message".
* In particular, see the following tables:
* - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
* - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
* - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
* Overview / GPE Function IDs
*/
enum brw_message_target {
BRW_SFID_NULL = 0,
BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
BRW_SFID_SAMPLER = 2,
BRW_SFID_MESSAGE_GATEWAY = 3,
BRW_SFID_DATAPORT_READ = 4,
BRW_SFID_DATAPORT_WRITE = 5,
BRW_SFID_URB = 6,
BRW_SFID_THREAD_SPAWNER = 7,
 
GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,
 
GEN7_SFID_DATAPORT_DATA_CACHE = 10,
};
 
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
 
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7
 
/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14
 
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13
 
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1
 
#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1
 
#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1
 
#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1
 
#define BRW_URB_OPCODE_WRITE 0
 
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2
 
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11
 
#define REG_SIZE (8*4)
 
struct brw_instruction {
struct {
unsigned opcode:7;
unsigned pad:1;
unsigned access_mode:1;
unsigned mask_control:1;
unsigned dependency_control:2;
unsigned compression_control:2; /* gen6: quater control */
unsigned thread_control:2;
unsigned predicate_control:4;
unsigned predicate_inverse:1;
unsigned execution_size:3;
/**
* Conditional Modifier for most instructions. On Gen6+, this is also
* used for the SEND instruction's Message Target/SFID.
*/
unsigned destreg__conditionalmod:4;
unsigned acc_wr_control:1;
unsigned cmpt_control:1;
unsigned debug_control:1;
unsigned saturate:1;
} header;
 
union {
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_subreg_nr:5;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2; /* 0x00000c00 */
unsigned src1_reg_type:3; /* 0x00007000 */
unsigned pad:1;
int dest_indirect_offset:10; /* offset against the deref'd address reg */
unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia1;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:1;
unsigned dest_reg_nr:8;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} da16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned pad0:6;
unsigned dest_writemask:4;
int dest_indirect_offset:6;
unsigned dest_subreg_nr:3;
unsigned dest_horiz_stride:2;
unsigned dest_address_mode:1;
} ia16;
 
struct {
unsigned dest_reg_file:2;
unsigned dest_reg_type:3;
unsigned src0_reg_file:2;
unsigned src0_reg_type:3;
unsigned src1_reg_file:2;
unsigned src1_reg_type:3;
unsigned pad:1;
 
int jump_count:16;
} branch_gen6;
 
struct {
unsigned dest_reg_file:1;
unsigned flag_subreg_num:1;
unsigned pad0:2;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src2_abs:1;
unsigned src2_negate:1;
unsigned pad1:7;
unsigned dest_writemask:4;
unsigned dest_subreg_nr:3;
unsigned dest_reg_nr:8;
} da3src;
} bits1;
 
 
union {
struct {
unsigned src0_subreg_nr:5;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} da1;
 
struct {
int src0_indirect_offset:10;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_horiz_stride:2;
unsigned src0_width:3;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad:5;
} ia1;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
unsigned src0_subreg_nr:1;
unsigned src0_reg_nr:8;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} da16;
 
struct {
unsigned src0_swz_x:2;
unsigned src0_swz_y:2;
int src0_indirect_offset:6;
unsigned src0_subreg_nr:3;
unsigned src0_abs:1;
unsigned src0_negate:1;
unsigned src0_address_mode:1;
unsigned src0_swz_z:2;
unsigned src0_swz_w:2;
unsigned pad0:1;
unsigned src0_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia16;
 
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
*
* Does not apply to Gen6+. The SFID/message target moved to bits
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
*/
struct {
unsigned pad:26;
unsigned end_of_thread:1;
unsigned pad1:1;
unsigned sfid:4;
} send_gen5; /* for Ironlake only */
 
struct {
unsigned src0_rep_ctrl:1;
unsigned src0_swizzle:8;
unsigned src0_subreg_nr:3;
unsigned src0_reg_nr:8;
unsigned pad0:1;
unsigned src1_rep_ctrl:1;
unsigned src1_swizzle:8;
unsigned src1_subreg_nr_low:2;
} da3src;
} bits2;
 
union {
struct {
unsigned src1_subreg_nr:5;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned pad0:7;
} da1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
unsigned src1_subreg_nr:1;
unsigned src1_reg_nr:8;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned pad2:7;
} da16;
 
struct {
int src1_indirect_offset:10;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned src1_address_mode:1;
unsigned src1_horiz_stride:2;
unsigned src1_width:3;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad1:5;
} ia1;
 
struct {
unsigned src1_swz_x:2;
unsigned src1_swz_y:2;
int src1_indirect_offset:6;
unsigned src1_subreg_nr:3;
unsigned src1_abs:1;
unsigned src1_negate:1;
unsigned pad0:1;
unsigned src1_swz_z:2;
unsigned src1_swz_w:2;
unsigned pad1:1;
unsigned src1_vert_stride:4;
unsigned flag_subreg_nr:1;
unsigned flag_reg_nr:1;
unsigned pad2:5;
} ia16;
 
struct {
int jump_count:16; /* note: signed */
unsigned pop_count:4;
unsigned pad0:12;
} if_else;
 
/* This is also used for gen7 IF/ELSE instructions */
struct {
/* Signed jump distance to the ip to jump to if all channels
* are disabled after the break or continue. It should point
* to the end of the innermost control flow block, as that's
* where some channel could get re-enabled.
*/
int jip:16;
 
/* Signed jump distance to the location to resume execution
* of this channel if it's enabled for the break or continue.
*/
int uip:16;
} break_cont;
 
/**
* \defgroup SEND instructions / Message Descriptors
*
* @{
*/
 
/**
* Generic Message Descriptor for Gen4 SEND instructions. The structs
* below expand function_control to something specific for their
* message. Due to struct packing issues, they duplicate these bits.
*
* See the G45 PRM, Volume 4, Table 14-15.
*/
struct {
unsigned function_control:16;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} generic;
 
/**
* Generic Message Descriptor for Gen5-7 SEND instructions.
*
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most
* of the information on the SEND instruction is missing from the public
* Ironlake PRM.)
*
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
* According to the SEND instruction description:
* "The MSb of the message description, the EOT field, always comes from
* bit 127 of the instruction word"...which is bit 31 of this field.
*/
struct {
unsigned function_control:19;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} generic_gen5;
 
/** G45 PRM, Volume 4, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned pad0:8;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} math;
 
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
struct {
unsigned function:4;
unsigned int_type:1;
unsigned precision:1;
unsigned saturate:1;
unsigned data_type:1;
unsigned snapshot:1;
unsigned pad0:10;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} math_gen5;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned return_format:2;
unsigned msg_type:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler;
 
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} sampler_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:4;
unsigned simd_mode:2;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen5;
 
struct {
unsigned binding_table_index:8;
unsigned sampler:4;
unsigned msg_type:5;
unsigned simd_mode:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} sampler_gen7;
 
struct brw_urb_immediate {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} urb;
 
struct {
unsigned opcode:4;
unsigned offset:6;
unsigned swizzle_control:2;
unsigned pad:1;
unsigned allocate:1;
unsigned used:1;
unsigned complete:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen5;
 
struct {
unsigned opcode:3;
unsigned offset:11;
unsigned swizzle_control:1;
unsigned complete:1;
unsigned per_slot_offset:1;
unsigned pad0:2;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} urb_gen7;
 
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
struct {
unsigned binding_table_index:8;
unsigned msg_control:4;
unsigned msg_type:2;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_read_g4x;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned msg_type:3;
unsigned target_cache:2;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_read_gen5;
 
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned response_length:4;
unsigned msg_length:4;
unsigned msg_target:4;
unsigned pad1:3;
unsigned end_of_thread:1;
} dp_write;
 
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned last_render_target:1;
unsigned msg_type:3;
unsigned send_commit_msg:1;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} dp_write_gen5;
 
/**
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
*
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
**/
struct {
unsigned binding_table_index:8;
unsigned msg_control:5;
unsigned msg_type:3;
unsigned pad0:3;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp_sampler_const_cache;
 
/**
* Message for the Sandybridge Render Cache Data Port.
*
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
* Section 3.9.2.1.1: Message Descriptor.
*
* "Slot Group Select" and "Last Render Target" are part of the
* 5-bit message control for Render Target Write messages. See
* Section 3.9.9.2.1 of the same volume.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_type:4;
unsigned send_commit_msg:1;
unsigned pad0:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad1:2;
unsigned end_of_thread:1;
} gen6_dp;
 
/**
* Message for any of the Gen7 Data Port caches.
*
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
* Data Port Messages / Message Descriptor. Once again, "Slot Group
* Select" and "Last Render Target" are part of the 6-bit message
* control for Render Target Writes.
*/
struct {
unsigned binding_table_index:8;
unsigned msg_control:3;
unsigned slot_group_select:1;
unsigned last_render_target:1;
unsigned msg_control_pad:1;
unsigned msg_type:4;
unsigned pad1:1;
unsigned header_present:1;
unsigned response_length:5;
unsigned msg_length:4;
unsigned pad2:2;
unsigned end_of_thread:1;
} gen7_dp;
/** @} */
 
struct {
unsigned src1_subreg_nr_high:1;
unsigned src1_reg_nr:8;
unsigned pad0:1;
unsigned src2_rep_ctrl:1;
unsigned src2_swizzle:8;
unsigned src2_subreg_nr:3;
unsigned src2_reg_nr:8;
unsigned pad1:2;
} da3src;
 
int d;
unsigned ud;
float f;
} bits3;
};
 
 
/* These aren't hardware structs, just something useful for us to pass around:
*
* Align1 operation has a lot of control over input ranges. Used in
* WM programs to implement shaders decomposed into "channel serial"
* or "structure of array" form:
*/
struct brw_reg {
unsigned type:4;
unsigned file:2;
unsigned nr:8;
unsigned subnr:5; /* :1 in align16 */
unsigned negate:1; /* source only */
unsigned abs:1; /* source only */
unsigned vstride:4; /* source only */
unsigned width:3; /* src only, align1 only */
unsigned hstride:2; /* align1 only */
unsigned address_mode:1; /* relative addressing, hopefully! */
unsigned pad0:1;
 
union {
struct {
unsigned swizzle:8; /* src only, align16 only */
unsigned writemask:4; /* dest only, align16 only */
int indirect_offset:10; /* relative addressing offset */
unsigned pad1:10; /* two dwords total */
} bits;
 
float f;
int d;
unsigned ud;
} dw1;
};
 
struct brw_indirect {
unsigned addr_subnr:4;
int addr_offset:10;
unsigned pad:18;
};
 
#define BRW_EU_MAX_INSN_STACK 5
#define BRW_EU_MAX_INSN 10000
 
struct brw_compile {
struct brw_instruction *store;
unsigned nr_insn;
 
int gen;
 
/* Allow clients to push/pop instruction state:
*/
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
bool compressed_stack[BRW_EU_MAX_INSN_STACK];
struct brw_instruction *current;
 
unsigned flag_value;
bool single_program_flow;
bool compressed;
 
/* Control flow stacks:
* - if_stack contains IF and ELSE instructions which must be patched
* (and popped) once the matching ENDIF instruction is encountered.
*/
struct brw_instruction **if_stack;
int if_stack_depth;
int if_stack_array_size;
};
 
static inline int type_sz(unsigned type)
{
switch (type) {
case BRW_REGISTER_TYPE_UD:
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_F:
return 4;
case BRW_REGISTER_TYPE_HF:
case BRW_REGISTER_TYPE_UW:
case BRW_REGISTER_TYPE_W:
return 2;
case BRW_REGISTER_TYPE_UB:
case BRW_REGISTER_TYPE_B:
return 1;
default:
return 0;
}
}
 
/**
* Construct a brw_reg.
* \param file one of the BRW_x_REGISTER_FILE values
* \param nr register number/index
* \param subnr register sub number
* \param type one of BRW_REGISTER_TYPE_x
* \param vstride one of BRW_VERTICAL_STRIDE_x
* \param width one of BRW_WIDTH_x
* \param hstride one of BRW_HORIZONTAL_STRIDE_x
* \param swizzle one of BRW_SWIZZLE_x
* \param writemask WRITEMASK_X/Y/Z/W bitfield
*/
static inline struct brw_reg brw_reg(unsigned file,
unsigned nr,
unsigned subnr,
unsigned type,
unsigned vstride,
unsigned width,
unsigned hstride,
unsigned swizzle,
unsigned writemask)
{
struct brw_reg reg;
if (file == BRW_GENERAL_REGISTER_FILE)
assert(nr < BRW_MAX_GRF);
else if (file == BRW_MESSAGE_REGISTER_FILE)
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
assert(nr <= BRW_ARF_IP);
 
reg.type = type;
reg.file = file;
reg.nr = nr;
reg.subnr = subnr * type_sz(type);
reg.negate = 0;
reg.abs = 0;
reg.vstride = vstride;
reg.width = width;
reg.hstride = hstride;
reg.address_mode = BRW_ADDRESS_DIRECT;
reg.pad0 = 0;
 
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
* set swizzle and writemask to W, as the lower bits of subnr will
* be lost when converted to align16. This is probably too much to
* keep track of as you'd want it adjusted by suboffset(), etc.
* Perhaps fix up when converting to align16?
*/
reg.dw1.bits.swizzle = swizzle;
reg.dw1.bits.writemask = writemask;
reg.dw1.bits.indirect_offset = 0;
reg.dw1.bits.pad1 = 0;
return reg;
}
 
/** Construct float[16] register */
static inline struct brw_reg brw_vec16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_16,
BRW_WIDTH_16,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[8] register */
static inline struct brw_reg brw_vec8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_8,
BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[4] register */
static inline struct brw_reg brw_vec4_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_4,
BRW_WIDTH_4,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
/** Construct float[2] register */
static inline struct brw_reg brw_vec2_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_2,
BRW_WIDTH_2,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYXY,
WRITEMASK_XY);
}
 
/** Construct float[1] register */
static inline struct brw_reg brw_vec1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return brw_reg(file,
nr,
subnr,
BRW_REGISTER_TYPE_F,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
 
static inline struct brw_reg __retype(struct brw_reg reg,
unsigned type)
{
reg.type = type;
return reg;
}
 
static inline struct brw_reg __retype_d(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg __retype_ud(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg __retype_uw(struct brw_reg reg)
{
return __retype(reg, BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg __sechalf(struct brw_reg reg)
{
if (reg.vstride)
reg.nr++;
return reg;
}
 
static inline struct brw_reg __suboffset(struct brw_reg reg,
unsigned delta)
{
reg.subnr += delta * type_sz(reg.type);
return reg;
}
 
static inline struct brw_reg __offset(struct brw_reg reg,
unsigned delta)
{
reg.nr += delta;
return reg;
}
 
static inline struct brw_reg byte_offset(struct brw_reg reg,
unsigned bytes)
{
unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
reg.nr = newoffset / REG_SIZE;
reg.subnr = newoffset % REG_SIZE;
return reg;
}
 
 
/** Construct unsigned word[16] register */
static inline struct brw_reg brw_uw16_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[8] register */
static inline struct brw_reg brw_uw8_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
/** Construct unsigned word[1] register */
static inline struct brw_reg brw_uw1_reg(unsigned file,
unsigned nr,
unsigned subnr)
{
return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
}
 
static inline struct brw_reg brw_imm_reg(unsigned type)
{
return brw_reg( BRW_IMMEDIATE_VALUE,
0,
0,
type,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
0,
0);
}
 
/** Construct float immediate register */
static inline struct brw_reg brw_imm_f(float f)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
imm.dw1.f = f;
return imm;
}
 
/** Construct integer immediate register */
static inline struct brw_reg brw_imm_d(int d)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
imm.dw1.d = d;
return imm;
}
 
/** Construct uint immediate register */
static inline struct brw_reg brw_imm_ud(unsigned ud)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
imm.dw1.ud = ud;
return imm;
}
 
/** Construct ushort immediate register */
static inline struct brw_reg brw_imm_uw(uint16_t uw)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
imm.dw1.ud = uw | (uw << 16);
return imm;
}
 
/** Construct short immediate register */
static inline struct brw_reg brw_imm_w(int16_t w)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
imm.dw1.d = w | (w << 16);
return imm;
}
 
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
* numbers alias with _V and _VF below:
*/
 
/** Construct vector of eight signed half-byte values */
static inline struct brw_reg brw_imm_v(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_8;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
/** Construct vector of four 8-bit float values */
static inline struct brw_reg brw_imm_vf(unsigned v)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = v;
return imm;
}
 
#define VF_ZERO 0x0
#define VF_ONE 0x30
#define VF_NEG (1<<7)
 
static inline struct brw_reg brw_imm_vf4(unsigned v0,
unsigned v1,
unsigned v2,
unsigned v3)
{
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
imm.vstride = BRW_VERTICAL_STRIDE_0;
imm.width = BRW_WIDTH_4;
imm.hstride = BRW_HORIZONTAL_STRIDE_1;
imm.dw1.ud = ((v0 << 0) |
(v1 << 8) |
(v2 << 16) |
(v3 << 24));
return imm;
}
 
static inline struct brw_reg brw_address(struct brw_reg reg)
{
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
}
 
/** Construct float[1] general-purpose register */
static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
{
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[2] general-purpose register */
static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
{
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[4] general-purpose register */
static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
{
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct float[8] general-purpose register */
static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
{
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
{
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
{
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
}
 
/** Construct null register (usually used for setting condition codes) */
static inline struct brw_reg brw_null_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NULL,
0);
}
 
static inline struct brw_reg brw_address_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ADDRESS,
subnr);
}
 
/* If/else instructions break in align16 mode if writemask & swizzle
* aren't xyzw. This goes against the convention for other scalar
* regs:
*/
static inline struct brw_reg brw_ip_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_IP,
0,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_4, /* ? */
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XYZW, /* NOTE! */
WRITEMASK_XYZW); /* NOTE! */
}
 
static inline struct brw_reg brw_acc_reg(void)
{
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_ACCUMULATOR,
0);
}
 
static inline struct brw_reg brw_notification_1_reg(void)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_NOTIFICATION_COUNT,
1,
BRW_REGISTER_TYPE_UD,
BRW_VERTICAL_STRIDE_0,
BRW_WIDTH_1,
BRW_HORIZONTAL_STRIDE_0,
BRW_SWIZZLE_XXXX,
WRITEMASK_X);
}
 
static inline struct brw_reg brw_flag_reg(void)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_FLAG,
0);
}
 
static inline struct brw_reg brw_mask_reg(unsigned subnr)
{
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_MASK,
subnr);
}
 
static inline struct brw_reg brw_message_reg(unsigned nr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
}
 
static inline struct brw_reg brw_message4_reg(unsigned nr,
int subnr)
{
assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
}
 
/* This is almost always called with a numeric constant argument, so
* make things easy to evaluate at compile time:
*/
static inline unsigned cvt(unsigned val)
{
switch (val) {
case 0: return 0;
case 1: return 1;
case 2: return 2;
case 4: return 3;
case 8: return 4;
case 16: return 5;
case 32: return 6;
}
return 0;
}
 
static inline struct brw_reg __stride(struct brw_reg reg,
unsigned vstride,
unsigned width,
unsigned hstride)
{
reg.vstride = cvt(vstride);
reg.width = cvt(width) - 1;
reg.hstride = cvt(hstride);
return reg;
}
 
static inline struct brw_reg vec16(struct brw_reg reg)
{
return __stride(reg, 16,16,1);
}
 
static inline struct brw_reg vec8(struct brw_reg reg)
{
return __stride(reg, 8,8,1);
}
 
static inline struct brw_reg vec4(struct brw_reg reg)
{
return __stride(reg, 4,4,1);
}
 
static inline struct brw_reg vec2(struct brw_reg reg)
{
return __stride(reg, 2,2,1);
}
 
static inline struct brw_reg vec1(struct brw_reg reg)
{
return __stride(reg, 0,1,0);
}
 
static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(reg, elt));
}
 
static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
{
return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
 
static inline struct brw_reg brw_swizzle(struct brw_reg reg,
unsigned x,
unsigned y,
unsigned z,
unsigned w)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
 
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
return reg;
}
 
static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
unsigned x)
{
return brw_swizzle(reg, x, x, x, x);
}
 
static inline struct brw_reg brw_writemask(struct brw_reg reg,
unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask &= mask;
return reg;
}
 
static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
unsigned mask)
{
assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
 
static inline struct brw_reg brw_negate(struct brw_reg reg)
{
reg.negate ^= 1;
return reg;
}
 
static inline struct brw_reg brw_abs(struct brw_reg reg)
{
reg.abs = 1;
return reg;
}
 
/***********************************************************************
*/
static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec4_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
int offset)
{
struct brw_reg reg = brw_vec1_grf(0, 0);
reg.subnr = subnr;
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
reg.dw1.bits.indirect_offset = offset;
return reg;
}
 
static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
}
 
static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
{
return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
}
 
static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
}
 
static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
}
 
static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
{
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
}
 
static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
return brw_address_reg(ptr.addr_subnr);
}
 
static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
ptr.addr_offset += offset;
return ptr;
}
 
static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
{
struct brw_indirect ptr;
ptr.addr_subnr = addr_subnr;
ptr.addr_offset = offset;
ptr.pad = 0;
return ptr;
}
 
/** Do two brw_regs refer to the same register? */
static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
{
return r1.file == r2.file && r1.nr == r2.nr;
}
 
static inline struct brw_instruction *current_insn( struct brw_compile *p)
{
return &p->store[p->nr_insn];
}
 
static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
{
p->current->header.predicate_control = pc;
}
 
static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
{
p->current->header.predicate_inverse = predicate_inverse;
}
 
static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
{
p->current->header.destreg__conditionalmod = conditional;
}
 
static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
{
p->current->header.access_mode = access_mode;
}
 
static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
{
p->current->header.mask_control = value;
}
 
static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
{
p->current->header.saturate = value;
}
 
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
{
if (p->gen >= 060)
p->current->header.acc_wr_control = value;
}
 
void brw_pop_insn_state(struct brw_compile *p);
void brw_push_insn_state(struct brw_compile *p);
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
 
void brw_compile_init(struct brw_compile *p, int gen, void *store);
 
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest);
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg);
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg);
 
void gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr);
 
static inline struct brw_instruction *
brw_next_insn(struct brw_compile *p, unsigned opcode)
{
struct brw_instruction *insn;
 
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
 
insn = &p->store[p->nr_insn++];
*insn = *p->current;
 
if (p->current->header.destreg__conditionalmod) {
p->current->header.destreg__conditionalmod = 0;
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
}
 
insn->header.opcode = opcode;
return insn;
}
 
/* Helpers for regular instructions: */
#define ALU1(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
 
#define ALU2(OP) \
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
{ \
return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
 
/* Rounding operations (other than RNDD) require two instructions - the first
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
*
* Sandybridge and later appear to round correctly without an ADD.
*/
#define ROUND(OP) \
static inline void brw_##OP(struct brw_compile *p, \
struct brw_reg dest, \
struct brw_reg src) \
{ \
struct brw_instruction *rnd, *add; \
rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
if (p->gen < 060) { \
/* turn on round-increments */ \
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
add->header.predicate_control = BRW_PREDICATE_NORMAL; \
} \
}
 
static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src)
{
struct brw_instruction *insn = brw_next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
return insn;
}
 
static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
unsigned opcode,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1 )
{
struct brw_instruction *insn = brw_next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
return insn;
}
 
static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.2.2: add */
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
 
static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
/* 6.32.38: mul */
if (src0.type == BRW_REGISTER_TYPE_D ||
src0.type == BRW_REGISTER_TYPE_UD ||
src1.type == BRW_REGISTER_TYPE_D ||
src1.type == BRW_REGISTER_TYPE_UD) {
assert(dest.type != BRW_REGISTER_TYPE_F);
}
 
if (src0.type == BRW_REGISTER_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_REGISTER_TYPE_VF)) {
assert(src1.type != BRW_REGISTER_TYPE_UD);
assert(src1.type != BRW_REGISTER_TYPE_D);
}
 
if (src1.type == BRW_REGISTER_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_REGISTER_TYPE_VF)) {
assert(src0.type != BRW_REGISTER_TYPE_UD);
assert(src0.type != BRW_REGISTER_TYPE_D);
}
 
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src0.nr != BRW_ARF_ACCUMULATOR);
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src1.nr != BRW_ARF_ACCUMULATOR);
 
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
 
static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
 
insn->header.execution_size = 1;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_DISABLE;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
return insn;
}
 
 
ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(RSR);
ALU2(RSL);
ALU2(ASR);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);
 
ROUND(RNDZ);
ROUND(RNDE);
 
#undef ALU1
#undef ALU2
#undef ROUND
 
/* Helpers for SEND instruction */
void brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned target_cache,
unsigned msg_length,
unsigned response_length);
 
void brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned msg_length,
bool header_present,
bool last_render_target,
unsigned response_length,
bool end_of_thread,
bool send_commit_msg);
 
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool eot,
bool writes_complete,
unsigned offset,
unsigned swizzle);
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot);
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
bool header_present);
 
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode);
 
void brw_math_16(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned precision);
 
void brw_math(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned data_type,
unsigned precision);
 
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1);
 
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index);
 
void brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset);
 
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset);
 
void brw_dword_scattered_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t bind_table_index);
 
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
unsigned location,
unsigned bind_table_index);
 
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addrReg,
unsigned offset,
unsigned bind_table_index);
 
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
struct brw_instruction *brw_IF(struct brw_compile *p,
unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1);
 
void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);
 
/* DO/WHILE loops:
*/
struct brw_instruction *brw_DO(struct brw_compile *p,
unsigned execute_size);
 
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
 
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn);
 
void brw_NOP(struct brw_compile *p);
 
void brw_WAIT(struct brw_compile *p);
 
/* Special case: there is never a destination, execution size will be
* taken from src0:
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1);
 
void brw_print_reg(struct brw_reg reg);
 
static inline void brw_math_invert(struct brw_compile *p,
struct brw_reg dst,
struct brw_reg src)
{
brw_math(p,
dst,
BRW_MATH_FUNCTION_INV,
BRW_MATH_SATURATE_NONE,
0,
src,
BRW_MATH_PRECISION_FULL,
BRW_MATH_DATA_VECTOR);
}
 
void brw_set_uip_jip(struct brw_compile *p);
 
uint32_t brw_swap_cmod(uint32_t cmod);
 
void brw_disasm(FILE *file,
const struct brw_instruction *inst,
int gen);
 
#endif
/drivers/video/Intel-2D/brw/brw_eu_emit.c
0,0 → 1,2002
/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
develop this 3D driver.
 
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
 
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
 
#include "brw_eu.h"
 
#include <string.h>
#include <stdlib.h>
 
/***********************************************************************
* Internal helper for constructing instructions
*/
 
static void guess_execution_size(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.width == BRW_WIDTH_8 && p->compressed)
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width;
}
 
 
/**
* Prior to Sandybridge, the SEND instruction accepted non-MRF source
* registers, implicitly moving the operand to a message register.
*
* On Sandybridge, this is no longer the case. This function performs the
* explicit move; it should be called before emitting a SEND instruction.
*/
void
gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
unsigned msg_reg_nr)
{
if (p->gen < 060)
return;
 
if (src->file == BRW_MESSAGE_REGISTER_FILE)
return;
 
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
brw_pop_insn_state(p);
}
*src = brw_message_reg(msg_reg_nr);
}
 
static void
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
{
/* From the BSpec / ISA Reference / send - [DevIVB+]:
* "The send with EOT should use register space R112-R127 for <src>. This is
* to enable loading of a new thread into the same slot while the message
* with EOT for current thread is pending dispatch."
*
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += 111;
}
}
 
void
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg dest)
{
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
 
gen7_convert_mrf_to_grf(p, &dest);
 
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
 
if (dest.address_mode == BRW_ADDRESS_DIRECT) {
insn->bits1.da1.dest_reg_nr = dest.nr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.da1.dest_subreg_nr = dest.subnr;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.da1.dest_horiz_stride = dest.hstride;
} else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
/* even ignored in da16, still need to set as '01' */
insn->bits1.da16.dest_horiz_stride = 1;
}
} else {
insn->bits1.ia1.dest_subreg_nr = dest.subnr;
 
/* These are different sizes in align1 vs align16:
*/
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
insn->bits1.ia1.dest_horiz_stride = dest.hstride;
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
/* even ignored in da16, still need to set as '01' */
insn->bits1.ia16.dest_horiz_stride = 1;
}
}
 
guess_execution_size(p, insn, dest);
}
 
static const int reg_type_size[8] = {
[0] = 4,
[1] = 4,
[2] = 2,
[3] = 2,
[4] = 1,
[5] = 1,
[7] = 4
};
 
static void
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
{
int hstride_for_reg[] = {0, 1, 2, 4};
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
int width_for_reg[] = {1, 2, 4, 8, 16};
int execsize_for_reg[] = {1, 2, 4, 8, 16};
int width, hstride, vstride, execsize;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
/* 3.3.6: Region Parameters. Restriction: Immediate vectors
* mean the destination has to be 128-bit aligned and the
* destination horiz stride has to be a word.
*/
if (reg.type == BRW_REGISTER_TYPE_V) {
assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
}
 
return;
}
 
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
reg.file == BRW_ARF_NULL)
return;
 
hstride = hstride_for_reg[reg.hstride];
 
if (reg.vstride == 0xf) {
vstride = -1;
} else {
vstride = vstride_for_reg[reg.vstride];
}
 
width = width_for_reg[reg.width];
 
execsize = execsize_for_reg[insn->header.execution_size];
 
/* Restrictions from 3.3.10: Register Region Restrictions. */
/* 3. */
assert(execsize >= width);
 
/* 4. */
if (execsize == width && hstride != 0) {
assert(vstride == -1 || vstride == width * hstride);
}
 
/* 5. */
if (execsize == width && hstride == 0) {
/* no restriction on vstride. */
}
 
/* 6. */
if (width == 1) {
assert(hstride == 0);
}
 
/* 7. */
if (execsize == 1 && width == 1) {
assert(hstride == 0);
assert(vstride == 0);
}
 
/* 8. */
if (vstride == 0 && hstride == 0) {
assert(width == 1);
}
 
/* 10. Check destination issues. */
}
 
void
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src0_reg_file = reg.file;
insn->bits1.da1.src0_reg_type = reg.type;
insn->bits2.da1.src0_abs = reg.abs;
insn->bits2.da1.src0_negate = reg.negate;
insn->bits2.da1.src0_address_mode = reg.address_mode;
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
 
/* Required to set some fields in src1 as well:
*/
insn->bits1.da1.src1_reg_file = 0; /* arf */
insn->bits1.da1.src1_reg_type = reg.type;
} else {
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.da1.src0_subreg_nr = reg.subnr;
insn->bits2.da1.src0_reg_nr = reg.nr;
} else {
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
insn->bits2.da16.src0_reg_nr = reg.nr;
}
} else {
insn->bits2.ia1.src0_subreg_nr = reg.subnr;
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
} else {
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
}
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits2.da1.src0_width = BRW_WIDTH_1;
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
} else {
insn->bits2.da1.src0_horiz_stride = reg.hstride;
insn->bits2.da1.src0_width = reg.width;
insn->bits2.da1.src0_vert_stride = reg.vstride;
}
} else {
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits2.da16.src0_vert_stride = reg.vstride;
}
}
}
 
void brw_set_src1(struct brw_compile *p,
struct brw_instruction *insn,
struct brw_reg reg)
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
assert(reg.nr < 128);
 
gen7_convert_mrf_to_grf(p, &reg);
 
validate_reg(insn, reg);
 
insn->bits1.da1.src1_reg_file = reg.file;
insn->bits1.da1.src1_reg_type = reg.type;
insn->bits3.da1.src1_abs = reg.abs;
insn->bits3.da1.src1_negate = reg.negate;
 
/* Only src1 can be immediate in two-argument instructions. */
assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
 
if (reg.file == BRW_IMMEDIATE_VALUE) {
insn->bits3.ud = reg.dw1.ud;
} else {
/* This is a hardware restriction, which may or may not be lifted
* in the future:
*/
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
if (insn->header.access_mode == BRW_ALIGN_1) {
insn->bits3.da1.src1_subreg_nr = reg.subnr;
insn->bits3.da1.src1_reg_nr = reg.nr;
} else {
insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
insn->bits3.da16.src1_reg_nr = reg.nr;
}
 
if (insn->header.access_mode == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
insn->header.execution_size == BRW_EXECUTE_1) {
insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
insn->bits3.da1.src1_width = BRW_WIDTH_1;
insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
} else {
insn->bits3.da1.src1_horiz_stride = reg.hstride;
insn->bits3.da1.src1_width = reg.width;
insn->bits3.da1.src1_vert_stride = reg.vstride;
}
} else {
insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
 
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
if (reg.vstride == BRW_VERTICAL_STRIDE_8)
insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
else
insn->bits3.da16.src1_vert_stride = reg.vstride;
}
}
}
 
/**
* Set the Message Descriptor and Extended Message Descriptor fields
* for SEND messages.
*
* \note This zeroes out the Function Control bits, so it must be called
* \b before filling out any message-specific data. Callers can
* choose not to fill in irrelevant bits; they will be zero.
*/
static void
brw_set_message_descriptor(struct brw_compile *p,
struct brw_instruction *inst,
enum brw_message_target sfid,
unsigned msg_length,
unsigned response_length,
bool header_present,
bool end_of_thread)
{
brw_set_src1(p, inst, brw_imm_d(0));
 
if (p->gen >= 050) {
inst->bits3.generic_gen5.header_present = header_present;
inst->bits3.generic_gen5.response_length = response_length;
inst->bits3.generic_gen5.msg_length = msg_length;
inst->bits3.generic_gen5.end_of_thread = end_of_thread;
 
if (p->gen >= 060) {
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
inst->header.destreg__conditionalmod = sfid;
} else {
/* Set Extended Message Descriptor (ex_desc) */
inst->bits2.send_gen5.sfid = sfid;
inst->bits2.send_gen5.end_of_thread = end_of_thread;
}
} else {
inst->bits3.generic.response_length = response_length;
inst->bits3.generic.msg_length = msg_length;
inst->bits3.generic.msg_target = sfid;
inst->bits3.generic.end_of_thread = end_of_thread;
}
}
 
 
static void brw_set_math_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned function,
unsigned integer_type,
bool low_precision,
bool saturate,
unsigned dataType)
{
unsigned msg_length;
unsigned response_length;
 
/* Infer message length from the function */
switch (function) {
case BRW_MATH_FUNCTION_POW:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
msg_length = 2;
break;
default:
msg_length = 1;
break;
}
 
/* Infer response length from the function */
switch (function) {
case BRW_MATH_FUNCTION_SINCOS:
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
response_length = 2;
break;
default:
response_length = 1;
break;
}
 
brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
msg_length, response_length,
false, false);
if (p->gen == 050) {
insn->bits3.math_gen5.function = function;
insn->bits3.math_gen5.int_type = integer_type;
insn->bits3.math_gen5.precision = low_precision;
insn->bits3.math_gen5.saturate = saturate;
insn->bits3.math_gen5.data_type = dataType;
insn->bits3.math_gen5.snapshot = 0;
} else {
insn->bits3.math.function = function;
insn->bits3.math.int_type = integer_type;
insn->bits3.math.precision = low_precision;
insn->bits3.math.saturate = saturate;
insn->bits3.math.data_type = dataType;
}
}
 
static void brw_set_ff_sync_message(struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
unsigned response_length,
bool end_of_thread)
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
1, response_length,
true, end_of_thread);
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
}
 
static void brw_set_urb_message(struct brw_compile *p,
struct brw_instruction *insn,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool end_of_thread,
bool complete,
unsigned offset,
unsigned swizzle_control)
{
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
msg_length, response_length, true, end_of_thread);
if (p->gen >= 070) {
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
insn->bits3.urb_gen7.offset = offset;
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
insn->bits3.urb_gen7.swizzle_control = swizzle_control;
/* per_slot_offset = 0 makes it ignore offsets in message header */
insn->bits3.urb_gen7.per_slot_offset = 0;
insn->bits3.urb_gen7.complete = complete;
} else if (p->gen >= 050) {
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
insn->bits3.urb_gen5.offset = offset;
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
insn->bits3.urb_gen5.allocate = allocate;
insn->bits3.urb_gen5.used = used; /* ? */
insn->bits3.urb_gen5.complete = complete;
} else {
insn->bits3.urb.opcode = 0; /* ? */
insn->bits3.urb.offset = offset;
insn->bits3.urb.swizzle_control = swizzle_control;
insn->bits3.urb.allocate = allocate;
insn->bits3.urb.used = used; /* ? */
insn->bits3.urb.complete = complete;
}
}
 
void
brw_set_dp_write_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned msg_length,
bool header_present,
bool last_render_target,
unsigned response_length,
bool end_of_thread,
bool send_commit_msg)
{
unsigned sfid;
 
if (p->gen >= 070) {
/* Use the Render Cache for RT writes; otherwise use the Data Cache */
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (p->gen >= 060) {
/* Use the render cache for all write messages. */
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_WRITE;
}
 
brw_set_message_descriptor(p, insn, sfid,
msg_length, response_length,
header_present, end_of_thread);
 
if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = last_render_target;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = last_render_target;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
} else if (p->gen >= 050) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.last_render_target = last_render_target;
insn->bits3.dp_write_gen5.msg_type = msg_type;
insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
} else {
insn->bits3.dp_write.binding_table_index = binding_table_index;
insn->bits3.dp_write.msg_control = msg_control;
insn->bits3.dp_write.last_render_target = last_render_target;
insn->bits3.dp_write.msg_type = msg_type;
insn->bits3.dp_write.send_commit_msg = send_commit_msg;
}
}
 
void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
unsigned target_cache,
unsigned msg_length,
unsigned response_length)
{
unsigned sfid;
 
if (p->gen >= 070) {
sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
} else if (p->gen >= 060) {
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
else
sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
} else {
sfid = BRW_SFID_DATAPORT_READ;
}
 
brw_set_message_descriptor(p, insn, sfid,
msg_length, response_length,
true, false);
 
if (p->gen >= 070) {
insn->bits3.gen7_dp.binding_table_index = binding_table_index;
insn->bits3.gen7_dp.msg_control = msg_control;
insn->bits3.gen7_dp.last_render_target = 0;
insn->bits3.gen7_dp.msg_type = msg_type;
} else if (p->gen >= 060) {
insn->bits3.gen6_dp.binding_table_index = binding_table_index;
insn->bits3.gen6_dp.msg_control = msg_control;
insn->bits3.gen6_dp.last_render_target = 0;
insn->bits3.gen6_dp.msg_type = msg_type;
insn->bits3.gen6_dp.send_commit_msg = 0;
} else if (p->gen >= 050) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
insn->bits3.dp_read_gen5.msg_type = msg_type;
insn->bits3.dp_read_gen5.target_cache = target_cache;
} else if (p->gen >= 045) {
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
insn->bits3.dp_read.msg_type = msg_type; /*12:13*/
insn->bits3.dp_read.target_cache = target_cache; /*14:15*/
}
}
 
static void brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
unsigned binding_table_index,
unsigned sampler,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
msg_length, response_length,
header_present, false);
 
if (p->gen >= 070) {
insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
insn->bits3.sampler_gen7.sampler = sampler;
insn->bits3.sampler_gen7.msg_type = msg_type;
insn->bits3.sampler_gen7.simd_mode = simd_mode;
} else if (p->gen >= 050) {
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
insn->bits3.sampler_gen5.sampler = sampler;
insn->bits3.sampler_gen5.msg_type = msg_type;
insn->bits3.sampler_gen5.simd_mode = simd_mode;
} else if (p->gen >= 045) {
insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
insn->bits3.sampler_g4x.sampler = sampler;
insn->bits3.sampler_g4x.msg_type = msg_type;
} else {
insn->bits3.sampler.binding_table_index = binding_table_index;
insn->bits3.sampler.sampler = sampler;
insn->bits3.sampler.msg_type = msg_type;
insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
}
}
 
 
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_ud(0x0));
}
 
/***********************************************************************
* Comparisons, if/else/endif
*/
 
static void
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
{
p->if_stack[p->if_stack_depth] = inst;
 
p->if_stack_depth++;
if (p->if_stack_array_size <= p->if_stack_depth) {
p->if_stack_array_size *= 2;
p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
}
}
 
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
* control execution on each channel of the unit, eg. on each of the
* 16 pixel values in our wm programs.
*
* When the matching 'else' instruction is reached (presumably by
* countdown of the instruction count patched in by our ELSE/ENDIF
* functions), the relevent flags are inverted.
*
* When the matching 'endif' instruction is reached, the flags are
* popped off. If the stack is now empty, normal execution resumes.
*/
struct brw_instruction *
brw_IF(struct brw_compile *p, unsigned execute_size)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_IF);
 
/* Override the defaults for this instruction: */
if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.execution_size = execute_size;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.predicate_control = BRW_PREDICATE_NORMAL;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
push_if_stack(p, insn);
return insn;
}
 
/* This function is only used for gen6-style IF instructions with an
* embedded comparison (conditional modifier). It is not used on gen7.
*/
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_IF);
 
brw_set_dest(p, insn, brw_imm_w(0));
if (p->compressed) {
insn->header.execution_size = BRW_EXECUTE_16;
} else {
insn->header.execution_size = BRW_EXECUTE_8;
}
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = conditional;
 
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
return insn;
}
 
/**
* In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
*/
static void
convert_IF_ELSE_to_ADD(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst)
{
/* The next instruction (where the ENDIF would be, if it existed) */
struct brw_instruction *next_inst = &p->store[p->nr_insn];
 
assert(p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
assert(if_inst->header.execution_size == BRW_EXECUTE_1);
 
/* Convert IF to an ADD instruction that moves the instruction pointer
* to the first instruction of the ELSE block. If there is no ELSE
* block, point to where ENDIF would be. Reverse the predicate.
*
* There's no need to execute an ENDIF since we don't need to do any
* stack operations, and if we're currently executing, we just want to
* continue normally.
*/
if_inst->header.opcode = BRW_OPCODE_ADD;
if_inst->header.predicate_inverse = 1;
 
if (else_inst != NULL) {
/* Convert ELSE to an ADD instruction that points where the ENDIF
* would be.
*/
else_inst->header.opcode = BRW_OPCODE_ADD;
 
if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
else_inst->bits3.ud = (next_inst - else_inst) * 16;
} else {
if_inst->bits3.ud = (next_inst - if_inst) * 16;
}
}
 
/**
* Patch IF and ELSE instructions with appropriate jump targets.
*/
static void
patch_IF_ELSE(struct brw_compile *p,
struct brw_instruction *if_inst,
struct brw_instruction *else_inst,
struct brw_instruction *endif_inst)
{
unsigned br = 1;
 
assert(!p->single_program_flow);
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
assert(endif_inst != NULL);
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
 
/* Jump count is for 64bit data chunk each, so one 128bit instruction
* requires 2 chunks.
*/
if (p->gen >= 050)
br = 2;
 
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
endif_inst->header.execution_size = if_inst->header.execution_size;
 
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
if (p->gen < 060) {
/* Turn it into an IFF, which means no mask stack operations for
* all-false and jumping past the ENDIF.
*/
if_inst->header.opcode = BRW_OPCODE_IFF;
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
} else {
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
}
} else {
else_inst->header.execution_size = if_inst->header.execution_size;
 
/* Patch IF -> ELSE */
if (p->gen < 060) {
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
} else if (p->gen <= 070) {
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
}
 
/* Patch ELSE -> ENDIF */
if (p->gen < 060) {
/* BRW_OPCODE_ELSE pre-gen6 should point just past the
* matching ENDIF.
*/
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
} else {
/* The IF instruction's JIP should point just past the ELSE */
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
}
}
}
 
void
brw_ELSE(struct brw_compile *p)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_ELSE);
 
if (p->gen < 060) {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = 0;
insn->bits3.break_cont.uip = 0;
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
 
push_if_stack(p, insn);
}
 
void
brw_ENDIF(struct brw_compile *p)
{
struct brw_instruction *insn;
struct brw_instruction *else_inst = NULL;
struct brw_instruction *if_inst = NULL;
 
/* Pop the IF and (optional) ELSE instructions from the stack */
p->if_stack_depth--;
if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
else_inst = p->if_stack[p->if_stack_depth];
p->if_stack_depth--;
}
if_inst = p->if_stack[p->if_stack_depth];
 
if (p->single_program_flow) {
/* ENDIF is useless; don't bother emitting it. */
convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
return;
}
 
insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
 
if (p->gen < 060) {
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else if (p->gen < 070) {
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
} else {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
}
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.mask_control = BRW_MASK_ENABLE;
insn->header.thread_control = BRW_THREAD_SWITCH;
 
/* Also pop item off the stack in the endif instruction: */
if (p->gen < 060) {
insn->bits3.if_else.jump_count = 0;
insn->bits3.if_else.pop_count = 1;
insn->bits3.if_else.pad0 = 0;
} else if (p->gen < 070) {
insn->bits1.branch_gen6.jump_count = 2;
} else {
insn->bits3.break_cont.jip = 2;
}
patch_IF_ELSE(p, if_inst, else_inst, insn);
}
 
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_BREAK);
if (p->gen >= 060) {
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_d(0x0));
} else {
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = pop_count;
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
 
return insn;
}
 
struct brw_instruction *gen6_CONT(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
 
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
return insn;
}
 
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
insn->bits3.if_else.pad0 = 0;
insn->bits3.if_else.pop_count = pop_count;
return insn;
}
 
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
* used for control within the loop. We have a few ways they can be
* done.
*
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
* jip and no DO instruction.
*
* For non-uniform control flow pre-gen6, there's a DO instruction to
* push the mask, and a WHILE to jump back, and BREAK to get out and
* pop the mask.
*
* For gen6, there's no more mask stack, so no need for DO. WHILE
* just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
{
if (p->gen >= 060 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
 
/* Override the defaults for this instruction:
*/
brw_set_dest(p, insn, brw_null_reg());
brw_set_src0(p, insn, brw_null_reg());
brw_set_src1(p, insn, brw_null_reg());
 
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = execute_size;
insn->header.predicate_control = BRW_PREDICATE_NONE;
/* insn->header.mask_control = BRW_MASK_ENABLE; */
/* insn->header.mask_control = BRW_MASK_DISABLE; */
 
return insn;
}
}
 
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
unsigned br = 1;
 
if (p->gen >= 050)
br = 2;
 
if (p->gen >= 070) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
brw_set_dest(p, insn, __retype_d(brw_null_reg()));
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, brw_imm_ud(0));
insn->bits3.break_cont.jip = br * (do_insn - insn);
 
insn->header.execution_size = BRW_EXECUTE_8;
} else if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
brw_set_src0(p, insn, __retype_d(brw_null_reg()));
brw_set_src1(p, insn, __retype_d(brw_null_reg()));
 
insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = brw_next_insn(p, BRW_OPCODE_ADD);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
insn->header.execution_size = BRW_EXECUTE_1;
} else {
insn = brw_next_insn(p, BRW_OPCODE_WHILE);
 
assert(do_insn->header.opcode == BRW_OPCODE_DO);
 
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0));
 
insn->header.execution_size = do_insn->header.execution_size;
insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
insn->bits3.if_else.pop_count = 0;
insn->bits3.if_else.pad0 = 0;
}
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
p->current->header.predicate_control = BRW_PREDICATE_NONE;
 
return insn;
}
 
/* FORWARD JUMPS:
*/
void brw_land_fwd_jump(struct brw_compile *p,
struct brw_instruction *jmp_insn)
{
struct brw_instruction *landing = &p->store[p->nr_insn];
unsigned jmpi = 1;
 
if (p->gen >= 050)
jmpi = 2;
 
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
}
 
 
 
/* To integrate with the above, it makes sense that the comparison
* instruction should populate the flag register. It might be simpler
* just to use the flag reg for most WM tasks?
*/
void brw_CMP(struct brw_compile *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
 
insn->header.destreg__conditionalmod = conditional;
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
 
/* Make it so that future instructions will use the computed flag
* value until brw_set_predicate_control_flag_value() is called
* again.
*/
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == 0) {
p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
p->flag_value = 0xff;
}
}
 
/* Issue 'wait' instruction for n1, host could program MMIO
to wake up thread. */
void brw_WAIT(struct brw_compile *p)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
 
brw_set_dest(p, insn, src);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
}
 
/***********************************************************************
* Helpers for the various SEND message types:
*/
 
/** Extended math function, float[8].
*/
void brw_math(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned data_type,
unsigned precision)
{
if (p->gen >= 060) {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src.negate);
assert(!src.abs);
 
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
}
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
insn->header.saturate = saturate;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
} else {
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
/* Example code doesn't set predicate_control for send
* instructions.
*/
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
src.type == BRW_REGISTER_TYPE_D,
precision,
saturate,
data_type);
}
}
 
/** Extended math function, float[8].
*/
void brw_math2(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1)
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
 
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
 
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src0.type == BRW_REGISTER_TYPE_F);
assert(src1.type == BRW_REGISTER_TYPE_F);
}
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
assert(!src1.abs);
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
 
/**
* Extended math function, float[16].
* Use 2 send instructions.
*/
void brw_math_16(struct brw_compile *p,
struct brw_reg dest,
unsigned function,
unsigned saturate,
unsigned msg_reg_nr,
struct brw_reg src,
unsigned precision)
{
struct brw_instruction *insn;
 
if (p->gen >= 060) {
insn = brw_next_insn(p, BRW_OPCODE_MATH);
 
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
insn->header.saturate = saturate;
 
/* Source modifiers are ignored for extended math instructions. */
assert(!src.negate);
assert(!src.abs);
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
return;
}
 
/* First instruction:
*/
brw_push_insn_state(p);
brw_set_predicate_control_flag_value(p, 0xff);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
BRW_MATH_INTEGER_UNSIGNED,
precision,
saturate,
BRW_MATH_DATA_VECTOR);
 
/* Second instruction:
*/
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
 
brw_set_dest(p, insn, __offset(dest,1));
brw_set_src0(p, insn, src);
brw_set_math_message(p, insn, function,
BRW_MATH_INTEGER_UNSIGNED,
precision,
saturate,
BRW_MATH_DATA_VECTOR);
 
brw_pop_insn_state(p);
}
 
/**
* Write a block of OWORDs (half a GRF each) from the scratch buffer,
* using a constant offset per channel.
*
* The offset must be aligned to oword size (16 bytes). Used for
* register spilling.
*/
void brw_oword_block_write_scratch(struct brw_compile *p,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
uint32_t msg_control, msg_type;
int mlen;
 
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
mlen = 2;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
mlen = 3;
}
 
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
* it'll screw up texture samples, so set it up inside the message
* reg.
*/
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_reg dest;
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
int send_commit_msg;
struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
 
if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
insn->header.compression_control = BRW_COMPRESSION_NONE;
src_header = vec16(src_header);
}
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* Until gen6, writes followed by reads from the same location
* are not guaranteed to be ordered unless write_commit is set.
* If set, then a no-op write is issued to the destination
* register to set a dependency, and a read from the destination
* can be used to ensure the ordering.
*
* For gen6, only writes between different threads need ordering
* protection. Our use of DP writes is all about register
* spilling within a thread.
*/
if (p->gen >= 060) {
dest = __retype_uw(vec16(brw_null_reg()));
send_commit_msg = 0;
} else {
dest = src_header;
send_commit_msg = 1;
}
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
brw_set_dp_write_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
msg_type,
mlen,
true, /* header_present */
0, /* pixel scoreboard */
send_commit_msg, /* response_length */
0, /* eot */
send_commit_msg);
}
}
 
 
/**
* Read a block of owords (half a GRF each) from the scratch buffer
* using a constant index per channel.
*
* Offset must be aligned to oword size (16 bytes). Used for register
* spilling.
*/
void
brw_oword_block_read_scratch(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
int num_regs,
unsigned offset)
{
uint32_t msg_control;
int rlen;
 
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
dest = __retype_uw(dest);
 
if (num_regs == 1) {
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
rlen = 1;
} else {
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
rlen = 2;
}
 
{
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
brw_pop_insn_state(p);
}
 
{
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
assert(insn->header.predicate_control == 0);
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
 
brw_set_dest(p, insn, dest); /* UW? */
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
rlen);
}
}
 
/**
* Read a float[4] vector from the data port Data Cache (const buffer).
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
void brw_oword_block_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t offset,
uint32_t bind_table_index)
{
struct brw_instruction *insn;
 
/* On newer hardware, offset is in units of owords. */
if (p->gen >= 060)
offset /= 16;
 
mrf = __retype_ud(mrf);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
 
/* set message header global offset field (reg 0, element 2) */
brw_MOV(p,
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
brw_imm_ud(offset));
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* cast dest to a uword[8] vector */
dest = __retype_uw(vec8(dest));
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, mrf);
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
1); /* response_length (1 reg, 2 owords!) */
 
brw_pop_insn_state(p);
}
 
/**
* Read a set of dwords from the data port Data Cache (const buffer).
*
* Location (in buffer) appears as UD offsets in the register after
* the provided mrf header reg.
*/
void brw_dword_scattered_read(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg mrf,
uint32_t bind_table_index)
{
struct brw_instruction *insn;
 
mrf = __retype_ud(mrf);
 
brw_push_insn_state(p);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
brw_pop_insn_state(p);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = mrf.nr;
 
/* cast dest to a uword[8] vector */
dest = __retype_uw(vec8(dest));
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, brw_null_reg());
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
 
/**
* Read float[4] constant(s) from VS constant buffer.
* For relative addressing, two float[4] constants will be read into 'dest'.
* Otherwise, one float[4] constant will be read into the lower half of 'dest'.
*/
void brw_dp_READ_4_vs(struct brw_compile *p,
struct brw_reg dest,
unsigned location,
unsigned bind_table_index)
{
struct brw_instruction *insn;
unsigned msg_reg_nr = 1;
 
if (p->gen >= 060)
location /= 16;
 
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
brw_imm_ud(location));
brw_pop_insn_state(p);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
 
brw_set_dest(p, insn, dest);
if (p->gen >= 060) {
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
} else {
brw_set_src0(p, insn, brw_null_reg());
}
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
0,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
1); /* response_length (1 Oword) */
}
 
/**
* Read a float[4] constant per vertex from VS constant buffer, with
* relative addressing.
*/
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
struct brw_reg dest,
struct brw_reg addr_reg,
unsigned offset,
unsigned bind_table_index)
{
struct brw_reg src = brw_vec8_grf(0, 0);
struct brw_instruction *insn;
int msg_type;
 
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
brw_ADD(p, __retype_d(brw_message_reg(1)),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
 
gen6_resolve_implied_move(p, &src, 0);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
 
if (p->gen >= 060)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (p->gen >= 045)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
 
brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
1); /* response_length */
}
 
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned msg_control,
unsigned binding_table_index,
unsigned msg_length,
unsigned response_length,
bool eot,
bool header_present)
{
struct brw_instruction *insn;
unsigned msg_type;
struct brw_reg dest;
 
if (dispatch_width == 16)
dest = __retype_uw(vec16(brw_null_reg()));
else
dest = __retype_uw(vec8(brw_null_reg()));
 
if (p->gen >= 060 && binding_table_index == 0) {
insn = brw_next_insn(p, BRW_OPCODE_SENDC);
} else {
insn = brw_next_insn(p, BRW_OPCODE_SEND);
}
/* The execution mask is ignored for render target writes. */
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (p->gen >= 060) {
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
 
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
} else {
insn->header.destreg__conditionalmod = msg_reg_nr;
 
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
}
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p,
insn,
binding_table_index,
msg_control,
msg_type,
msg_length,
header_present,
eot,
response_length,
eot,
0 /* send_commit_msg */);
}
 
/**
* Texture sample instruction.
* Note: the msg_type plus msg_length values determine exactly what kind
* of sampling operation is performed. See volume 4, page 161 of docs.
*/
void brw_SAMPLE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
unsigned binding_table_index,
unsigned sampler,
unsigned writemask,
unsigned msg_type,
unsigned response_length,
unsigned msg_length,
bool header_present,
unsigned simd_mode)
{
assert(writemask);
 
if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 
writemask = ~writemask & WRITEMASK_XYZW;
 
brw_push_insn_state(p);
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
 
brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
 
brw_pop_insn_state(p);
 
src0 = __retype_uw(brw_null_reg());
}
 
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_sampler_message(p, insn,
binding_table_index,
sampler,
msg_type,
response_length,
msg_length,
header_present,
simd_mode);
}
}
 
/* All these variables are pretty confusing - we might be better off
* using bitmasks and macros for this, in the old style. Or perhaps
* just having the caller instantiate the fields in dword3 itself.
*/
void brw_urb_WRITE(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
bool used,
unsigned msg_length,
unsigned response_length,
bool eot,
bool writes_complete,
unsigned offset,
unsigned swizzle)
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
if (p->gen >= 070) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
__retype_ud(brw_vec1_grf(0, 5)),
brw_imm_ud(0xff00));
brw_pop_insn_state(p);
}
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
 
assert(msg_length < BRW_MAX_MRF);
 
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (p->gen <= 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_urb_message(p,
insn,
allocate,
used,
msg_length,
response_length,
eot,
writes_complete,
offset,
swizzle);
}
 
static int
brw_find_next_block_end(struct brw_compile *p, int start)
{
int ip;
 
for (ip = start + 1; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
switch (insn->header.opcode) {
case BRW_OPCODE_ENDIF:
case BRW_OPCODE_ELSE:
case BRW_OPCODE_WHILE:
return ip;
}
}
assert(!"not reached");
return start + 1;
}
 
/* There is no DO instruction on gen6, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
int ip;
int br = 2;
 
for (ip = start + 1; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
if (insn->header.opcode == BRW_OPCODE_WHILE) {
int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
: insn->bits3.break_cont.jip;
if (ip + jip / br <= start)
return ip;
}
}
assert(!"not reached");
return start + 1;
}
 
/* After program generation, go back and update the UIP and JIP of
* BREAK and CONT instructions to their correct locations.
*/
void
brw_set_uip_jip(struct brw_compile *p)
{
int ip;
int br = 2;
 
if (p->gen <= 060)
return;
 
for (ip = 0; ip < p->nr_insn; ip++) {
struct brw_instruction *insn = &p->store[ip];
 
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
/* Gen7 UIP points to WHILE; Gen6 points just after it */
insn->bits3.break_cont.uip =
br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
break;
case BRW_OPCODE_CONTINUE:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
 
assert(insn->bits3.break_cont.uip != 0);
assert(insn->bits3.break_cont.jip != 0);
break;
}
}
}
 
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
unsigned msg_reg_nr,
struct brw_reg src0,
bool allocate,
unsigned response_length,
bool eot)
{
struct brw_instruction *insn;
 
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
 
insn = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, brw_imm_d(0));
 
if (p->gen < 060)
insn->header.destreg__conditionalmod = msg_reg_nr;
 
brw_set_ff_sync_message(p,
insn,
allocate,
response_length,
eot);
}
/drivers/video/Intel-2D/brw/brw_wm.c
0,0 → 1,681
#include "brw.h"
 
#define X16 8
#define Y16 10
 
static void brw_wm_xy(struct brw_compile *p, int dw)
{
struct brw_reg r1 = brw_vec1_grf(1, 0);
struct brw_reg r1_uw = __retype_uw(r1);
struct brw_reg x_uw, y_uw;
 
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
if (dw == 16) {
x_uw = brw_uw16_grf(30, 0);
y_uw = brw_uw16_grf(28, 0);
} else {
x_uw = brw_uw8_grf(30, 0);
y_uw = brw_uw8_grf(28, 0);
}
 
brw_ADD(p,
x_uw,
__stride(__suboffset(r1_uw, 4), 2, 4, 0),
brw_imm_v(0x10101010));
brw_ADD(p,
y_uw,
__stride(__suboffset(r1_uw, 5), 2, 4, 0),
brw_imm_v(0x11001100));
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1));
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1)));
}
 
static void brw_wm_affine_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
msg += dw/8;
 
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv, 4),
brw_vec8_grf(2, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
}
}
 
static inline unsigned simd(int dw)
{
return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8;
}
 
static inline struct brw_reg sample_result(int dw, int result)
{
return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
static int brw_wm_sample(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
struct brw_reg src0;
bool header;
int len;
 
len = dw == 16 ? 4 : 2;
if (p->gen >= 060) {
header = false;
src0 = brw_message_reg(++msg);
} else {
header = true;
src0 = brw_vec8_grf(0, 0);
}
 
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_XYZW, 0,
2*len, len+header, header, simd(dw));
return result;
}
 
static int brw_wm_sample__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
struct brw_reg src0;
int mlen, rlen;
 
if (dw == 8) {
/* SIMD8 sample return is not masked */
mlen = 3;
rlen = 4;
} else {
mlen = 5;
rlen = 2;
}
 
if (p->gen >= 060)
src0 = brw_message_reg(msg);
else
src0 = brw_vec8_grf(0, 0);
 
brw_SAMPLE(p, sample_result(dw, result), msg, src0,
channel+1, channel, WRITEMASK_W, 0,
rlen, mlen, true, simd(dw));
 
if (dw == 8)
result += 3;
 
return result;
}
 
static int brw_wm_affine(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
return brw_wm_sample(p, dw, channel, msg, result);
}
 
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_affine_st(p, dw, channel, msg);
return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
 
static inline struct brw_reg null_result(int dw)
{
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0,
BRW_REGISTER_TYPE_UW,
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8,
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8,
BRW_HORIZONTAL_STRIDE_1,
BRW_SWIZZLE_XYZW,
WRITEMASK_XYZW);
}
 
static void brw_fb_write(struct brw_compile *p, int dw)
{
struct brw_instruction *insn;
unsigned msg_control, msg_type, msg_len;
struct brw_reg src0;
bool header;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
msg_len = 8;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
msg_len = 4;
}
 
if (p->gen < 060) {
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0));
brw_pop_insn_state(p);
 
msg_len += 2;
}
 
/* The execution mask is ignored for render target writes. */
insn = brw_next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
 
if (p->gen >= 060) {
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = brw_message_reg(2);
header = false;
} else {
insn->header.destreg__conditionalmod = 0;
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
src0 = __retype_uw(brw_vec8_grf(0, 0));
header = true;
}
 
brw_set_dest(p, insn, null_result(dw));
brw_set_src0(p, insn, src0);
brw_set_dp_write_message(p, insn, 0,
msg_control, msg_type, msg_len,
header, true, 0, true, false);
}
 
static void brw_wm_write(struct brw_compile *p, int dw, int src)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
/* XXX pixel execution mask? */
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0));
brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0));
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0));
brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0));
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MOV(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MOV(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MOV(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec8_grf(mask, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec8_grf(mask, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec8_grf(mask+1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__opacity(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src+0, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src+1, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src+2, 0),
brw_vec1_grf(mask, 3));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src+3, 0),
brw_vec1_grf(mask, 3));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec1_grf(mask, 3));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n+1, 0),
brw_vec1_grf(mask, 3));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
static void brw_wm_write__mask_ca(struct brw_compile *p, int dw,
int src, int mask)
{
int n;
 
if (dw == 8 && p->gen >= 060) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
brw_MUL(p,
brw_message_reg(2),
brw_vec8_grf(src + 0, 0),
brw_vec8_grf(mask + 0, 0));
brw_MUL(p,
brw_message_reg(3),
brw_vec8_grf(src + 1, 0),
brw_vec8_grf(mask + 1, 0));
brw_MUL(p,
brw_message_reg(4),
brw_vec8_grf(src + 2, 0),
brw_vec8_grf(mask + 2, 0));
brw_MUL(p,
brw_message_reg(5),
brw_vec8_grf(src + 3, 0),
brw_vec8_grf(mask + 3, 0));
 
goto done;
}
 
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
for (n = 0; n < 4; n++) {
if (p->gen >= 060) {
brw_MUL(p,
brw_message_reg(2 + 2*n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else if (p->gen >= 045 && dw == 16) {
brw_MUL(p,
brw_message_reg(2 + n + BRW_MRF_COMPR4),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MUL(p,
brw_message_reg(2 + n),
brw_vec8_grf(src + 2*n, 0),
brw_vec8_grf(mask + 2*n, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_MUL(p,
brw_message_reg(2 + n + 4),
brw_vec8_grf(src + 2*n + 1, 0),
brw_vec8_grf(mask + 2*n + 1, 0));
}
}
}
 
done:
brw_fb_write(p, dw);
}
 
bool
brw_wm_kernel__affine(struct brw_compile *p, int dispatch)
{
if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12));
 
return true;
}
 
bool
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
mask = brw_wm_affine(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12);
mask = brw_wm_affine(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
 
return true;
}
 
/* Projective variants */
 
static void brw_wm_projective_st(struct brw_compile *p, int dw,
int channel, int msg)
{
int uv;
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
uv = p->gen >= 060 ? 6 : 3;
} else {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
uv = p->gen >= 060 ? 4 : 3;
}
uv += 2*channel;
 
msg++;
if (p->gen >= 060) {
/* First compute 1/z */
brw_PLN(p,
brw_message_reg(msg),
brw_vec1_grf(uv+1, 0),
brw_vec8_grf(2, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(2, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_PLN(p,
brw_vec8_grf(28, 0),
brw_vec1_grf(uv, 0),
brw_vec8_grf(4, 0));
brw_MUL(p,
brw_message_reg(msg),
brw_vec8_grf(28, 0),
brw_vec8_grf(30, 0));
} else {
struct brw_reg r = brw_vec1_grf(uv, 0);
 
/* First compute 1/z */
brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0));
 
if (dw == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
} else
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0));
 
/* Now compute the output s,t values */
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
msg += dw/8;
 
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0));
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0));
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0));
}
}
 
static int brw_wm_projective(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
return brw_wm_sample(p, dw, channel, msg, result);
}
 
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
int channel, int msg, int result)
{
brw_wm_projective_st(p, dw, channel, msg);
return brw_wm_sample__alpha(p, dw, channel, msg, result);
}
 
bool
brw_wm_kernel__projective(struct brw_compile *p, int dispatch)
{
if (p->gen < 060)
brw_wm_xy(p, dispatch);
brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12));
 
return true;
}
 
bool
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20);
brw_wm_write__mask(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
mask = brw_wm_projective(p, dispatch, 1, 6, 20);
brw_wm_write__mask_ca(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060)
brw_wm_xy(p, dispatch);
 
src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12);
mask = brw_wm_projective(p, dispatch, 1, 6, 16);
brw_wm_write__mask(p, dispatch, mask, src);
 
return true;
}
 
bool
brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060) {
brw_wm_xy(p, dispatch);
mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
 
src = brw_wm_affine(p, dispatch, 0, 1, 12);
brw_wm_write__opacity(p, dispatch, src, mask);
 
return true;
}
 
bool
brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch)
{
int src, mask;
 
if (p->gen < 060) {
brw_wm_xy(p, dispatch);
mask = 5;
} else
mask = dispatch == 16 ? 8 : 6;
 
src = brw_wm_projective(p, dispatch, 0, 1, 12);
brw_wm_write__opacity(p, dispatch, src, mask);
 
return true;
}
/drivers/video/Intel-2D/compiler.h
0,0 → 1,63
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifndef _SNA_COMPILER_H_
#define _SNA_COMPILER_H_
 
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#define likely(expr) (__builtin_expect (!!(expr), 1))
#define unlikely(expr) (__builtin_expect (!!(expr), 0))
#define noinline __attribute__((noinline))
#define force_inline inline __attribute__((always_inline))
#define fastcall __attribute__((regparm(3)))
#define must_check __attribute__((warn_unused_result))
#define constant __attribute__((const))
#define pure __attribute__((pure))
#define __packed__ __attribute__((__packed__))
#else
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#define noinline
#define force_inline
#define fastcall
#define must_check
#define constant
#define pure
#define __packed__
#endif
 
#ifdef HAVE_VALGRIND
#define VG(x) x
#else
#define VG(x)
#endif
 
#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
 
#define COMPILE_TIME_ASSERT(E) ((void)sizeof(char[1 - 2*!(E)]))
 
#endif /* _SNA_COMPILER_H_ */
/drivers/video/Intel-2D/drm.h
0,0 → 1,825
/**
* \file drm.h
* Header for the Direct Rendering Manager
*
* \author Rickard E. (Rik) Faith <faith@valinux.com>
*
* \par Acknowledgments:
* Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
*/
 
/*
* Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
* All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
 
#ifndef _DRM_H_
#define _DRM_H_
 
#include <stddef.h>
//#include <asm/ioctl.h>
 
typedef int8_t __s8;
typedef uint8_t __u8;
typedef int16_t __s16;
typedef uint16_t __u16;
typedef int32_t __s32;
typedef uint32_t __u32;
typedef int64_t __s64;
typedef uint64_t __u64;
typedef unsigned int drm_handle_t;
 
 
#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */
#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */
#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */
#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */
 
#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */
#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */
#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD)
#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT)
#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
 
typedef unsigned int drm_context_t;
typedef unsigned int drm_drawable_t;
typedef unsigned int drm_magic_t;
 
/**
* Cliprect.
*
* \warning: If you change this structure, make sure you change
* XF86DRIClipRectRec in the server as well
*
* \note KW: Actually it's illegal to change either for
* backwards-compatibility reasons.
*/
struct drm_clip_rect {
unsigned short x1;
unsigned short y1;
unsigned short x2;
unsigned short y2;
};
 
/**
* Drawable information.
*/
struct drm_drawable_info {
unsigned int num_rects;
struct drm_clip_rect *rects;
};
 
/**
* Texture region,
*/
struct drm_tex_region {
unsigned char next;
unsigned char prev;
unsigned char in_use;
unsigned char padding;
unsigned int age;
};
 
/**
* Hardware lock.
*
* The lock structure is a simple cache-line aligned integer. To avoid
* processor bus contention on a multiprocessor system, there should not be any
* other data stored in the same cache line.
*/
struct drm_hw_lock {
__volatile__ unsigned int lock; /**< lock variable */
char padding[60]; /**< Pad to cache line */
};
 
/**
* DRM_IOCTL_VERSION ioctl argument type.
*
* \sa drmGetVersion().
*/
struct drm_version {
int version_major; /**< Major version */
int version_minor; /**< Minor version */
int version_patchlevel; /**< Patch level */
size_t name_len; /**< Length of name buffer */
char *name; /**< Name of driver */
size_t date_len; /**< Length of date buffer */
char *date; /**< User-space buffer to hold date */
size_t desc_len; /**< Length of desc buffer */
char *desc; /**< User-space buffer to hold desc */
};
 
/**
* DRM_IOCTL_GET_UNIQUE ioctl argument type.
*
* \sa drmGetBusid() and drmSetBusId().
*/
struct drm_unique {
size_t unique_len; /**< Length of unique */
char *unique; /**< Unique name for driver instantiation */
};
 
struct drm_list {
int count; /**< Length of user-space structures */
struct drm_version *version;
};
 
struct drm_block {
int unused;
};
 
/**
* DRM_IOCTL_CONTROL ioctl argument type.
*
* \sa drmCtlInstHandler() and drmCtlUninstHandler().
*/
struct drm_control {
enum {
DRM_ADD_COMMAND,
DRM_RM_COMMAND,
DRM_INST_HANDLER,
DRM_UNINST_HANDLER
} func;
int irq;
};
 
/**
* Type of memory to map.
*/
enum drm_map_type {
_DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */
_DRM_REGISTERS = 1, /**< no caching, no core dump */
_DRM_SHM = 2, /**< shared, cached */
_DRM_AGP = 3, /**< AGP/GART */
_DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */
_DRM_CONSISTENT = 5, /**< Consistent memory for PCI DMA */
_DRM_GEM = 6, /**< GEM object */
};
 
/**
* Memory mapping flags.
*/
enum drm_map_flags {
_DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */
_DRM_READ_ONLY = 0x02,
_DRM_LOCKED = 0x04, /**< shared, cached, locked */
_DRM_KERNEL = 0x08, /**< kernel requires access */
_DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */
_DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */
_DRM_REMOVABLE = 0x40, /**< Removable mapping */
_DRM_DRIVER = 0x80 /**< Managed by driver */
};
 
struct drm_ctx_priv_map {
unsigned int ctx_id; /**< Context requesting private mapping */
void *handle; /**< Handle of map */
};
 
/**
* DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
* argument type.
*
* \sa drmAddMap().
*/
struct drm_map {
unsigned long offset; /**< Requested physical address (0 for SAREA)*/
unsigned long size; /**< Requested physical size (bytes) */
enum drm_map_type type; /**< Type of memory to map */
enum drm_map_flags flags; /**< Flags */
void *handle; /**< User-space: "Handle" to pass to mmap() */
/**< Kernel-space: kernel-virtual address */
int mtrr; /**< MTRR slot used */
/* Private data */
};
 
/**
* DRM_IOCTL_GET_CLIENT ioctl argument type.
*/
struct drm_client {
int idx; /**< Which client desired? */
int auth; /**< Is client authenticated? */
unsigned long pid; /**< Process ID */
unsigned long uid; /**< User ID */
unsigned long magic; /**< Magic */
unsigned long iocs; /**< Ioctl count */
};
 
enum drm_stat_type {
_DRM_STAT_LOCK,
_DRM_STAT_OPENS,
_DRM_STAT_CLOSES,
_DRM_STAT_IOCTLS,
_DRM_STAT_LOCKS,
_DRM_STAT_UNLOCKS,
_DRM_STAT_VALUE, /**< Generic value */
_DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */
_DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */
 
_DRM_STAT_IRQ, /**< IRQ */
_DRM_STAT_PRIMARY, /**< Primary DMA bytes */
_DRM_STAT_SECONDARY, /**< Secondary DMA bytes */
_DRM_STAT_DMA, /**< DMA */
_DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */
_DRM_STAT_MISSED /**< Missed DMA opportunity */
/* Add to the *END* of the list */
};
 
/**
* DRM_IOCTL_GET_STATS ioctl argument type.
*/
struct drm_stats {
unsigned long count;
struct {
unsigned long value;
enum drm_stat_type type;
} data[15];
};
 
/**
* Hardware locking flags.
*/
enum drm_lock_flags {
_DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */
_DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */
_DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */
_DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */
/* These *HALT* flags aren't supported yet
-- they will be used to support the
full-screen DGA-like mode. */
_DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */
_DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */
};
 
/**
* DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
*
* \sa drmGetLock() and drmUnlock().
*/
struct drm_lock {
int context;
enum drm_lock_flags flags;
};
 
/**
* DMA flags
*
* \warning
* These values \e must match xf86drm.h.
*
* \sa drm_dma.
*/
enum drm_dma_flags {
/* Flags for DMA buffer dispatch */
_DRM_DMA_BLOCK = 0x01, /**<
* Block until buffer dispatched.
*
* \note The buffer may not yet have
* been processed by the hardware --
* getting a hardware lock with the
* hardware quiescent will ensure
* that the buffer has been
* processed.
*/
_DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */
_DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */
 
/* Flags for DMA buffer request */
_DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */
_DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */
_DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */
};
 
/**
* DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
*
* \sa drmAddBufs().
*/
struct drm_buf_desc {
int count; /**< Number of buffers of this size */
int size; /**< Size in bytes */
int low_mark; /**< Low water mark */
int high_mark; /**< High water mark */
enum {
_DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */
_DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */
_DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */
_DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */
_DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */
} flags;
unsigned long agp_start; /**<
* Start address of where the AGP buffers are
* in the AGP aperture
*/
};
 
/**
* DRM_IOCTL_INFO_BUFS ioctl argument type.
*/
struct drm_buf_info {
int count; /**< Entries in list */
struct drm_buf_desc *list;
};
 
/**
* DRM_IOCTL_FREE_BUFS ioctl argument type.
*/
struct drm_buf_free {
int count;
int *list;
};
 
/**
* Buffer information
*
* \sa drm_buf_map.
*/
struct drm_buf_pub {
int idx; /**< Index into the master buffer list */
int total; /**< Buffer size */
int used; /**< Amount of buffer in use (for DMA) */
void *address; /**< Address of buffer */
};
 
/**
* DRM_IOCTL_MAP_BUFS ioctl argument type.
*/
struct drm_buf_map {
int count; /**< Length of the buffer list */
#ifdef __cplusplus
void *virt;
#else
void *virtual; /**< Mmap'd area in user-virtual */
#endif
struct drm_buf_pub *list; /**< Buffer information */
};
 
/**
* DRM_IOCTL_DMA ioctl argument type.
*
* Indices here refer to the offset into the buffer list in drm_buf_get.
*
* \sa drmDMA().
*/
struct drm_dma {
int context; /**< Context handle */
int send_count; /**< Number of buffers to send */
int *send_indices; /**< List of handles to buffers */
int *send_sizes; /**< Lengths of data to send */
enum drm_dma_flags flags; /**< Flags */
int request_count; /**< Number of buffers requested */
int request_size; /**< Desired size for buffers */
int *request_indices; /**< Buffer information */
int *request_sizes;
int granted_count; /**< Number of buffers granted */
};
 
enum drm_ctx_flags {
_DRM_CONTEXT_PRESERVED = 0x01,
_DRM_CONTEXT_2DONLY = 0x02
};
 
/**
* DRM_IOCTL_ADD_CTX ioctl argument type.
*
* \sa drmCreateContext() and drmDestroyContext().
*/
struct drm_ctx {
drm_context_t handle;
enum drm_ctx_flags flags;
};
 
/**
* DRM_IOCTL_RES_CTX ioctl argument type.
*/
struct drm_ctx_res {
int count;
struct drm_ctx *contexts;
};
 
/**
* DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
*/
struct drm_draw {
drm_drawable_t handle;
};
 
/**
* DRM_IOCTL_UPDATE_DRAW ioctl argument type.
*/
typedef enum {
DRM_DRAWABLE_CLIPRECTS,
} drm_drawable_info_type_t;
 
struct drm_update_draw {
drm_drawable_t handle;
unsigned int type;
unsigned int num;
unsigned long long data;
};
 
/**
* DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
*/
struct drm_auth {
drm_magic_t magic;
};
 
/**
* DRM_IOCTL_IRQ_BUSID ioctl argument type.
*
* \sa drmGetInterruptFromBusID().
*/
struct drm_irq_busid {
int irq; /**< IRQ number */
int busnum; /**< bus number */
int devnum; /**< device number */
int funcnum; /**< function number */
};
 
enum drm_vblank_seq_type {
_DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */
_DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */
_DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */
_DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */
_DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */
_DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */
_DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */
};
 
#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
_DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS)
 
struct drm_wait_vblank_request {
enum drm_vblank_seq_type type;
unsigned int sequence;
unsigned long signal;
};
 
struct drm_wait_vblank_reply {
enum drm_vblank_seq_type type;
unsigned int sequence;
long tval_sec;
long tval_usec;
};
 
/**
* DRM_IOCTL_WAIT_VBLANK ioctl argument type.
*
* \sa drmWaitVBlank().
*/
union drm_wait_vblank {
struct drm_wait_vblank_request request;
struct drm_wait_vblank_reply reply;
};
 
#define _DRM_PRE_MODESET 1
#define _DRM_POST_MODESET 2
 
/**
* DRM_IOCTL_MODESET_CTL ioctl argument type
*
* \sa drmModesetCtl().
*/
struct drm_modeset_ctl {
__u32 crtc;
__u32 cmd;
};
 
/**
* DRM_IOCTL_AGP_ENABLE ioctl argument type.
*
* \sa drmAgpEnable().
*/
struct drm_agp_mode {
unsigned long mode; /**< AGP mode */
};
 
/**
* DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
*
* \sa drmAgpAlloc() and drmAgpFree().
*/
struct drm_agp_buffer {
unsigned long size; /**< In bytes -- will round to page boundary */
unsigned long handle; /**< Used for binding / unbinding */
unsigned long type; /**< Type of memory to allocate */
unsigned long physical; /**< Physical used by i810 */
};
 
/**
* DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
*
* \sa drmAgpBind() and drmAgpUnbind().
*/
struct drm_agp_binding {
unsigned long handle; /**< From drm_agp_buffer */
unsigned long offset; /**< In bytes -- will round to page boundary */
};
 
/**
* DRM_IOCTL_AGP_INFO ioctl argument type.
*
* \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
* drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(),
* drmAgpVendorId() and drmAgpDeviceId().
*/
struct drm_agp_info {
int agp_version_major;
int agp_version_minor;
unsigned long mode;
unsigned long aperture_base; /* physical address */
unsigned long aperture_size; /* bytes */
unsigned long memory_allowed; /* bytes */
unsigned long memory_used;
 
/* PCI information */
unsigned short id_vendor;
unsigned short id_device;
};
 
/**
* DRM_IOCTL_SG_ALLOC ioctl argument type.
*/
struct drm_scatter_gather {
unsigned long size; /**< In bytes -- will round to page boundary */
unsigned long handle; /**< Used for mapping / unmapping */
};
 
/**
* DRM_IOCTL_SET_VERSION ioctl argument type.
*/
struct drm_set_version {
int drm_di_major;
int drm_di_minor;
int drm_dd_major;
int drm_dd_minor;
};
 
/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
struct drm_gem_close {
/** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
 
/** DRM_IOCTL_GEM_FLINK ioctl argument type */
struct drm_gem_flink {
/** Handle for the object being named */
__u32 handle;
 
/** Returned global name */
__u32 name;
};
 
/** DRM_IOCTL_GEM_OPEN ioctl argument type */
struct drm_gem_open {
/** Name of object being opened */
__u32 name;
 
/** Returned handle for the object */
__u32 handle;
 
/** Returned size of the object */
__u64 size;
};
 
/** DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
__u64 capability;
__u64 value;
};
 
#define DRM_CLOEXEC O_CLOEXEC
struct drm_prime_handle {
__u32 handle;
 
/** Flags.. only applicable for handle->fd */
__u32 flags;
 
/** Returned dmabuf file descriptor */
__s32 fd;
};
 
//#include "drm_mode.h"
 
#if 0
 
#define DRM_IOCTL_BASE 'd'
#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr)
#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type)
#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type)
#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type)
 
#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version)
#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique)
#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth)
#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid)
#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map)
#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client)
#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats)
#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version)
#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl)
#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close)
#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink)
#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open)
#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap)
 
#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique)
#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth)
#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block)
#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block)
#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control)
#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map)
#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc)
#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc)
#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info)
#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map)
#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free)
 
#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map)
 
#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map)
#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map)
 
#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e)
#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f)
 
#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx)
#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx)
#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx)
#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx)
#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx)
#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx)
#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res)
#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw)
#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw)
#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma)
#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock)
#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
 
#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle)
#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle)
 
#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30)
#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31)
#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode)
#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info)
#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer)
#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer)
#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding)
#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding)
 
#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather)
#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather)
 
#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank)
 
#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw)
 
#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res)
#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc)
#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc)
#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor)
#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder)
#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector)
#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd)
#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd)
 
#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property)
#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
 
#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb)
#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res)
#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane)
#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane)
#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2)
#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties)
#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property)
 
#endif
 
/**
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x99.
* Generic IOCTLS restart at 0xA0.
*
* \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
* drmCommandReadWrite().
*/
#define DRM_COMMAND_BASE 0x40
#define DRM_COMMAND_END 0xA0
 
/**
* Header for events written back to userspace on the drm fd. The
* type defines the type of event, the length specifies the total
* length of the event (including the header), and user_data is
* typically a 64 bit value passed with the ioctl that triggered the
* event. A read on the drm fd will always only return complete
* events, that is, if for example the read buffer is 100 bytes, and
* there are two 64 byte events pending, only one will be returned.
*
* Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
* up are chipset specific.
*/
struct drm_event {
__u32 type;
__u32 length;
};
 
#define DRM_EVENT_VBLANK 0x01
#define DRM_EVENT_FLIP_COMPLETE 0x02
 
struct drm_event_vblank {
struct drm_event base;
__u64 user_data;
__u32 tv_sec;
__u32 tv_usec;
__u32 sequence;
__u32 reserved;
};
 
#define DRM_CAP_DUMB_BUFFER 0x1
#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3
#define DRM_CAP_DUMB_PREFER_SHADOW 0x4
#define DRM_CAP_PRIME 0x5
 
#define DRM_PRIME_CAP_IMPORT 0x1
#define DRM_PRIME_CAP_EXPORT 0x2
 
/* typedef area */
typedef struct drm_clip_rect drm_clip_rect_t;
typedef struct drm_drawable_info drm_drawable_info_t;
typedef struct drm_tex_region drm_tex_region_t;
typedef struct drm_hw_lock drm_hw_lock_t;
typedef struct drm_version drm_version_t;
typedef struct drm_unique drm_unique_t;
typedef struct drm_list drm_list_t;
typedef struct drm_block drm_block_t;
typedef struct drm_control drm_control_t;
typedef enum drm_map_type drm_map_type_t;
typedef enum drm_map_flags drm_map_flags_t;
typedef struct drm_ctx_priv_map drm_ctx_priv_map_t;
typedef struct drm_map drm_map_t;
typedef struct drm_client drm_client_t;
typedef enum drm_stat_type drm_stat_type_t;
typedef struct drm_stats drm_stats_t;
typedef enum drm_lock_flags drm_lock_flags_t;
typedef struct drm_lock drm_lock_t;
typedef enum drm_dma_flags drm_dma_flags_t;
typedef struct drm_buf_desc drm_buf_desc_t;
typedef struct drm_buf_info drm_buf_info_t;
typedef struct drm_buf_free drm_buf_free_t;
typedef struct drm_buf_pub drm_buf_pub_t;
typedef struct drm_buf_map drm_buf_map_t;
typedef struct drm_dma drm_dma_t;
typedef union drm_wait_vblank drm_wait_vblank_t;
typedef struct drm_agp_mode drm_agp_mode_t;
typedef enum drm_ctx_flags drm_ctx_flags_t;
typedef struct drm_ctx drm_ctx_t;
typedef struct drm_ctx_res drm_ctx_res_t;
typedef struct drm_draw drm_draw_t;
typedef struct drm_update_draw drm_update_draw_t;
typedef struct drm_auth drm_auth_t;
typedef struct drm_irq_busid drm_irq_busid_t;
typedef enum drm_vblank_seq_type drm_vblank_seq_type_t;
 
typedef struct drm_agp_buffer drm_agp_buffer_t;
typedef struct drm_agp_binding drm_agp_binding_t;
typedef struct drm_agp_info drm_agp_info_t;
typedef struct drm_scatter_gather drm_scatter_gather_t;
typedef struct drm_set_version drm_set_version_t;
 
#endif
/drivers/video/Intel-2D/gen4_vertex.h
0,0 → 1,16
#ifndef GEN4_VERTEX_H
#define GEN4_VERTEX_H
 
#include "compiler.h"
 
#include "sna.h"
#include "sna_render.h"
 
void gen4_vertex_flush(struct sna *sna);
int gen4_vertex_finish(struct sna *sna);
void gen4_vertex_close(struct sna *sna);
 
unsigned gen4_choose_composite_emitter(struct sna_composite_op *tmp);
//unsigned gen4_choose_spans_emitter(struct sna_composite_spans_op *tmp);
 
#endif /* GEN4_VERTEX_H */
/drivers/video/Intel-2D/gen6_render.c
0,0 → 1,3459
/*
* Copyright © 2006,2008,2011 Intel Corporation
* Copyright © 2007 Red Hat, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Wang Zhenyu <zhenyu.z.wang@sna.com>
* Eric Anholt <eric@anholt.net>
* Carl Worth <cworth@redhat.com>
* Keith Packard <keithp@keithp.com>
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
#include "sna_render.h"
//#include "sna_render_inline.h"
//#include "sna_video.h"
 
#include "brw/brw.h"
#include "gen6_render.h"
 
#include "gen4_vertex.h"
 
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0
 
#define NO_RING_SWITCH 1
#define PREFER_RENDER 0
 
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0
 
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif
 
#define GEN6_MAX_SIZE 8192
 
struct gt_info {
int max_vs_threads;
int max_gs_threads;
int max_wm_threads;
struct {
int size;
int max_vs_entries;
int max_gs_entries;
} urb;
};
 
static const struct gt_info gt1_info = {
.max_vs_threads = 24,
.max_gs_threads = 21,
.max_wm_threads = 40,
.urb = { 32, 256, 256 },
};
 
static const struct gt_info gt2_info = {
.max_vs_threads = 60,
.max_gs_threads = 60,
.max_wm_threads = 80,
.urb = { 64, 256, 256 },
};
 
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
 
#define NOKERNEL(kernel_enum, func, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
#define KERNEL(kernel_enum, kernel, ns) \
[GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
 
static const struct wm_kernel_info {
const char *name;
const void *data;
unsigned int size;
unsigned int num_surfaces;
} wm_kernels[] = {
NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
 
NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
 
NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
 
NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
 
NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
 
KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
};
#undef KERNEL
 
static const struct blendinfo {
bool src_alpha;
uint32_t src_blend;
uint32_t dst_blend;
} gen6_blend_op[] = {
/* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
/* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
/* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
/* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
/* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
/* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
/* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
};
 
/**
* Highest-valued BLENDFACTOR used in gen6_blend_op.
*
* This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
* GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
* GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
*/
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)
 
#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64)
 
#define BLEND_OFFSET(s, d) \
(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)
 
#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)
 
#define SAMPLER_OFFSET(sf, se, mf, me) \
(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))
 
#define VERTEX_2s2s 0
 
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)
 
#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
 
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V) (((S) | (K)) << 16 | ((B) | (V)))
 
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
static inline bool too_large(int width, int height)
{
return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
}
 
static uint32_t gen6_get_blend(int op,
bool has_component_alpha,
uint32_t dst_format)
{
uint32_t src, dst;
 
// src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
// dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
 
src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
 
#if 0
/* If there's no dst alpha channel, adjust the blend op so that
* we'll treat it always as 1.
*/
if (PICT_FORMAT_A(dst_format) == 0) {
if (src == GEN6_BLENDFACTOR_DST_ALPHA)
src = GEN6_BLENDFACTOR_ONE;
else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
src = GEN6_BLENDFACTOR_ZERO;
}
 
/* If the source alpha is being used, then we should only be in a
* case where the source blend factor is 0, and the source blend
* value is the mask channels multiplied by the source picture's alpha.
*/
if (has_component_alpha && gen6_blend_op[op].src_alpha) {
if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_SRC_COLOR;
else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
}
 
DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
op, dst_format, PICT_FORMAT_A(dst_format),
src, dst, (int)BLEND_OFFSET(src, dst)));
#endif
 
return BLEND_OFFSET(src, dst);
}
 
static uint32_t gen6_get_card_format(PictFormat format)
{
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
 
/*
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
case PICT_a8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_x8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
case PICT_a2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_x2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
case PICT_r8g8b8:
return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
case PICT_r5g6b5:
return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_a1r5g5b5:
return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
*/
}
 
static uint32_t gen6_get_dest_format(PictFormat format)
{
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
 
#if 0
 
switch (format) {
default:
return -1;
case PICT_a8r8g8b8:
case PICT_x8r8g8b8:
return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
case PICT_a8b8g8r8:
case PICT_x8b8g8r8:
return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
case PICT_a2r10g10b10:
case PICT_x2r10g10b10:
return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
case PICT_r5g6b5:
return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
case PICT_x1r5g5b5:
case PICT_a1r5g5b5:
return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
case PICT_a8:
return GEN6_SURFACEFORMAT_A8_UNORM;
case PICT_a4r4g4b4:
case PICT_x4r4g4b4:
return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
}
#endif
 
}
 
#if 0
 
static bool gen6_check_dst_format(PictFormat format)
{
if (gen6_get_dest_format(format) != -1)
return true;
 
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
return false;
}
 
static bool gen6_check_format(uint32_t format)
{
if (gen6_get_card_format(format) != -1)
return true;
 
DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
return false;
}
 
static uint32_t gen6_filter(uint32_t filter)
{
switch (filter) {
default:
assert(0);
case PictFilterNearest:
return SAMPLER_FILTER_NEAREST;
case PictFilterBilinear:
return SAMPLER_FILTER_BILINEAR;
}
}
 
static uint32_t gen6_check_filter(PicturePtr picture)
{
switch (picture->filter) {
case PictFilterNearest:
case PictFilterBilinear:
return true;
default:
return false;
}
}
 
static uint32_t gen6_repeat(uint32_t repeat)
{
switch (repeat) {
default:
assert(0);
case RepeatNone:
return SAMPLER_EXTEND_NONE;
case RepeatNormal:
return SAMPLER_EXTEND_REPEAT;
case RepeatPad:
return SAMPLER_EXTEND_PAD;
case RepeatReflect:
return SAMPLER_EXTEND_REFLECT;
}
}
 
static bool gen6_check_repeat(PicturePtr picture)
{
if (!picture->repeat)
return true;
 
switch (picture->repeatType) {
case RepeatNone:
case RepeatNormal:
case RepeatPad:
case RepeatReflect:
return true;
default:
return false;
}
}
#endif
 
static int
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
{
int base;
 
if (has_mask) {
/*
if (is_ca) {
if (gen6_blend_op[op].src_alpha)
base = GEN6_WM_KERNEL_MASKCA_SRCALPHA;
else
base = GEN6_WM_KERNEL_MASKCA;
} else
base = GEN6_WM_KERNEL_MASK;
*/
} else
base = GEN6_WM_KERNEL_NOMASK;
 
return base + !is_affine;
}
 
static void
gen6_emit_urb(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
(sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
(0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
}
 
static void
gen6_emit_state_base_address(struct sna *sna)
{
OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(0); /* general */
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
sna->kgem.nbatch,
NULL,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
OUT_BATCH(0); /* indirect */
OUT_BATCH(kgem_add_reloc(&sna->kgem,
sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16,
BASE_ADDRESS_MODIFY));
 
/* upper bounds, disable */
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
OUT_BATCH(0);
OUT_BATCH(BASE_ADDRESS_MODIFY);
}
 
static void
gen6_emit_viewports(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
(4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_vs(struct sna *sna)
{
/* disable VS constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
OUT_BATCH(0); /* no VS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
}
 
static void
gen6_emit_gs(struct sna *sna)
{
/* disable GS constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
OUT_BATCH(0); /* no GS kernel */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
}
 
static void
gen6_emit_clip(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0); /* pass-through */
OUT_BATCH(0);
}
 
static void
gen6_emit_wm_constants(struct sna *sna)
{
/* disable WM constant buffer */
OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_null_depth_buffer(struct sna *sna)
{
OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
OUT_BATCH(0);
}
 
static void
gen6_emit_invariant(struct sna *sna)
{
OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
 
OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
OUT_BATCH(1);
 
gen6_emit_urb(sna);
 
gen6_emit_state_base_address(sna);
 
gen6_emit_viewports(sna);
gen6_emit_vs(sna);
gen6_emit_gs(sna);
gen6_emit_clip(sna);
gen6_emit_wm_constants(sna);
gen6_emit_null_depth_buffer(sna);
 
sna->render_state.gen6.needs_invariant = false;
}
 
static bool
gen6_emit_cc(struct sna *sna, int blend)
{
struct gen6_render_state *render = &sna->render_state.gen6;
 
if (render->blend == blend)
return blend != NO_BLEND;
 
DBG(("%s: blend = %x\n", __FUNCTION__, blend));
 
OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
OUT_BATCH((render->cc_blend + blend) | 1);
if (render->blend == (unsigned)-1) {
OUT_BATCH(1);
OUT_BATCH(1);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
}
 
render->blend = blend;
return blend != NO_BLEND;
}
 
static void
gen6_emit_sampler(struct sna *sna, uint32_t state)
{
if (sna->render_state.gen6.samplers == state)
return;
 
sna->render_state.gen6.samplers = state;
 
DBG(("%s: sampler = %x\n", __FUNCTION__, state));
 
OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(sna->render_state.gen6.wm_state + state);
}
 
static void
gen6_emit_sf(struct sna *sna, bool has_mask)
{
int num_sf_outputs = has_mask ? 2 : 1;
 
if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
return;
 
DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
__FUNCTION__, num_sf_outputs, 1, 0));
 
sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
 
OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0);
OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW9 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW14 */
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0); /* DW19 */
}
 
static void
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
{
const uint32_t *kernels;
 
if (sna->render_state.gen6.kernel == kernel)
return;
 
sna->render_state.gen6.kernel = kernel;
kernels = sna->render_state.gen6.wm_kernel[kernel];
 
DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
__FUNCTION__,
wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
kernels[0], kernels[1], kernels[2]));
 
OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
OUT_BATCH(0); /* scratch space */
OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
(kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
(kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
(kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
GEN6_3DSTATE_WM_DISPATCH_ENABLE);
OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
OUT_BATCH(kernels[2]);
OUT_BATCH(kernels[1]);
}
 
static bool
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
{
if (sna->render_state.gen6.surface_table == offset)
return false;
 
/* Binding table pointers */
OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* vs */
OUT_BATCH(0); /* gs */
/* Only the PS uses the binding table */
OUT_BATCH(offset*4);
 
sna->render_state.gen6.surface_table = offset;
return true;
}
 
static bool
gen6_emit_drawing_rectangle(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
assert(!too_large(op->dst.x, op->dst.y));
assert(!too_large(op->dst.width, op->dst.height));
 
if (sna->render_state.gen6.drawrect_limit == limit &&
sna->render_state.gen6.drawrect_offset == offset)
return false;
 
/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*/
if (!sna->render_state.gen6.first_state_packet) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
sna->render_state.gen6.general_bo,
I915_GEM_DOMAIN_INSTRUCTION << 16 |
I915_GEM_DOMAIN_INSTRUCTION,
64));
OUT_BATCH(0);
 
OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(limit);
OUT_BATCH(offset);
 
sna->render_state.gen6.drawrect_offset = offset;
sna->render_state.gen6.drawrect_limit = limit;
return true;
}
 
static void
gen6_emit_vertex_elements(struct sna *sna,
const struct sna_composite_op *op)
{
/*
* vertex data in vertex buffer
* position: (x, y)
* texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
* texture coordinate 1 if (has_mask is true): same as above
*/
struct gen6_render_state *render = &sna->render_state.gen6;
uint32_t src_format, dw;
int id = GEN6_VERTEX(op->u.gen6.flags);
bool has_mask;
 
DBG(("%s: setup id=%d\n", __FUNCTION__, id));
 
if (render->ve_id == id)
return;
render->ve_id = id;
 
/* The VUE layout
* dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
* dword 4-7: position (x, y, 1.0, 1.0),
* dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
* dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
*
* dword 4-15 are fetched from vertex buffer
*/
has_mask = (id >> 2) != 0;
OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
((2 * (3 + has_mask)) + 1 - 2));
 
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
 
/* x,y */
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
0 << VE0_OFFSET_SHIFT);
OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
 
/* u0, v0, w0 */
DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id & 3) {
default:
assert(0);
case 0:
src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
4 << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
 
/* u1, v1, w1 */
if (has_mask) {
unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
switch (id >> 2) {
case 1:
src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
default:
assert(0);
case 2:
src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
break;
case 3:
src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
break;
}
OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
src_format << VE0_FORMAT_SHIFT |
offset << VE0_OFFSET_SHIFT);
OUT_BATCH(dw);
}
}
 
static void
gen6_emit_flush(struct sna *sna)
{
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
GEN6_PIPE_CONTROL_TC_FLUSH |
GEN6_PIPE_CONTROL_CS_STALL);
OUT_BATCH(0);
OUT_BATCH(0);
}
 
static void
gen6_emit_state(struct sna *sna,
const struct sna_composite_op *op,
uint16_t wm_binding_table)
{
bool need_stall = wm_binding_table & 1;
 
if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
need_stall = false;
gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
gen6_emit_vertex_elements(sna, op);
 
need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
if (gen6_emit_drawing_rectangle(sna, op))
need_stall = false;
if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
gen6_emit_flush(sna);
kgem_clear_dirty(&sna->kgem);
if (op->dst.bo->exec)
kgem_bo_mark_dirty(op->dst.bo);
need_stall = false;
}
if (need_stall) {
OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
OUT_BATCH(0);
OUT_BATCH(0);
}
sna->render_state.gen6.first_state_packet = false;
}
 
static bool gen6_magic_ca_pass(struct sna *sna,
const struct sna_composite_op *op)
{
struct gen6_render_state *state = &sna->render_state.gen6;
 
if (!op->need_magic_ca_pass)
return false;
 
DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
sna->render.vertex_start, sna->render.vertex_index));
 
gen6_emit_flush(sna);
 
gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
gen6_emit_wm(sna,
gen6_choose_composite_kernel(PictOpAdd,
true, true,
op->is_affine),
true);
 
OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
OUT_BATCH(sna->render.vertex_start);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
 
state->last_primitive = sna->kgem.nbatch;
return true;
}
 
typedef struct gen6_surface_state_padded {
struct gen6_surface_state state;
char pad[32 - sizeof(struct gen6_surface_state)];
} gen6_surface_state_padded;
 
static void null_create(struct sna_static_stream *stream)
{
/* A bunch of zeros useful for legacy border color and depth-stencil */
sna_static_stream_map(stream, 64, 64);
}
 
static void scratch_create(struct sna_static_stream *stream)
{
/* 64 bytes of scratch space for random writes, such as
* the pipe-control w/a.
*/
sna_static_stream_map(stream, 64, 64);
}
 
static void
sampler_state_init(struct gen6_sampler_state *sampler_state,
sampler_filter_t filter,
sampler_extend_t extend)
{
sampler_state->ss0.lod_preclamp = 1; /* GL mode */
 
/* We use the legacy mode to get the semantics specified by
* the Render extension. */
sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
 
switch (filter) {
default:
case SAMPLER_FILTER_NEAREST:
sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
break;
case SAMPLER_FILTER_BILINEAR:
sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
break;
}
 
switch (extend) {
default:
case SAMPLER_EXTEND_NONE:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
break;
case SAMPLER_EXTEND_REPEAT:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
break;
case SAMPLER_EXTEND_PAD:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
break;
case SAMPLER_EXTEND_REFLECT:
sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
break;
}
}
 
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
ss->ss3.non_normalized_coord = 1;
 
sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
 
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
return 0;
/*
switch (tiling) {
default: assert(0);
case I915_TILING_NONE: return 0;
case I915_TILING_X: return GEN6_SURFACE_TILED;
case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
}
*/
}
 
/**
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static int
gen6_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
uint32_t height,
uint32_t format,
bool is_dst)
{
uint32_t *ss;
uint32_t domains;
uint16_t offset;
uint32_t is_scanout = is_dst && bo->scanout;
 
/* After the first bind, we manage the cache domains within the batch */
offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
if (offset) {
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
is_dst ? "render" : "sampler"));
if (is_dst)
kgem_bo_mark_dirty(bo);
return offset * sizeof(uint32_t);
}
 
offset = sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + offset;
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
if (is_dst)
domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
else
domains = I915_GEM_DOMAIN_SAMPLER << 16;
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
assert(bo->pitch <= (1 << 18));
ss[3] = (gen6_tiling_bits(bo->tiling) |
(bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
ss[4] = 0;
ss[5] = is_scanout ? 0 : 3 << 16;
 
kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
 
DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
offset, bo->handle, ss[1],
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
 
return offset * sizeof(uint32_t);
}
 
static void gen6_emit_vertex_buffer(struct sna *sna,
const struct sna_composite_op *op)
{
int id = GEN6_VERTEX(op->u.gen6.flags);
 
OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
OUT_BATCH(0);
OUT_BATCH(~0); /* max address: disabled */
OUT_BATCH(0);
 
sna->render.vb_id |= 1 << id;
}
 
static void gen6_emit_primitive(struct sna *sna)
{
if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
__FUNCTION__,
sna->render.vertex_start,
sna->render.vertex_index));
sna->render.vertex_offset = sna->kgem.nbatch - 5;
return;
}
 
OUT_BATCH(GEN6_3DPRIMITIVE |
GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
_3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
0 << 9 |
4);
sna->render.vertex_offset = sna->kgem.nbatch;
OUT_BATCH(0); /* vertex count, to be filled in later */
OUT_BATCH(sna->render.vertex_index);
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
sna->render.vertex_start = sna->render.vertex_index;
DBG(("%s: started new primitive: index=%d\n",
__FUNCTION__, sna->render.vertex_start));
 
sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
 
static bool gen6_rectangle_begin(struct sna *sna,
const struct sna_composite_op *op)
{
int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
int ndwords;
 
ndwords = op->need_magic_ca_pass ? 60 : 6;
if ((sna->render.vb_id & id) == 0)
ndwords += 5;
if (!kgem_check_batch(&sna->kgem, ndwords))
return false;
 
if ((sna->render.vb_id & id) == 0)
gen6_emit_vertex_buffer(sna, op);
 
gen6_emit_primitive(sna);
return true;
}
 
static int gen6_get_rectangles__flush(struct sna *sna,
const struct sna_composite_op *op)
{
 
if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
return 0;
if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
return 0;
 
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
if (gen6_magic_ca_pass(sna, op)) {
gen6_emit_flush(sna);
gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
gen6_emit_wm(sna,
GEN6_KERNEL(op->u.gen6.flags),
GEN6_VERTEX(op->u.gen6.flags) >> 2);
}
}
 
return gen4_vertex_finish(sna);
}
 
inline static int gen6_get_rectangles(struct sna *sna,
const struct sna_composite_op *op,
int want,
void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
int rem;
 
start:
rem = vertex_space(sna);
if (unlikely(rem < op->floats_per_rect)) {
DBG(("flushing vbo for %s: %d < %d\n",
__FUNCTION__, rem, op->floats_per_rect));
rem = gen6_get_rectangles__flush(sna, op);
if (unlikely(rem == 0))
goto flush;
}
 
if (unlikely(sna->render.vertex_offset == 0 &&
!gen6_rectangle_begin(sna, op)))
goto flush;
 
if (want > 1 && want * op->floats_per_rect > rem)
want = rem / op->floats_per_rect;
 
assert(want > 0);
sna->render.vertex_index += 3*want;
return want;
 
flush:
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
// sna_vertex_wait__locked(&sna->render);
_kgem_submit(&sna->kgem);
emit_state(sna, op);
goto start;
}
 
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
uint16_t *offset)
{
uint32_t *table;
 
sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
/* Clear all surplus entries to zero in case of prefetch */
table = memset(sna->kgem.batch + sna->kgem.surface,
0, sizeof(struct gen6_surface_state_padded));
 
DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
 
*offset = sna->kgem.surface;
return table;
}
 
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
DBG(("%s: flushing batch: %d < %d+%d\n",
__FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
150, 4*8));
kgem_submit(&sna->kgem);
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
if (sna->render_state.gen6.needs_invariant)
gen6_emit_invariant(sna);
 
return kgem_bo_is_dirty(op->dst.bo);
}
 
static void gen6_emit_composite_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
if (op->mask.bo) {
binding_table[2] =
gen6_bind_bo(sna,
op->mask.bo,
op->mask.width,
op->mask.height,
op->mask.card_format,
false);
}
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
(op->mask.bo == NULL ||
sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
assert (sna->render.vertex_offset == 0);
if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
gen4_vertex_finish(sna);
 
DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
sna->render_state.gen6.floats_per_vertex,
op->floats_per_vertex,
sna->render.vertex_index,
(sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
}
assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
 
#if 0
 
fastcall static void
gen6_render_composite_blt(struct sna *sna,
const struct sna_composite_op *op,
const struct sna_composite_rectangles *r)
{
gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
op->prim_emit(sna, op, r);
}
 
fastcall static void
gen6_render_composite_box(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box)
{
struct sna_composite_rectangles r;
 
gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
}
 
static void
gen6_render_composite_boxes__blt(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("composite_boxes(%d)\n", nbox));
 
do {
int nbox_this_time;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
nbox -= nbox_this_time;
 
do {
struct sna_composite_rectangles r;
 
DBG((" %s: (%d, %d), (%d, %d)\n",
__FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
r.dst.x = box->x1;
r.dst.y = box->y1;
r.width = box->x2 - box->x1;
r.height = box->y2 - box->y1;
r.src = r.mask = r.dst;
 
op->prim_emit(sna, op, &r);
box++;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen6_render_composite_boxes(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
} while (nbox);
}
 
static void
gen6_render_composite_boxes__thread(struct sna *sna,
const struct sna_composite_op *op,
const BoxRec *box, int nbox)
{
DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, op, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
#endif
 
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
 
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
char *base, *ptr;
int src, dst;
 
base = sna_static_stream_map(stream,
GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
64);
 
ptr = base;
for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
struct gen6_blend_state *blend =
(struct gen6_blend_state *)ptr;
 
blend->blend0.dest_blend_factor = dst;
blend->blend0.source_blend_factor = src;
blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
blend->blend0.blend_enable =
!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
 
blend->blend1.post_blend_clamp_enable = 1;
blend->blend1.pre_blend_clamp_enable = 1;
 
ptr += GEN6_BLEND_STATE_PADDED_SIZE;
}
}
 
return sna_static_stream_offsetof(stream, base);
}
 
#if 0
 
static uint32_t gen6_bind_video_source(struct sna *sna,
struct kgem_bo *src_bo,
uint32_t src_offset,
int src_width,
int src_height,
int src_pitch,
uint32_t src_surf_format)
{
struct gen6_surface_state *ss;
 
sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
 
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss->ss0.surface_type = GEN6_SURFACE_2D;
ss->ss0.surface_format = src_surf_format;
 
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
src_bo,
I915_GEM_DOMAIN_SAMPLER << 16,
src_offset);
 
ss->ss2.width = src_width - 1;
ss->ss2.height = src_height - 1;
ss->ss3.pitch = src_pitch - 1;
 
return sna->kgem.surface * sizeof(uint32_t);
}
 
static void gen6_emit_video_state(struct sna *sna,
const struct sna_composite_op *op)
{
struct sna_video_frame *frame = op->priv;
uint32_t src_surf_format;
uint32_t src_surf_base[6];
int src_width[6];
int src_height[6];
int src_pitch[6];
uint32_t *binding_table;
uint16_t offset;
bool dirty;
int n_src, n;
 
dirty = gen6_get_batch(sna, op);
 
src_surf_base[0] = 0;
src_surf_base[1] = 0;
src_surf_base[2] = frame->VBufOffset;
src_surf_base[3] = frame->VBufOffset;
src_surf_base[4] = frame->UBufOffset;
src_surf_base[5] = frame->UBufOffset;
 
if (is_planar_fourcc(frame->id)) {
src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
src_width[1] = src_width[0] = frame->width;
src_height[1] = src_height[0] = frame->height;
src_pitch[1] = src_pitch[0] = frame->pitch[1];
src_width[4] = src_width[5] = src_width[2] = src_width[3] =
frame->width / 2;
src_height[4] = src_height[5] = src_height[2] = src_height[3] =
frame->height / 2;
src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] =
frame->pitch[0];
n_src = 6;
} else {
if (frame->id == FOURCC_UYVY)
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
else
src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;
 
src_width[0] = frame->width;
src_height[0] = frame->height;
src_pitch[0] = frame->pitch[0];
n_src = 1;
}
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
for (n = 0; n < n_src; n++) {
binding_table[1+n] =
gen6_bind_video_source(sna,
frame->bo,
src_surf_base[n],
src_width[n],
src_height[n],
src_pitch[n],
src_surf_format);
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static bool
gen6_render_video(struct sna *sna,
struct sna_video *video,
struct sna_video_frame *frame,
RegionPtr dstRegion,
short src_w, short src_h,
short drw_w, short drw_h,
short dx, short dy,
PixmapPtr pixmap)
{
struct sna_composite_op tmp;
int nbox, pix_xoff, pix_yoff;
float src_scale_x, src_scale_y;
struct sna_pixmap *priv;
unsigned filter;
BoxPtr box;
 
DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
__FUNCTION__, src_w, src_h, drw_w, drw_h,
REGION_NUM_RECTS(dstRegion),
REGION_EXTENTS(NULL, dstRegion)->x1,
REGION_EXTENTS(NULL, dstRegion)->y1,
REGION_EXTENTS(NULL, dstRegion)->x2,
REGION_EXTENTS(NULL, dstRegion)->y2));
 
priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
if (priv == NULL)
return false;
 
memset(&tmp, 0, sizeof(tmp));
 
tmp.dst.pixmap = pixmap;
tmp.dst.width = pixmap->drawable.width;
tmp.dst.height = pixmap->drawable.height;
tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
tmp.dst.bo = priv->gpu_bo;
 
tmp.src.bo = frame->bo;
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 3;
tmp.floats_per_rect = 9;
 
if (src_w == drw_w && src_h == drw_h)
filter = SAMPLER_FILTER_NEAREST;
else
filter = SAMPLER_FILTER_BILINEAR;
 
tmp.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
NO_BLEND,
is_planar_fourcc(frame->id) ?
GEN6_WM_KERNEL_VIDEO_PLANAR :
GEN6_WM_KERNEL_VIDEO_PACKED,
2);
tmp.priv = frame;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_video_state(sna, &tmp, frame);
gen6_align_vertex(sna, &tmp);
 
/* Set up the offset for translating from the given region (in screen
* coordinates) to the backing pixmap.
*/
#ifdef COMPOSITE
pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
pix_xoff = 0;
pix_yoff = 0;
#endif
 
/* Use normalized texture coordinates */
src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
 
box = REGION_RECTS(dstRegion);
nbox = REGION_NUM_RECTS(dstRegion);
while (nbox--) {
BoxRec r;
 
r.x1 = box->x1 + pix_xoff;
r.x2 = box->x2 + pix_xoff;
r.y1 = box->y1 + pix_yoff;
r.y2 = box->y2 + pix_yoff;
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
 
OUT_VERTEX(r.x2, r.y2);
OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y2);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
 
OUT_VERTEX(r.x1, r.y1);
OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
 
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
sna_damage_add_box(&priv->gpu_damage, &r);
sna_damage_subtract_box(&priv->cpu_damage, &r);
}
box++;
}
priv->clear = false;
 
gen4_vertex_flush(sna);
return true;
}
 
static int
gen6_composite_picture(struct sna *sna,
PicturePtr picture,
struct sna_composite_channel *channel,
int x, int y,
int w, int h,
int dst_x, int dst_y,
bool precise)
{
PixmapPtr pixmap;
uint32_t color;
int16_t dx, dy;
 
DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
__FUNCTION__, x, y, w, h, dst_x, dst_y));
 
channel->is_solid = false;
channel->card_format = -1;
 
if (sna_picture_is_solid(picture, &color))
return gen4_channel_init_solid(sna, channel, color);
 
if (picture->pDrawable == NULL) {
int ret;
 
if (picture->pSourcePict->type == SourcePictTypeLinear)
return gen4_channel_init_linear(sna, picture, channel,
x, y,
w, h,
dst_x, dst_y);
 
DBG(("%s -- fixup, gradient\n", __FUNCTION__));
ret = -1;
if (!precise)
ret = sna_render_picture_approximate_gradient(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
if (ret == -1)
ret = sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
return ret;
}
 
if (picture->alphaMap) {
DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
if (!gen6_check_repeat(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
if (!gen6_check_filter(picture))
return sna_render_picture_fixup(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
 
channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
channel->filter = picture->filter;
 
pixmap = get_drawable_pixmap(picture->pDrawable);
get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
 
x += dx + picture->pDrawable->x;
y += dy + picture->pDrawable->y;
 
channel->is_affine = sna_transform_is_affine(picture->transform);
if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
DBG(("%s: integer translation (%d, %d), removing\n",
__FUNCTION__, dx, dy));
x += dx;
y += dy;
channel->transform = NULL;
channel->filter = PictFilterNearest;
} else
channel->transform = picture->transform;
 
channel->pict_format = picture->format;
channel->card_format = gen6_get_card_format(picture->format);
if (channel->card_format == (unsigned)-1)
return sna_render_picture_convert(sna, picture, channel, pixmap,
x, y, w, h, dst_x, dst_y,
false);
 
if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
return sna_render_picture_extract(sna, picture, channel,
x, y, w, h, dst_x, dst_y);
}
 
return sna_render_pixmap_bo(sna, channel, pixmap,
x, y, w, h, dst_x, dst_y);
}
 
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
channel->repeat = gen6_repeat(channel->repeat);
channel->filter = gen6_filter(channel->filter);
if (channel->card_format == (unsigned)-1)
channel->card_format = gen6_get_card_format(channel->pict_format);
assert(channel->card_format != (unsigned)-1);
}
 
static void gen6_render_composite_done(struct sna *sna,
const struct sna_composite_op *op)
{
DBG(("%s\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset) {
gen4_vertex_flush(sna);
gen6_magic_ca_pass(sna, op);
}
 
// if (op->mask.bo)
// kgem_bo_destroy(&sna->kgem, op->mask.bo);
// if (op->src.bo)
// kgem_bo_destroy(&sna->kgem, op->src.bo);
 
// sna_render_composite_redirect_done(sna, op);
}
 
static bool
gen6_composite_set_target(struct sna *sna,
struct sna_composite_op *op,
PicturePtr dst,
int x, int y, int w, int h)
{
BoxRec box;
 
op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
op->dst.format = dst->format;
op->dst.width = op->dst.pixmap->drawable.width;
op->dst.height = op->dst.pixmap->drawable.height;
 
if (w && h) {
box.x1 = x;
box.y1 = y;
box.x2 = x + w;
box.y2 = y + h;
} else
sna_render_picture_extents(dst, &box);
 
op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
PREFER_GPU | FORCE_GPU | RENDER_GPU,
&box, &op->damage);
if (op->dst.bo == NULL)
return false;
 
get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
&op->dst.x, &op->dst.y);
 
DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
__FUNCTION__,
op->dst.pixmap, (int)op->dst.format,
op->dst.width, op->dst.height,
op->dst.bo->pitch,
op->dst.x, op->dst.y,
op->damage ? *op->damage : (void *)-1));
 
assert(op->dst.bo->proxy == NULL);
 
if (too_large(op->dst.width, op->dst.height) &&
!sna_render_composite_redirect(sna, op, x, y, w, h))
return false;
 
return true;
}
 
 
 
static bool
gen6_render_composite(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr mask,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t msk_x, int16_t msk_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
struct sna_composite_op *tmp)
{
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;
 
DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
width, height, sna->kgem.ring));
 
if (mask == NULL &&
try_blt(sna, dst, src, width, height) &&
sna_blt_composite(sna, op,
src, dst,
src_x, src_y,
dst_x, dst_y,
width, height,
tmp, false))
return true;
 
if (gen6_composite_fallback(sna, src, mask, dst))
return false;
 
if (need_tiling(sna, width, height))
return sna_tiling_composite(op, src, mask, dst,
src_x, src_y,
msk_x, msk_y,
dst_x, dst_y,
width, height,
tmp);
 
if (op == PictOpClear)
op = PictOpSrc;
tmp->op = op;
if (!gen6_composite_set_target(sna, tmp, dst,
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
/* Did we just switch rings to prepare the source? */
if (mask == NULL &&
prefer_blt_composite(sna, tmp) &&
sna_blt_composite__convert(sna,
dst_x, dst_y, width, height,
tmp))
return true;
 
gen6_composite_channel_convert(&tmp->src);
break;
}
 
tmp->is_affine = tmp->src.is_affine;
tmp->has_component_alpha = false;
tmp->need_magic_ca_pass = false;
 
tmp->mask.bo = NULL;
tmp->mask.filter = SAMPLER_FILTER_NEAREST;
tmp->mask.repeat = SAMPLER_EXTEND_NONE;
 
if (mask) {
if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
tmp->has_component_alpha = true;
 
/* Check if it's component alpha that relies on a source alpha and on
* the source value. We can only get one of those into the single
* source value that we get to blend with.
*/
if (gen6_blend_op[op].src_alpha &&
(gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
if (op != PictOpOver)
goto cleanup_src;
 
tmp->need_magic_ca_pass = true;
tmp->op = PictOpOutReverse;
}
}
 
if (!reuse_source(sna,
src, &tmp->src, src_x, src_y,
mask, &tmp->mask, msk_x, msk_y)) {
switch (gen6_composite_picture(sna, mask, &tmp->mask,
msk_x, msk_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_src;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
goto cleanup_src;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->mask);
break;
}
}
 
tmp->is_affine &= tmp->mask.is_affine;
}
 
tmp->u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
tmp->src.repeat,
tmp->mask.filter,
tmp->mask.repeat),
gen6_get_blend(tmp->op,
tmp->has_component_alpha,
tmp->dst.format),
gen6_choose_composite_kernel(tmp->op,
tmp->mask.bo != NULL,
tmp->has_component_alpha,
tmp->is_affine),
gen4_choose_composite_emitter(tmp));
 
tmp->blt = gen6_render_composite_blt;
tmp->box = gen6_render_composite_box;
tmp->boxes = gen6_render_composite_boxes__blt;
if (tmp->emit_boxes) {
tmp->boxes = gen6_render_composite_boxes;
tmp->thread_boxes = gen6_render_composite_boxes__thread;
}
tmp->done = gen6_render_composite_done;
 
 
 
gen6_emit_composite_state(sna, tmp);
gen6_align_vertex(sna, tmp);
return true;
 
cleanup_mask:
if (tmp->mask.bo)
kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
if (tmp->src.bo)
kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
if (tmp->redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
return false;
}
 
#if !NO_COMPOSITE_SPANS
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, float opacity)
{
DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
__FUNCTION__,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
op->prim_emit(sna, op, box, opacity);
}
 
static void
gen6_render_composite_spans_boxes(struct sna *sna,
const struct sna_composite_spans_op *op,
const BoxRec *box, int nbox,
float opacity)
{
DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
opacity,
op->base.dst.x, op->base.dst.y));
 
do {
int nbox_this_time;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_composite_state);
nbox -= nbox_this_time;
 
do {
DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
box->x1, box->y1,
box->x2 - box->x1,
box->y2 - box->y1));
 
op->prim_emit(sna, op, box++, opacity);
} while (--nbox_this_time);
} while (nbox);
}
 
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
const struct sna_composite_spans_op *op,
const struct sna_opacity_box *box,
int nbox)
{
DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
__FUNCTION__, nbox,
op->base.src.offset[0], op->base.src.offset[1],
op->base.dst.x, op->base.dst.y));
 
sna_vertex_lock(&sna->render);
do {
int nbox_this_time;
float *v;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_composite_state);
assert(nbox_this_time);
nbox -= nbox_this_time;
 
v = sna->render.vertices + sna->render.vertex_used;
sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
 
sna_vertex_acquire__locked(&sna->render);
sna_vertex_unlock(&sna->render);
 
op->emit_boxes(op, box, nbox_this_time, v);
box += nbox_this_time;
 
sna_vertex_lock(&sna->render);
sna_vertex_release__locked(&sna->render);
} while (nbox);
sna_vertex_unlock(&sna->render);
}
 
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
const struct sna_composite_spans_op *op)
{
DBG(("%s()\n", __FUNCTION__));
assert(!sna->render.active);
 
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
 
if (op->base.src.bo)
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
 
sna_render_composite_redirect_done(sna, &op->base);
}
 
static bool
gen6_check_composite_spans(struct sna *sna,
uint8_t op, PicturePtr src, PicturePtr dst,
int16_t width, int16_t height,
unsigned flags)
{
DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
__FUNCTION__, op, width, height, flags));
 
if (op >= ARRAY_SIZE(gen6_blend_op))
return false;
 
if (gen6_composite_fallback(sna, src, NULL, dst)) {
DBG(("%s: operation would fallback\n", __FUNCTION__));
return false;
}
 
if (need_tiling(sna, width, height) &&
!is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
DBG(("%s: fallback, tiled operation not on GPU\n",
__FUNCTION__));
return false;
}
 
if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
assert(priv);
 
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
return true;
 
if (flags & COMPOSITE_SPANS_INPLACE_HINT)
return false;
 
return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
}
 
return true;
}
 
static bool
gen6_render_composite_spans(struct sna *sna,
uint8_t op,
PicturePtr src,
PicturePtr dst,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
int16_t width, int16_t height,
unsigned flags,
struct sna_composite_spans_op *tmp)
{
DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
width, height, flags, sna->kgem.ring));
 
assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));
 
if (need_tiling(sna, width, height)) {
DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
__FUNCTION__, width, height));
return sna_tiling_composite_spans(op, src, dst,
src_x, src_y, dst_x, dst_y,
width, height, flags, tmp);
}
 
tmp->base.op = op;
if (!gen6_composite_set_target(sna, &tmp->base, dst,
dst_x, dst_y, width, height))
return false;
 
switch (gen6_composite_picture(sna, src, &tmp->base.src,
src_x, src_y,
width, height,
dst_x, dst_y,
dst->polyMode == PolyModePrecise)) {
case -1:
goto cleanup_dst;
case 0:
if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
goto cleanup_dst;
/* fall through to fixup */
case 1:
gen6_composite_channel_convert(&tmp->base.src);
break;
}
tmp->base.mask.bo = NULL;
 
tmp->base.is_affine = tmp->base.src.is_affine;
tmp->base.need_magic_ca_pass = false;
 
tmp->base.u.gen6.flags =
GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
tmp->base.src.repeat,
SAMPLER_FILTER_NEAREST,
SAMPLER_EXTEND_PAD),
gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
gen4_choose_spans_emitter(tmp));
 
tmp->box = gen6_render_composite_spans_box;
tmp->boxes = gen6_render_composite_spans_boxes;
if (tmp->emit_boxes)
tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
tmp->done = gen6_render_composite_spans_done;
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem,
tmp->base.dst.bo, tmp->base.src.bo,
NULL))
goto cleanup_src;
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_composite_state(sna, &tmp->base);
gen6_align_vertex(sna, &tmp->base);
return true;
 
cleanup_src:
if (tmp->base.src.bo)
kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
if (tmp->base.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
return false;
}
#endif
 
#endif
 
static void
gen6_emit_copy_state(struct sna *sna,
const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, op->src.width, op->src.height,
op->src.card_format,
false);
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
#if 0
 
static inline bool prefer_blt_copy(struct sna *sna,
struct kgem_bo *src_bo,
struct kgem_bo *dst_bo,
unsigned flags)
{
if (flags & COPY_SYNC)
return false;
 
if (PREFER_RENDER)
return PREFER_RENDER > 0;
 
if (sna->kgem.ring == KGEM_BLT)
return true;
 
if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
return true;
 
if (untiled_tlb_miss(src_bo) ||
untiled_tlb_miss(dst_bo))
return true;
 
if (!prefer_blt_ring(sna, dst_bo, flags))
return false;
 
return (prefer_blt_bo(sna, src_bo) >= 0 &&
prefer_blt_bo(sna, dst_bo) > 0);
}
 
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
*extents = box[0];
while (--n) {
box++;
 
if (box->x1 < extents->x1)
extents->x1 = box->x1;
if (box->x2 > extents->x2)
extents->x2 = box->x2;
 
if (box->y1 < extents->y1)
extents->y1 = box->y1;
if (box->y2 > extents->y2)
extents->y2 = box->y2;
}
}
 
static inline bool
overlaps(struct sna *sna,
struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, BoxRec *extents)
{
if (src_bo != dst_bo)
return false;
 
boxes_extents(box, n, extents);
return (extents->x2 + src_dx > extents->x1 + dst_dx &&
extents->x1 + src_dx < extents->x2 + dst_dx &&
extents->y2 + src_dy > extents->y1 + dst_dy &&
extents->y1 + src_dy < extents->y2 + dst_dy);
}
 
static bool
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const BoxRec *box, int n, unsigned flags)
{
struct sna_composite_op tmp;
BoxRec extents;
 
DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
__FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
src_bo == dst_bo,
overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)));
 
if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
if (!(alu == GXcopy || alu == GXclear)) {
fallback_blt:
if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
return false;
 
return sna_blt_copy_boxes_fallback(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
if (overlaps(sna,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
box, n, &extents)) {
if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
goto fallback_blt;
 
if (can_switch_to_blt(sna, dst_bo, flags) &&
sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
return sna_render_copy_boxes__overlap(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n, &extents);
}
 
if (dst->drawable.depth == src->drawable.depth) {
tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = tmp.dst.format;
} else {
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
}
if (!gen6_check_format(tmp.src.pict_format))
goto fallback_blt;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(tmp.dst.width, tmp.dst.height)) {
int i;
 
extents = box[0];
for (i = 1; i < n; i++) {
if (box[i].x1 < extents.x1)
extents.x1 = box[i].x1;
if (box[i].y1 < extents.y1)
extents.y1 = box[i].y1;
 
if (box[i].x2 > extents.x2)
extents.x2 = box[i].x2;
if (box[i].y2 > extents.y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1 + dst_dx,
extents.y1 + dst_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
goto fallback_tiled;
 
dst_dx += tmp.dst.x;
dst_dy += tmp.dst.y;
 
tmp.dst.x = tmp.dst.y = 0;
}
 
tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
if (too_large(src->drawable.width, src->drawable.height)) {
int i;
 
extents = box[0];
for (i = 1; i < n; i++) {
if (extents.x1 < box[i].x1)
extents.x1 = box[i].x1;
if (extents.y1 < box[i].y1)
extents.y1 = box[i].y1;
 
if (extents.x2 > box[i].x2)
extents.x2 = box[i].x2;
if (extents.y2 > box[i].y2)
extents.y2 = box[i].y2;
}
 
if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
extents.x1 + src_dx,
extents.y1 + src_dy,
extents.x2 - extents.x1,
extents.y2 - extents.y1)) {
DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
goto fallback_tiled_dst;
}
 
src_dx += tmp.src.offset[0];
src_dy += tmp.src.offset[1];
} else {
tmp.src.bo = src_bo;
tmp.src.width = src->drawable.width;
tmp.src.height = src->drawable.height;
}
 
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = 0;
 
tmp.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
 
kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
DBG(("%s: too large for a single operation\n",
__FUNCTION__));
goto fallback_tiled_src;
}
_kgem_set_mode(&sna->kgem, KGEM_RENDER);
}
 
gen6_emit_copy_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int16_t *v;
int n_this_time;
 
n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_copy_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
 
DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n",
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1));
v[0] = box->x2 + dst_dx;
v[2] = box->x2 + src_dx;
v[1] = v[5] = box->y2 + dst_dy;
v[3] = v[7] = box->y2 + src_dy;
v[8] = v[4] = box->x1 + dst_dx;
v[10] = v[6] = box->x1 + src_dx;
v[9] = box->y1 + dst_dy;
v[11] = box->y1 + src_dy;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
sna_render_composite_redirect_done(sna, &tmp);
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return true;
 
fallback_tiled_src:
if (tmp.src.bo != src_bo)
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
fallback_tiled_dst:
if (tmp.redirect.real_bo)
kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
fallback_tiled:
if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
sna_blt_copy_boxes(sna, alu,
src_bo, src_dx, src_dy,
dst_bo, dst_dx, dst_dy,
dst->drawable.bitsPerPixel,
box, n))
return true;
 
return sna_tiling_copy_boxes(sna, alu,
src, src_bo, src_dx, src_dy,
dst, dst_bo, dst_dx, dst_dy,
box, n);
}
 
#endif
 
static void
gen6_render_copy_blt(struct sna *sna,
const struct sna_copy_op *op,
int16_t sx, int16_t sy,
int16_t w, int16_t h,
int16_t dx, int16_t dy)
{
int16_t *v;
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = dx+w; v[1] = dy+h;
v[2] = sx+w; v[3] = sy+h;
v[4] = dx; v[5] = dy+h;
v[6] = sx; v[7] = sy+h;
v[8] = dx; v[9] = dy;
v[10] = sx; v[11] = sy;
}
 
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
}
 
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
PixmapPtr src, struct kgem_bo *src_bo,
PixmapPtr dst, struct kgem_bo *dst_bo,
struct sna_copy_op *op)
{
DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
__FUNCTION__, alu,
src->drawable.width, src->drawable.height,
dst->drawable.width, dst->drawable.height));
 
fallback:
 
op->base.dst.format = PIXMAN_a8r8g8b8;
op->base.src.pict_format = op->base.dst.format;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
op->base.dst.height = dst->drawable.height;
op->base.dst.bo = dst_bo;
 
op->base.src.bo = src_bo;
op->base.src.card_format =
gen6_get_card_format(op->base.src.pict_format);
op->base.src.width = src->drawable.width;
op->base.src.height = src->drawable.height;
 
op->base.mask.bo = NULL;
 
op->base.floats_per_vertex = 2;
op->base.floats_per_rect = 6;
 
op->base.u.gen6.flags = COPY_FLAGS(alu);
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
 
 
gen6_emit_copy_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_copy_blt;
op->done = gen6_render_copy_done;
return true;
}
 
#if 0
 
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
uint32_t *binding_table;
uint16_t offset;
bool dirty;
 
dirty = gen6_get_batch(sna, op);
 
binding_table = gen6_composite_get_binding_table(sna, &offset);
 
binding_table[0] =
gen6_bind_bo(sna,
op->dst.bo, op->dst.width, op->dst.height,
gen6_get_dest_format(op->dst.format),
true);
binding_table[1] =
gen6_bind_bo(sna,
op->src.bo, 1, 1,
GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
false);
 
if (sna->kgem.surface == offset &&
*(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
sna->kgem.surface +=
sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
offset = sna->render_state.gen6.surface_table;
}
 
gen6_emit_state(sna, op, offset | dirty);
}
 
static inline bool prefer_blt_fill(struct sna *sna,
struct kgem_bo *bo)
{
if (PREFER_RENDER)
return PREFER_RENDER < 0;
 
if (untiled_tlb_miss(bo))
return true;
 
return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
}
 
static bool
gen6_render_fill_boxes(struct sna *sna,
CARD8 op,
PictFormat format,
const xRenderColor *color,
PixmapPtr dst, struct kgem_bo *dst_bo,
const BoxRec *box, int n)
{
struct sna_composite_op tmp;
uint32_t pixel;
 
DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
__FUNCTION__, op,
color->red, color->green, color->blue, color->alpha, (int)format));
 
if (op >= ARRAY_SIZE(gen6_blend_op)) {
DBG(("%s: fallback due to unhandled blend op: %d\n",
__FUNCTION__, op));
return false;
}
 
if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
uint8_t alu = GXinvalid;
 
if (op <= PictOpSrc) {
pixel = 0;
if (op == PictOpClear)
alu = GXclear;
else if (sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
format))
alu = GXcopy;
}
 
if (alu != GXinvalid &&
sna_blt_fill_boxes(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
pixel, box, n))
return true;
 
if (!gen6_check_dst_format(format))
return false;
}
 
if (op == PictOpClear) {
pixel = 0;
op = PictOpSrc;
} else if (!sna_get_pixel_from_rgba(&pixel,
color->red,
color->green,
color->blue,
color->alpha,
PICT_a8r8g8b8))
return false;
 
DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
__FUNCTION__, pixel, n,
box[0].x1, box[0].y1, box[0].x2, box[0].y2));
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = format;
tmp.dst.bo = dst_bo;
tmp.dst.x = tmp.dst.y = 0;
tmp.damage = NULL;
 
sna_render_composite_redirect_init(&tmp);
if (too_large(dst->drawable.width, dst->drawable.height)) {
BoxRec extents;
 
boxes_extents(box, n, &extents);
if (!sna_render_composite_redirect(sna, &tmp,
extents.x1, extents.y1,
extents.x2 - extents.x1,
extents.y2 - extents.y1))
return sna_tiling_fill_boxes(sna, op, format, color,
dst, dst_bo, box, n);
}
 
tmp.src.bo = sna_render_get_solid(sna, pixel);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS(op, format);
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
do {
int n_this_time;
int16_t *v;
 
n_this_time = gen6_get_rectangles(sna, &tmp, n,
gen6_emit_fill_state);
n -= n_this_time;
 
v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
sna->render.vertex_used += 6 * n_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
do {
DBG((" (%d, %d), (%d, %d)\n",
box->x1, box->y1, box->x2, box->y2));
 
v[0] = box->x2;
v[5] = v[1] = box->y2;
v[8] = v[4] = box->x1;
v[9] = box->y1;
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
v += 12; box++;
} while (--n_this_time);
} while (n);
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
sna_render_composite_redirect_done(sna, &tmp);
return true;
}
 
static void
gen6_render_op_fill_blt(struct sna *sna,
const struct sna_fill_op *op,
int16_t x, int16_t y, int16_t w, int16_t h)
{
int16_t *v;
 
DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = x+w;
v[4] = v[8] = x;
v[1] = v[5] = y+h;
v[9] = y;
 
v[2] = v[3] = v[7] = 1;
v[6] = v[10] = v[11] = 0;
}
 
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
const struct sna_fill_op *op,
const BoxRec *box)
{
int16_t *v;
 
DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2));
 
gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = box->x2;
v[8] = v[4] = box->x1;
v[5] = v[1] = box->y2;
v[9] = box->y1;
 
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
}
 
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
const struct sna_fill_op *op,
const BoxRec *box,
int nbox)
{
DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
box->x1, box->y1, box->x2, box->y2, nbox));
 
do {
int nbox_this_time;
int16_t *v;
 
nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
gen6_emit_fill_state);
nbox -= nbox_this_time;
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6 * nbox_this_time;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
do {
v[0] = box->x2;
v[8] = v[4] = box->x1;
v[5] = v[1] = box->y2;
v[9] = box->y1;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
box++; v += 12;
} while (--nbox_this_time);
} while (nbox);
}
 
static void
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
{
DBG(("%s()\n", __FUNCTION__));
 
assert(!sna->render.active);
if (sna->render.vertex_offset)
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, op->base.src.bo);
}
 
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
PixmapPtr dst, struct kgem_bo *dst_bo,
uint32_t color,
struct sna_fill_op *op)
{
DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));
 
if (prefer_blt_fill(sna, dst_bo) &&
sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
op))
return true;
 
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height))
return sna_blt_fill(sna, alu,
dst_bo, dst->drawable.bitsPerPixel,
color,
op);
 
if (alu == GXclear)
color = 0;
 
op->base.dst.pixmap = dst;
op->base.dst.width = dst->drawable.width;
op->base.dst.height = dst->drawable.height;
op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
op->base.dst.bo = dst_bo;
op->base.dst.x = op->base.dst.y = 0;
 
op->base.src.bo =
sna_render_get_solid(sna,
sna_rgba_for_color(color,
dst->drawable.depth));
op->base.mask.bo = NULL;
 
op->base.need_magic_ca_pass = false;
op->base.floats_per_vertex = 2;
op->base.floats_per_rect = 6;
 
op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
kgem_submit(&sna->kgem);
assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
}
 
gen6_emit_fill_state(sna, &op->base);
gen6_align_vertex(sna, &op->base);
 
op->blt = gen6_render_op_fill_blt;
op->box = gen6_render_op_fill_box;
op->boxes = gen6_render_op_fill_boxes;
op->done = gen6_render_op_fill_done;
return true;
}
 
static bool
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint8_t alu)
{
BoxRec box;
 
box.x1 = x1;
box.y1 = y1;
box.x2 = x2;
box.y2 = y2;
 
return sna_blt_fill_boxes(sna, alu,
bo, dst->drawable.bitsPerPixel,
color, &box, 1);
}
 
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
uint32_t color,
int16_t x1, int16_t y1,
int16_t x2, int16_t y2,
uint8_t alu)
{
struct sna_composite_op tmp;
int16_t *v;
 
/* Prefer to use the BLT if already engaged */
if (prefer_blt_fill(sna, bo) &&
gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu))
return true;
 
/* Must use the BLT if we can't RENDER... */
if (!(alu == GXcopy || alu == GXclear) ||
too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_fill_one_try_blt(sna, dst, bo, color,
x1, y1, x2, y2, alu);
 
if (alu == GXclear)
color = 0;
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
 
tmp.src.bo =
sna_render_get_solid(sna,
sna_rgba_for_color(color,
dst->drawable.depth));
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_submit(&sna->kgem);
if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
return false;
}
}
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
DBG((" (%d, %d), (%d, %d)\n", x1, y1, x2, y2));
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = x2;
v[8] = v[4] = x1;
v[5] = v[1] = y2;
v[9] = y1;
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
return true;
}
 
static bool
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
BoxRec box;
 
box.x1 = 0;
box.y1 = 0;
box.x2 = dst->drawable.width;
box.y2 = dst->drawable.height;
 
return sna_blt_fill_boxes(sna, GXclear,
bo, dst->drawable.bitsPerPixel,
0, &box, 1);
}
 
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
struct sna_composite_op tmp;
int16_t *v;
 
DBG(("%s: %dx%d\n",
__FUNCTION__,
dst->drawable.width,
dst->drawable.height));
 
/* Prefer to use the BLT if, and only if, already engaged */
if (sna->kgem.ring == KGEM_BLT &&
gen6_render_clear_try_blt(sna, dst, bo))
return true;
 
/* Must use the BLT if we can't RENDER... */
if (too_large(dst->drawable.width, dst->drawable.height))
return gen6_render_clear_try_blt(sna, dst, bo);
 
tmp.dst.pixmap = dst;
tmp.dst.width = dst->drawable.width;
tmp.dst.height = dst->drawable.height;
tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
tmp.dst.bo = bo;
tmp.dst.x = tmp.dst.y = 0;
 
tmp.src.bo = sna_render_get_solid(sna, 0);
tmp.mask.bo = NULL;
 
tmp.floats_per_vertex = 2;
tmp.floats_per_rect = 6;
tmp.need_magic_ca_pass = false;
 
tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);
 
 
 
gen6_emit_fill_state(sna, &tmp);
gen6_align_vertex(sna, &tmp);
 
gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);
 
v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
sna->render.vertex_used += 6;
assert(sna->render.vertex_used <= sna->render.vertex_size);
 
v[0] = dst->drawable.width;
v[5] = v[1] = dst->drawable.height;
v[8] = v[4] = 0;
v[9] = 0;
 
v[7] = v[2] = v[3] = 1;
v[6] = v[10] = v[11] = 0;
 
gen4_vertex_flush(sna);
kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
return true;
}
 
static void gen6_render_flush(struct sna *sna)
{
gen4_vertex_close(sna);
 
assert(sna->render.vb_id == 0);
assert(sna->render.vertex_offset == 0);
}
 
#endif
 
static void
gen6_render_retire(struct kgem *kgem)
{
struct sna *sna;
 
if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
kgem->ring = kgem->mode;
 
sna = container_of(kgem, struct sna, kgem);
if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
}
}
 
 
static void gen6_render_reset(struct sna *sna)
{
sna->render_state.gen6.needs_invariant = true;
sna->render_state.gen6.first_state_packet = true;
sna->render_state.gen6.ve_id = 3 << 2;
sna->render_state.gen6.last_primitive = -1;
 
sna->render_state.gen6.num_sf_outputs = 0;
sna->render_state.gen6.samplers = -1;
sna->render_state.gen6.blend = -1;
sna->render_state.gen6.kernel = -1;
sna->render_state.gen6.drawrect_offset = -1;
sna->render_state.gen6.drawrect_limit = -1;
sna->render_state.gen6.surface_table = -1;
 
sna->render.vertex_offset = 0;
sna->render.nvertex_reloc = 0;
sna->render.vb_id = 0;
}
 
static void gen6_render_fini(struct sna *sna)
{
// kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
 
static bool is_gt2(struct sna *sna)
{
return DEVICE_ID(sna->PciInfo) & 0x30;
}
 
static bool is_mobile(struct sna *sna)
{
return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
}
 
static bool gen6_render_setup(struct sna *sna)
{
struct gen6_render_state *state = &sna->render_state.gen6;
struct sna_static_stream general;
struct gen6_sampler_state *ss;
int i, j, k, l, m;
 
state->info = &gt1_info;
if (is_gt2(sna))
state->info = &gt2_info; /* XXX requires GT_MODE WiZ disabled */
 
sna_static_stream_init(&general);
 
/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
* dumps, you know it points to zero.
*/
null_create(&general);
scratch_create(&general);
 
for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
if (wm_kernels[m].size) {
state->wm_kernel[m][1] =
sna_static_stream_add(&general,
wm_kernels[m].data,
wm_kernels[m].size,
64);
} else {
if (USE_8_PIXEL_DISPATCH) {
state->wm_kernel[m][0] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 8);
}
 
if (USE_16_PIXEL_DISPATCH) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
 
if (USE_32_PIXEL_DISPATCH) {
state->wm_kernel[m][2] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 32);
}
}
if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
state->wm_kernel[m][1] =
sna_static_stream_compile_wm(sna, &general,
wm_kernels[m].data, 16);
}
}
 
ss = sna_static_stream_map(&general,
2 * sizeof(*ss) *
(2 +
FILTER_COUNT * EXTEND_COUNT *
FILTER_COUNT * EXTEND_COUNT),
32);
state->wm_state = sna_static_stream_offsetof(&general, ss);
sampler_copy_init(ss); ss += 2;
sampler_fill_init(ss); ss += 2;
for (i = 0; i < FILTER_COUNT; i++) {
for (j = 0; j < EXTEND_COUNT; j++) {
for (k = 0; k < FILTER_COUNT; k++) {
for (l = 0; l < EXTEND_COUNT; l++) {
sampler_state_init(ss++, i, j);
sampler_state_init(ss++, k, l);
}
}
}
}
 
state->cc_blend = gen6_composite_create_blend_state(&general);
 
state->general_bo = sna_static_stream_fini(sna, &general);
return state->general_bo != NULL;
}
 
bool gen6_render_init(struct sna *sna)
{
if (!gen6_render_setup(sna))
return false;
 
// sna->kgem.context_switch = gen6_render_context_switch;
sna->kgem.retire = gen6_render_retire;
 
// sna->render.composite = gen6_render_composite;
// sna->render.video = gen6_render_video;
 
// sna->render.copy_boxes = gen6_render_copy_boxes;
sna->render.copy = gen6_render_copy;
 
// sna->render.fill_boxes = gen6_render_fill_boxes;
// sna->render.fill = gen6_render_fill;
// sna->render.fill_one = gen6_render_fill_one;
// sna->render.clear = gen6_render_clear;
 
// sna->render.flush = gen6_render_flush;
sna->render.reset = gen6_render_reset;
sna->render.fini = gen6_render_fini;
 
sna->render.max_3d_size = GEN6_MAX_SIZE;
sna->render.max_3d_pitch = 1 << 18;
return true;
}
 
 
void gen4_vertex_flush(struct sna *sna)
{
DBG(("%s[%x] = %d\n", __FUNCTION__,
4*sna->render.vertex_offset,
sna->render.vertex_index - sna->render.vertex_start));
 
assert(sna->render.vertex_offset);
assert(sna->render.vertex_index > sna->render.vertex_start);
 
sna->kgem.batch[sna->render.vertex_offset] =
sna->render.vertex_index - sna->render.vertex_start;
sna->render.vertex_offset = 0;
}
 
int gen4_vertex_finish(struct sna *sna)
{
struct kgem_bo *bo;
unsigned int i;
unsigned hint, size;
 
DBG(("%s: used=%d / %d\n", __FUNCTION__,
sna->render.vertex_used, sna->render.vertex_size));
assert(sna->render.vertex_offset == 0);
assert(sna->render.vertex_used);
 
// sna_vertex_wait__locked(&sna->render);
 
/* Note: we only need dword alignment (currently) */
 
bo = sna->render.vbo;
if (bo) {
for (i = 0; i < sna->render.nvertex_reloc; i++) {
DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
i, sna->render.vertex_reloc[i]));
 
sna->kgem.batch[sna->render.vertex_reloc[i]] =
kgem_add_reloc(&sna->kgem,
sna->render.vertex_reloc[i], bo,
I915_GEM_DOMAIN_VERTEX << 16,
0);
}
 
assert(!sna->render.active);
sna->render.nvertex_reloc = 0;
sna->render.vertex_used = 0;
sna->render.vertex_index = 0;
sna->render.vbo = NULL;
sna->render.vb_id = 0;
 
kgem_bo_destroy(&sna->kgem, bo);
}
 
hint = CREATE_GTT_MAP;
if (bo)
hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
 
size = 256*1024;
assert(!sna->render.active);
sna->render.vertices = NULL;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
while (sna->render.vbo == NULL && size > 16*1024) {
size /= 2;
sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
}
if (sna->render.vbo == NULL)
sna->render.vbo = kgem_create_linear(&sna->kgem,
256*1024, CREATE_GTT_MAP);
if (sna->render.vbo)
sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
if (sna->render.vertices == NULL) {
if (sna->render.vbo) {
kgem_bo_destroy(&sna->kgem, sna->render.vbo);
sna->render.vbo = NULL;
}
sna->render.vertices = sna->render.vertex_data;
sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
return 0;
}
 
if (sna->render.vertex_used) {
DBG(("%s: copying initial buffer x %d to handle=%d\n",
__FUNCTION__,
sna->render.vertex_used,
sna->render.vbo->handle));
assert(sizeof(float)*sna->render.vertex_used <=
__kgem_bo_size(sna->render.vbo));
memcpy(sna->render.vertices,
sna->render.vertex_data,
sizeof(float)*sna->render.vertex_used);
}
 
size = __kgem_bo_size(sna->render.vbo)/4;
if (size >= UINT16_MAX)
size = UINT16_MAX - 1;
 
DBG(("%s: create vbo handle=%d, size=%d\n",
__FUNCTION__, sna->render.vbo->handle, size));
 
sna->render.vertex_size = size;
return sna->render.vertex_size - sna->render.vertex_used;
}
 
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
return NULL;
};
 
bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
const void *data, int length)
{
return false;
}
 
/drivers/video/Intel-2D/gen6_render.h
0,0 → 1,1563
#ifndef GEN6_RENDER_H
#define GEN6_RENDER_H
 
#define GEN6_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
 
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT 16
# define GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT 8
# define GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT 0
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_32_DISPATCH_ENABLE (1 << 2)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* VERTEX_BUFFER_STATE Structure */
#define VB0_BUFFER_INDEX_SHIFT 26
#define VB0_VERTEXDATA (0 << 20)
#define VB0_INSTANCEDATA (1 << 20)
#define VB0_BUFFER_PITCH_SHIFT 0
 
/* VERTEX_ELEMENT_STATE Structure */
#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */
#define VE0_VALID (1 << 25) /* for GEN6 */
#define VE0_FORMAT_SHIFT 16
#define VE0_OFFSET_SHIFT 0
#define VE1_VFCOMPONENT_0_SHIFT 28
#define VE1_VFCOMPONENT_1_SHIFT 24
#define VE1_VFCOMPONENT_2_SHIFT 20
#define VE1_VFCOMPONENT_3_SHIFT 16
#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
#define GEN6_VF_CTL 0x7500
 
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
#define GEN6_VS_CTL 0x7600
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \
((Pipeline) << 27) | \
((Opcode) << 24) | \
((Subopcode) << 16))
 
#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1)
#define GEN6_STATE_SIP GEN6_3D(0, 1, 2)
 
#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4)
 
#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0)
#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0)
 
#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1)
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */
# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */
 
#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8)
#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9)
#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa)
#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb)
 
#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0)
#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1)
#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2)
#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4)
#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5)
# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29
# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18
 
#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6)
#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7)
#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8)
#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9)
/* These two are BLC and CTG only, not BW or CL */
#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa)
#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb)
 
#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0)
 
#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10)
/* DW1 */
# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15)
 
/* for GEN6+ */
#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9)
# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8)
 
#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05)
/* DW1 */
# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16
# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0
/* DW2 */
# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8
# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0
 
#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11)
# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10)
 
#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e)
 
#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10)
 
#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11)
/* DW4 */
# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0
 
#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12)
 
#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13)
/* DW1 */
# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11
# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4
/* DW2 */
/* DW3 */
# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29)
# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29)
# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29)
# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29)
/* DW4 */
# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29
# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27
# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25
 
 
#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14)
/* DW2 */
# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27
# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
/* DW4 */
# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16
/* DW5 */
# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25
# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19)
# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1)
# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0)
/* DW6 */
# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14)
# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13)
# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12)
# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11)
# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10)
 
 
#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15)
#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16)
#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17)
 
#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18)
 
#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d)
/* DW1 */
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1)
# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1)
 
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
 
#define UF0_CS_REALLOC (1 << 13)
#define UF0_VFE_REALLOC (1 << 12)
#define UF0_SF_REALLOC (1 << 11)
#define UF0_CLIP_REALLOC (1 << 10)
#define UF0_GS_REALLOC (1 << 9)
#define UF0_VS_REALLOC (1 << 8)
#define UF1_CLIP_FENCE_SHIFT 20
#define UF1_GS_FENCE_SHIFT 10
#define UF1_VS_FENCE_SHIFT 0
#define UF2_CS_FENCE_SHIFT 20
#define UF2_VFE_FENCE_SHIFT 10
#define UF2_SF_FENCE_SHIFT 0
 
/* for GEN6_STATE_BASE_ADDRESS */
#define BASE_ADDRESS_MODIFY (1 << 0)
 
/* for GEN6_3DSTATE_PIPELINED_POINTERS */
#define GEN6_GS_DISABLE 0
#define GEN6_GS_ENABLE 1
#define GEN6_CLIP_DISABLE 0
#define GEN6_CLIP_ENABLE 1
 
/* for GEN6_PIPE_CONTROL */
#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0)
#define GEN6_PIPE_CONTROL_CS_STALL (1 << 20)
#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14)
#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14)
#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14)
#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14)
#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12)
#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
 
/* 3DPRIMITIVE bits */
#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15)
#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15)
/* Primitive types are in gen6_defines.h */
#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10
 
#define GEN6_SVG_CTL 0x7400
 
#define GEN6_SVG_CTL_GS_BA (0 << 8)
#define GEN6_SVG_CTL_SS_BA (1 << 8)
#define GEN6_SVG_CTL_IO_BA (2 << 8)
#define GEN6_SVG_CTL_GS_AUB (3 << 8)
#define GEN6_SVG_CTL_IO_AUB (4 << 8)
#define GEN6_SVG_CTL_SIP (5 << 8)
 
#define GEN6_SVG_RDATA 0x7404
#define GEN6_SVG_WORK_CTL 0x7408
 
#define GEN6_VF_CTL 0x7500
 
#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8)
#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4)
#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4)
#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3)
#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2)
#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1)
#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VF_STRG_VAL 0x7504
#define GEN6_VF_STR_VL_OVR 0x7508
#define GEN6_VF_VC_OVR 0x750c
#define GEN6_VF_STR_PSKIP 0x7510
#define GEN6_VF_MAX_PRIM 0x7514
#define GEN6_VF_RDATA 0x7518
 
#define GEN6_VS_CTL 0x7600
#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8)
#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8)
#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_VS_STRG_VAL 0x7604
#define GEN6_VS_RDATA 0x7608
 
#define GEN6_SF_CTL 0x7b00
#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8)
#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8)
#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4)
#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3)
#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_SF_STRG_VAL 0x7b04
#define GEN6_SF_RDATA 0x7b18
 
#define GEN6_WIZ_CTL 0x7c00
#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8)
#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8)
#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6)
#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5)
#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4)
#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3)
#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2)
#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1)
#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_WIZ_STRG_VAL 0x7c04
#define GEN6_WIZ_RDATA 0x7c18
 
#define GEN6_TS_CTL 0x7e00
#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31)
#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8)
#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8)
#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2)
#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1)
#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0)
 
#define GEN6_TS_STRG_VAL 0x7e04
#define GEN6_TS_RDATA 0x7e08
 
#define GEN6_TD_CTL 0x8000
#define GEN6_TD_CTL_MUX_SHIFT 8
#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7)
#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6)
#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5)
#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4)
#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2)
#define GEN6_TD_CTL2 0x8004
#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28)
#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26)
#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25)
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16
#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8)
#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7)
#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6)
#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5)
#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4)
#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3)
#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0)
#define GEN6_TD_VF_VS_EMSK 0x8008
#define GEN6_TD_GS_EMSK 0x800c
#define GEN6_TD_CLIP_EMSK 0x8010
#define GEN6_TD_SF_EMSK 0x8014
#define GEN6_TD_WIZ_EMSK 0x8018
#define GEN6_TD_0_6_EHTRG_VAL 0x801c
#define GEN6_TD_0_7_EHTRG_VAL 0x8020
#define GEN6_TD_0_6_EHTRG_MSK 0x8024
#define GEN6_TD_0_7_EHTRG_MSK 0x8028
#define GEN6_TD_RDATA 0x802c
#define GEN6_TD_TS_EMSK 0x8030
 
#define GEN6_EU_CTL 0x8800
#define GEN6_EU_CTL_SELECT_SHIFT 16
#define GEN6_EU_CTL_DATA_MUX_SHIFT 8
#define GEN6_EU_ATT_0 0x8810
#define GEN6_EU_ATT_1 0x8814
#define GEN6_EU_ATT_DATA_0 0x8820
#define GEN6_EU_ATT_DATA_1 0x8824
#define GEN6_EU_ATT_CLR_0 0x8830
#define GEN6_EU_ATT_CLR_1 0x8834
#define GEN6_EU_RDATA 0x8840
 
/* 3D state:
*/
#define _3DOP_3DSTATE_PIPELINED 0x0
#define _3DOP_3DSTATE_NONPIPELINED 0x1
#define _3DOP_3DCONTROL 0x2
#define _3DOP_3DPRIMITIVE 0x3
 
#define _3DSTATE_PIPELINED_POINTERS 0x00
#define _3DSTATE_BINDING_TABLE_POINTERS 0x01
#define _3DSTATE_VERTEX_BUFFERS 0x08
#define _3DSTATE_VERTEX_ELEMENTS 0x09
#define _3DSTATE_INDEX_BUFFER 0x0A
#define _3DSTATE_VF_STATISTICS 0x0B
#define _3DSTATE_DRAWING_RECTANGLE 0x00
#define _3DSTATE_CONSTANT_COLOR 0x01
#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02
#define _3DSTATE_CHROMA_KEY 0x04
#define _3DSTATE_DEPTH_BUFFER 0x05
#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06
#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07
#define _3DSTATE_LINE_STIPPLE 0x08
#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09
#define _3DCONTROL 0x00
#define _3DPRIMITIVE 0x00
 
#define _3DPRIM_POINTLIST 0x01
#define _3DPRIM_LINELIST 0x02
#define _3DPRIM_LINESTRIP 0x03
#define _3DPRIM_TRILIST 0x04
#define _3DPRIM_TRISTRIP 0x05
#define _3DPRIM_TRIFAN 0x06
#define _3DPRIM_QUADLIST 0x07
#define _3DPRIM_QUADSTRIP 0x08
#define _3DPRIM_LINELIST_ADJ 0x09
#define _3DPRIM_LINESTRIP_ADJ 0x0A
#define _3DPRIM_TRILIST_ADJ 0x0B
#define _3DPRIM_TRISTRIP_ADJ 0x0C
#define _3DPRIM_TRISTRIP_REVERSE 0x0D
#define _3DPRIM_POLYGON 0x0E
#define _3DPRIM_RECTLIST 0x0F
#define _3DPRIM_LINELOOP 0x10
#define _3DPRIM_POINTLIST_BF 0x11
#define _3DPRIM_LINESTRIP_CONT 0x12
#define _3DPRIM_LINESTRIP_BF 0x13
#define _3DPRIM_LINESTRIP_CONT_BF 0x14
#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15
 
#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1
 
#define GEN6_ANISORATIO_2 0
#define GEN6_ANISORATIO_4 1
#define GEN6_ANISORATIO_6 2
#define GEN6_ANISORATIO_8 3
#define GEN6_ANISORATIO_10 4
#define GEN6_ANISORATIO_12 5
#define GEN6_ANISORATIO_14 6
#define GEN6_ANISORATIO_16 7
 
#define GEN6_BLENDFACTOR_ONE 0x1
#define GEN6_BLENDFACTOR_SRC_COLOR 0x2
#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3
#define GEN6_BLENDFACTOR_DST_ALPHA 0x4
#define GEN6_BLENDFACTOR_DST_COLOR 0x5
#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6
#define GEN6_BLENDFACTOR_CONST_COLOR 0x7
#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8
#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9
#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A
#define GEN6_BLENDFACTOR_ZERO 0x11
#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12
#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13
#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14
#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15
#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17
#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18
#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19
#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A
 
#define GEN6_BLENDFUNCTION_ADD 0
#define GEN6_BLENDFUNCTION_SUBTRACT 1
#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2
#define GEN6_BLENDFUNCTION_MIN 3
#define GEN6_BLENDFUNCTION_MAX 4
 
#define GEN6_ALPHATEST_FORMAT_UNORM8 0
#define GEN6_ALPHATEST_FORMAT_FLOAT32 1
 
#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0
#define GEN6_CHROMAKEY_REPLACE_BLACK 1
 
#define GEN6_CLIP_API_OGL 0
#define GEN6_CLIP_API_DX 1
 
#define GEN6_CLIPMODE_NORMAL 0
#define GEN6_CLIPMODE_CLIP_ALL 1
#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2
#define GEN6_CLIPMODE_REJECT_ALL 3
#define GEN6_CLIPMODE_ACCEPT_ALL 4
 
#define GEN6_CLIP_NDCSPACE 0
#define GEN6_CLIP_SCREENSPACE 1
 
#define GEN6_COMPAREFUNCTION_ALWAYS 0
#define GEN6_COMPAREFUNCTION_NEVER 1
#define GEN6_COMPAREFUNCTION_LESS 2
#define GEN6_COMPAREFUNCTION_EQUAL 3
#define GEN6_COMPAREFUNCTION_LEQUAL 4
#define GEN6_COMPAREFUNCTION_GREATER 5
#define GEN6_COMPAREFUNCTION_NOTEQUAL 6
#define GEN6_COMPAREFUNCTION_GEQUAL 7
 
#define GEN6_COVERAGE_PIXELS_HALF 0
#define GEN6_COVERAGE_PIXELS_1 1
#define GEN6_COVERAGE_PIXELS_2 2
#define GEN6_COVERAGE_PIXELS_4 3
 
#define GEN6_CULLMODE_BOTH 0
#define GEN6_CULLMODE_NONE 1
#define GEN6_CULLMODE_FRONT 2
#define GEN6_CULLMODE_BACK 3
 
#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0
#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1
 
#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0
#define GEN6_DEPTHFORMAT_D32_FLOAT 1
#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2
#define GEN6_DEPTHFORMAT_D16_UNORM 5
 
#define GEN6_FLOATING_POINT_IEEE_754 0
#define GEN6_FLOATING_POINT_NON_IEEE_754 1
 
#define GEN6_FRONTWINDING_CW 0
#define GEN6_FRONTWINDING_CCW 1
 
#define GEN6_INDEX_BYTE 0
#define GEN6_INDEX_WORD 1
#define GEN6_INDEX_DWORD 2
 
#define GEN6_LOGICOPFUNCTION_CLEAR 0
#define GEN6_LOGICOPFUNCTION_NOR 1
#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2
#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3
#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4
#define GEN6_LOGICOPFUNCTION_INVERT 5
#define GEN6_LOGICOPFUNCTION_XOR 6
#define GEN6_LOGICOPFUNCTION_NAND 7
#define GEN6_LOGICOPFUNCTION_AND 8
#define GEN6_LOGICOPFUNCTION_EQUIV 9
#define GEN6_LOGICOPFUNCTION_NOOP 10
#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11
#define GEN6_LOGICOPFUNCTION_COPY 12
#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13
#define GEN6_LOGICOPFUNCTION_OR 14
#define GEN6_LOGICOPFUNCTION_SET 15
 
#define GEN6_MAPFILTER_NEAREST 0x0
#define GEN6_MAPFILTER_LINEAR 0x1
#define GEN6_MAPFILTER_ANISOTROPIC 0x2
 
#define GEN6_MIPFILTER_NONE 0
#define GEN6_MIPFILTER_NEAREST 1
#define GEN6_MIPFILTER_LINEAR 3
 
#define GEN6_POLYGON_FRONT_FACING 0
#define GEN6_POLYGON_BACK_FACING 1
 
#define GEN6_PREFILTER_ALWAYS 0x0
#define GEN6_PREFILTER_NEVER 0x1
#define GEN6_PREFILTER_LESS 0x2
#define GEN6_PREFILTER_EQUAL 0x3
#define GEN6_PREFILTER_LEQUAL 0x4
#define GEN6_PREFILTER_GREATER 0x5
#define GEN6_PREFILTER_NOTEQUAL 0x6
#define GEN6_PREFILTER_GEQUAL 0x7
 
#define GEN6_PROVOKING_VERTEX_0 0
#define GEN6_PROVOKING_VERTEX_1 1
#define GEN6_PROVOKING_VERTEX_2 2
 
#define GEN6_RASTRULE_UPPER_LEFT 0
#define GEN6_RASTRULE_UPPER_RIGHT 1
 
#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0
#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1
#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2
 
#define GEN6_STENCILOP_KEEP 0
#define GEN6_STENCILOP_ZERO 1
#define GEN6_STENCILOP_REPLACE 2
#define GEN6_STENCILOP_INCRSAT 3
#define GEN6_STENCILOP_DECRSAT 4
#define GEN6_STENCILOP_INCR 5
#define GEN6_STENCILOP_DECR 6
#define GEN6_STENCILOP_INVERT 7
 
#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
 
#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000
#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001
#define GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002
#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003
#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004
#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005
#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006
#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007
#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008
#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040
#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041
#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042
#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043
#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044
#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045
#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046
#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080
#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081
#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082
#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083
#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084
#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085
#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086
#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087
#define GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088
#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089
#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A
#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B
#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C
#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D
#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E
#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F
#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090
#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091
#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092
#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093
#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094
#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095
#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0
#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2
#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3
#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4
#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7
#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8
#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9
#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA
#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB
#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC
#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD
#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE
#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF
#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1
#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2
#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3
#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6
#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7
#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8
#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9
#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA
#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF
#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0
#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1
#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2
#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3
#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4
#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9
#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB
#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC
#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED
#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE
#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0
#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1
#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2
#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3
#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4
#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5
#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6
#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7
#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8
#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100
#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102
#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104
#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105
#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106
#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107
#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108
#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109
#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A
#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B
#define GEN6_SURFACEFORMAT_R16_SINT 0x10C
#define GEN6_SURFACEFORMAT_R16_UINT 0x10D
#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E
#define GEN6_SURFACEFORMAT_I16_UNORM 0x111
#define GEN6_SURFACEFORMAT_L16_UNORM 0x112
#define GEN6_SURFACEFORMAT_A16_UNORM 0x113
#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114
#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115
#define GEN6_SURFACEFORMAT_L16_FLOAT 0x116
#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117
#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A
#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B
#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C
#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D
#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E
#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F
#define GEN6_SURFACEFORMAT_R8_UNORM 0x140
#define GEN6_SURFACEFORMAT_R8_SNORM 0x141
#define GEN6_SURFACEFORMAT_R8_SINT 0x142
#define GEN6_SURFACEFORMAT_R8_UINT 0x143
#define GEN6_SURFACEFORMAT_A8_UNORM 0x144
#define GEN6_SURFACEFORMAT_I8_UNORM 0x145
#define GEN6_SURFACEFORMAT_L8_UNORM 0x146
#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147
#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148
#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149
#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A
#define GEN6_SURFACEFORMAT_R1_UINT 0x181
#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183
#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186
#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187
#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188
#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189
#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A
#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B
#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C
#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D
#define GEN6_SURFACEFORMAT_MONO8 0x18E
#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F
#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190
#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191
#define GEN6_SURFACEFORMAT_FXT1 0x192
#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193
#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194
#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195
#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196
#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197
#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198
#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199
#define GEN6_SURFACEFORMAT_BC5_SNORM 0x19A
#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C
#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F
 
#define GEN6_SURFACERETURNFORMAT_FLOAT32 0
#define GEN6_SURFACERETURNFORMAT_S1 1
 
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7
 
#define GEN6_BORDER_COLOR_MODE_DEFAULT 0
#define GEN6_BORDER_COLOR_MODE_LEGACY 1
 
#define GEN6_TEXCOORDMODE_WRAP 0
#define GEN6_TEXCOORDMODE_MIRROR 1
#define GEN6_TEXCOORDMODE_CLAMP 2
#define GEN6_TEXCOORDMODE_CUBE 3
#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4
#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5
 
#define GEN6_THREAD_PRIORITY_NORMAL 0
#define GEN6_THREAD_PRIORITY_HIGH 1
 
#define GEN6_TILEWALK_XMAJOR 0
#define GEN6_TILEWALK_YMAJOR 1
 
#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0
#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1
 
#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0
#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1
 
#define GEN6_VFCOMPONENT_NOSTORE 0
#define GEN6_VFCOMPONENT_STORE_SRC 1
#define GEN6_VFCOMPONENT_STORE_0 2
#define GEN6_VFCOMPONENT_STORE_1_FLT 3
#define GEN6_VFCOMPONENT_STORE_1_INT 4
#define GEN6_VFCOMPONENT_STORE_VID 5
#define GEN6_VFCOMPONENT_STORE_IID 6
#define GEN6_VFCOMPONENT_STORE_PID 7
 
 
 
/* Execution Unit (EU) defines
*/
 
#define GEN6_ALIGN_1 0
#define GEN6_ALIGN_16 1
 
#define GEN6_ADDRESS_DIRECT 0
#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1
 
#define GEN6_CHANNEL_X 0
#define GEN6_CHANNEL_Y 1
#define GEN6_CHANNEL_Z 2
#define GEN6_CHANNEL_W 3
 
#define GEN6_COMPRESSION_NONE 0
#define GEN6_COMPRESSION_2NDHALF 1
#define GEN6_COMPRESSION_COMPRESSED 2
 
#define GEN6_CONDITIONAL_NONE 0
#define GEN6_CONDITIONAL_Z 1
#define GEN6_CONDITIONAL_NZ 2
#define GEN6_CONDITIONAL_EQ 1 /* Z */
#define GEN6_CONDITIONAL_NEQ 2 /* NZ */
#define GEN6_CONDITIONAL_G 3
#define GEN6_CONDITIONAL_GE 4
#define GEN6_CONDITIONAL_L 5
#define GEN6_CONDITIONAL_LE 6
#define GEN6_CONDITIONAL_C 7
#define GEN6_CONDITIONAL_O 8
 
#define GEN6_DEBUG_NONE 0
#define GEN6_DEBUG_BREAKPOINT 1
 
#define GEN6_DEPENDENCY_NORMAL 0
#define GEN6_DEPENDENCY_NOTCLEARED 1
#define GEN6_DEPENDENCY_NOTCHECKED 2
#define GEN6_DEPENDENCY_DISABLE 3
 
#define GEN6_EXECUTE_1 0
#define GEN6_EXECUTE_2 1
#define GEN6_EXECUTE_4 2
#define GEN6_EXECUTE_8 3
#define GEN6_EXECUTE_16 4
#define GEN6_EXECUTE_32 5
 
#define GEN6_HORIZONTAL_STRIDE_0 0
#define GEN6_HORIZONTAL_STRIDE_1 1
#define GEN6_HORIZONTAL_STRIDE_2 2
#define GEN6_HORIZONTAL_STRIDE_4 3
 
#define GEN6_INSTRUCTION_NORMAL 0
#define GEN6_INSTRUCTION_SATURATE 1
 
#define GEN6_MASK_ENABLE 0
#define GEN6_MASK_DISABLE 1
 
#define GEN6_OPCODE_MOV 1
#define GEN6_OPCODE_SEL 2
#define GEN6_OPCODE_NOT 4
#define GEN6_OPCODE_AND 5
#define GEN6_OPCODE_OR 6
#define GEN6_OPCODE_XOR 7
#define GEN6_OPCODE_SHR 8
#define GEN6_OPCODE_SHL 9
#define GEN6_OPCODE_RSR 10
#define GEN6_OPCODE_RSL 11
#define GEN6_OPCODE_ASR 12
#define GEN6_OPCODE_CMP 16
#define GEN6_OPCODE_JMPI 32
#define GEN6_OPCODE_IF 34
#define GEN6_OPCODE_IFF 35
#define GEN6_OPCODE_ELSE 36
#define GEN6_OPCODE_ENDIF 37
#define GEN6_OPCODE_DO 38
#define GEN6_OPCODE_WHILE 39
#define GEN6_OPCODE_BREAK 40
#define GEN6_OPCODE_CONTINUE 41
#define GEN6_OPCODE_HALT 42
#define GEN6_OPCODE_MSAVE 44
#define GEN6_OPCODE_MRESTORE 45
#define GEN6_OPCODE_PUSH 46
#define GEN6_OPCODE_POP 47
#define GEN6_OPCODE_WAIT 48
#define GEN6_OPCODE_SEND 49
#define GEN6_OPCODE_ADD 64
#define GEN6_OPCODE_MUL 65
#define GEN6_OPCODE_AVG 66
#define GEN6_OPCODE_FRC 67
#define GEN6_OPCODE_RNDU 68
#define GEN6_OPCODE_RNDD 69
#define GEN6_OPCODE_RNDE 70
#define GEN6_OPCODE_RNDZ 71
#define GEN6_OPCODE_MAC 72
#define GEN6_OPCODE_MACH 73
#define GEN6_OPCODE_LZD 74
#define GEN6_OPCODE_SAD2 80
#define GEN6_OPCODE_SADA2 81
#define GEN6_OPCODE_DP4 84
#define GEN6_OPCODE_DPH 85
#define GEN6_OPCODE_DP3 86
#define GEN6_OPCODE_DP2 87
#define GEN6_OPCODE_DPA2 88
#define GEN6_OPCODE_LINE 89
#define GEN6_OPCODE_NOP 126
 
#define GEN6_PREDICATE_NONE 0
#define GEN6_PREDICATE_NORMAL 1
#define GEN6_PREDICATE_ALIGN1_ANYV 2
#define GEN6_PREDICATE_ALIGN1_ALLV 3
#define GEN6_PREDICATE_ALIGN1_ANY2H 4
#define GEN6_PREDICATE_ALIGN1_ALL2H 5
#define GEN6_PREDICATE_ALIGN1_ANY4H 6
#define GEN6_PREDICATE_ALIGN1_ALL4H 7
#define GEN6_PREDICATE_ALIGN1_ANY8H 8
#define GEN6_PREDICATE_ALIGN1_ALL8H 9
#define GEN6_PREDICATE_ALIGN1_ANY16H 10
#define GEN6_PREDICATE_ALIGN1_ALL16H 11
#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3
#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4
#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5
#define GEN6_PREDICATE_ALIGN16_ANY4H 6
#define GEN6_PREDICATE_ALIGN16_ALL4H 7
 
#define GEN6_ARCHITECTURE_REGISTER_FILE 0
#define GEN6_GENERAL_REGISTER_FILE 1
#define GEN6_MESSAGE_REGISTER_FILE 2
#define GEN6_IMMEDIATE_VALUE 3
 
#define GEN6_REGISTER_TYPE_UD 0
#define GEN6_REGISTER_TYPE_D 1
#define GEN6_REGISTER_TYPE_UW 2
#define GEN6_REGISTER_TYPE_W 3
#define GEN6_REGISTER_TYPE_UB 4
#define GEN6_REGISTER_TYPE_B 5
#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define GEN6_REGISTER_TYPE_HF 6
#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define GEN6_REGISTER_TYPE_F 7
 
#define GEN6_ARF_NULL 0x00
#define GEN6_ARF_ADDRESS 0x10
#define GEN6_ARF_ACCUMULATOR 0x20
#define GEN6_ARF_FLAG 0x30
#define GEN6_ARF_MASK 0x40
#define GEN6_ARF_MASK_STACK 0x50
#define GEN6_ARF_MASK_STACK_DEPTH 0x60
#define GEN6_ARF_STATE 0x70
#define GEN6_ARF_CONTROL 0x80
#define GEN6_ARF_NOTIFICATION_COUNT 0x90
#define GEN6_ARF_IP 0xA0
 
#define GEN6_AMASK 0
#define GEN6_IMASK 1
#define GEN6_LMASK 2
#define GEN6_CMASK 3
 
 
 
#define GEN6_THREAD_NORMAL 0
#define GEN6_THREAD_ATOMIC 1
#define GEN6_THREAD_SWITCH 2
 
#define GEN6_VERTICAL_STRIDE_0 0
#define GEN6_VERTICAL_STRIDE_1 1
#define GEN6_VERTICAL_STRIDE_2 2
#define GEN6_VERTICAL_STRIDE_4 3
#define GEN6_VERTICAL_STRIDE_8 4
#define GEN6_VERTICAL_STRIDE_16 5
#define GEN6_VERTICAL_STRIDE_32 6
#define GEN6_VERTICAL_STRIDE_64 7
#define GEN6_VERTICAL_STRIDE_128 8
#define GEN6_VERTICAL_STRIDE_256 9
#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF
 
#define GEN6_WIDTH_1 0
#define GEN6_WIDTH_2 1
#define GEN6_WIDTH_4 2
#define GEN6_WIDTH_8 3
#define GEN6_WIDTH_16 4
 
#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0
#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1
#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2
#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3
#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4
#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5
#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6
#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7
#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8
#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9
#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10
#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11
 
#define GEN6_POLYGON_FACING_FRONT 0
#define GEN6_POLYGON_FACING_BACK 1
 
#define GEN6_MESSAGE_TARGET_NULL 0
#define GEN6_MESSAGE_TARGET_MATH 1
#define GEN6_MESSAGE_TARGET_SAMPLER 2
#define GEN6_MESSAGE_TARGET_GATEWAY 3
#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4
#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5
#define GEN6_MESSAGE_TARGET_URB 6
#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7
 
#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2
#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3
 
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3
#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3
 
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4
 
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2
 
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3
 
#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0
#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2
 
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
 
#define GEN6_MATH_INTEGER_UNSIGNED 0
#define GEN6_MATH_INTEGER_SIGNED 1
 
#define GEN6_MATH_PRECISION_FULL 0
#define GEN6_MATH_PRECISION_PARTIAL 1
 
#define GEN6_MATH_SATURATE_NONE 0
#define GEN6_MATH_SATURATE_SATURATE 1
 
#define GEN6_MATH_DATA_VECTOR 0
#define GEN6_MATH_DATA_SCALAR 1
 
#define GEN6_URB_OPCODE_WRITE 0
 
#define GEN6_URB_SWIZZLE_NONE 0
#define GEN6_URB_SWIZZLE_INTERLEAVE 1
#define GEN6_URB_SWIZZLE_TRANSPOSE 2
 
#define GEN6_SCRATCH_SPACE_SIZE_1K 0
#define GEN6_SCRATCH_SPACE_SIZE_2K 1
#define GEN6_SCRATCH_SPACE_SIZE_4K 2
#define GEN6_SCRATCH_SPACE_SIZE_8K 3
#define GEN6_SCRATCH_SPACE_SIZE_16K 4
#define GEN6_SCRATCH_SPACE_SIZE_32K 5
#define GEN6_SCRATCH_SPACE_SIZE_64K 6
#define GEN6_SCRATCH_SPACE_SIZE_128K 7
#define GEN6_SCRATCH_SPACE_SIZE_256K 8
#define GEN6_SCRATCH_SPACE_SIZE_512K 9
#define GEN6_SCRATCH_SPACE_SIZE_1M 10
#define GEN6_SCRATCH_SPACE_SIZE_2M 11
 
/* The hardware supports two different modes for border color. The
* default (OpenGL) mode uses floating-point color channels, while the
* legacy mode uses 4 bytes.
*
* More significantly, the legacy mode respects the components of the
* border color for channels not present in the source, (whereas the
* default mode will ignore the border color's alpha channel and use
* alpha==1 for an RGB source, for example).
*
* The legacy mode matches the semantics specified by the Render
* extension.
*/
struct gen6_sampler_default_border_color {
float color[4];
};
 
struct gen6_sampler_legacy_border_color {
uint8_t color[4];
};
 
struct gen6_sampler_state {
struct {
uint32_t shadow_function:3;
uint32_t lod_bias:11;
uint32_t min_filter:3;
uint32_t mag_filter:3;
uint32_t mip_filter:2;
uint32_t base_level:5;
uint32_t pad:1;
uint32_t lod_preclamp:1;
uint32_t border_color_mode:1;
uint32_t pad0:1;
uint32_t disable:1;
} ss0;
 
struct {
uint32_t r_wrap_mode:3;
uint32_t t_wrap_mode:3;
uint32_t s_wrap_mode:3;
uint32_t pad:3;
uint32_t max_lod:10;
uint32_t min_lod:10;
} ss1;
 
struct {
uint32_t border_color;
} ss2;
 
struct {
uint32_t non_normalized_coord:1;
uint32_t pad:12;
uint32_t address_round:6;
uint32_t max_aniso:3;
uint32_t chroma_key_mode:1;
uint32_t chroma_key_index:2;
uint32_t chroma_key_enable:1;
uint32_t monochrome_filter_width:3;
uint32_t monochrome_filter_height:3;
} ss3;
};
 
struct gen6_blend_state {
struct {
uint32_t dest_blend_factor:5;
uint32_t source_blend_factor:5;
uint32_t pad3:1;
uint32_t blend_func:3;
uint32_t pad2:1;
uint32_t ia_dest_blend_factor:5;
uint32_t ia_source_blend_factor:5;
uint32_t pad1:1;
uint32_t ia_blend_func:3;
uint32_t pad0:1;
uint32_t ia_blend_enable:1;
uint32_t blend_enable:1;
} blend0;
 
struct {
uint32_t post_blend_clamp_enable:1;
uint32_t pre_blend_clamp_enable:1;
uint32_t clamp_range:2;
uint32_t pad0:4;
uint32_t x_dither_offset:2;
uint32_t y_dither_offset:2;
uint32_t dither_enable:1;
uint32_t alpha_test_func:3;
uint32_t alpha_test_enable:1;
uint32_t pad1:1;
uint32_t logic_op_func:4;
uint32_t logic_op_enable:1;
uint32_t pad2:1;
uint32_t write_disable_b:1;
uint32_t write_disable_g:1;
uint32_t write_disable_r:1;
uint32_t write_disable_a:1;
uint32_t pad3:1;
uint32_t alpha_to_coverage_dither:1;
uint32_t alpha_to_one:1;
uint32_t alpha_to_coverage:1;
} blend1;
};
 
struct gen6_color_calc_state {
struct {
uint32_t alpha_test_format:1;
uint32_t pad0:14;
uint32_t round_disable:1;
uint32_t bf_stencil_ref:8;
uint32_t stencil_ref:8;
} cc0;
 
union {
float alpha_ref_f;
struct {
uint32_t ui:8;
uint32_t pad0:24;
} alpha_ref_fi;
} cc1;
 
float constant_r;
float constant_g;
float constant_b;
float constant_a;
};
 
struct gen6_depth_stencil_state {
struct {
uint32_t pad0:3;
uint32_t bf_stencil_pass_depth_pass_op:3;
uint32_t bf_stencil_pass_depth_fail_op:3;
uint32_t bf_stencil_fail_op:3;
uint32_t bf_stencil_func:3;
uint32_t bf_stencil_enable:1;
uint32_t pad1:2;
uint32_t stencil_write_enable:1;
uint32_t stencil_pass_depth_pass_op:3;
uint32_t stencil_pass_depth_fail_op:3;
uint32_t stencil_fail_op:3;
uint32_t stencil_func:3;
uint32_t stencil_enable:1;
} ds0;
 
struct {
uint32_t bf_stencil_write_mask:8;
uint32_t bf_stencil_test_mask:8;
uint32_t stencil_write_mask:8;
uint32_t stencil_test_mask:8;
} ds1;
 
struct {
uint32_t pad0:26;
uint32_t depth_write_enable:1;
uint32_t depth_test_func:3;
uint32_t pad1:1;
uint32_t depth_test_enable:1;
} ds2;
};
 
struct gen6_surface_state {
struct {
uint32_t cube_pos_z:1;
uint32_t cube_neg_z:1;
uint32_t cube_pos_y:1;
uint32_t cube_neg_y:1;
uint32_t cube_pos_x:1;
uint32_t cube_neg_x:1;
uint32_t pad:3;
uint32_t render_cache_read_mode:1;
uint32_t mipmap_layout_mode:1;
uint32_t vert_line_stride_ofs:1;
uint32_t vert_line_stride:1;
uint32_t color_blend:1;
uint32_t writedisable_blue:1;
uint32_t writedisable_green:1;
uint32_t writedisable_red:1;
uint32_t writedisable_alpha:1;
uint32_t surface_format:9;
uint32_t data_return_format:1;
uint32_t pad0:1;
uint32_t surface_type:3;
} ss0;
 
struct {
uint32_t base_addr;
} ss1;
 
struct {
uint32_t render_target_rotation:2;
uint32_t mip_count:4;
uint32_t width:13;
uint32_t height:13;
} ss2;
 
struct {
uint32_t tile_walk:1;
uint32_t tiled_surface:1;
uint32_t pad:1;
uint32_t pitch:18;
uint32_t depth:11;
} ss3;
 
struct {
uint32_t pad:19;
uint32_t min_array_elt:9;
uint32_t min_lod:4;
} ss4;
 
struct {
uint32_t pad:20;
uint32_t y_offset:4;
uint32_t pad2:1;
uint32_t x_offset:7;
} ss5;
};
 
/* Surface state DW0 */
#define GEN6_SURFACE_RC_READ_WRITE (1 << 8)
#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10
#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0
#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1
#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f
#define GEN6_SURFACE_BLEND_ENABLED (1 << 13)
#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14
#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15
#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16
#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17
#define GEN6_SURFACE_FORMAT_SHIFT 18
#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18)
 
#define GEN6_SURFACE_TYPE_SHIFT 29
#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29)
#define GEN6_SURFACE_1D 0
#define GEN6_SURFACE_2D 1
#define GEN6_SURFACE_3D 2
#define GEN6_SURFACE_CUBE 3
#define GEN6_SURFACE_BUFFER 4
#define GEN6_SURFACE_NULL 7
 
/* Surface state DW2 */
#define GEN6_SURFACE_HEIGHT_SHIFT 19
#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19)
#define GEN6_SURFACE_WIDTH_SHIFT 6
#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6)
#define GEN6_SURFACE_LOD_SHIFT 2
#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2)
 
/* Surface state DW3 */
#define GEN6_SURFACE_DEPTH_SHIFT 21
#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21)
#define GEN6_SURFACE_PITCH_SHIFT 3
#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3)
#define GEN6_SURFACE_TILED (1 << 1)
#define GEN6_SURFACE_TILED_Y (1 << 0)
 
/* Surface state DW4 */
#define GEN6_SURFACE_MIN_LOD_SHIFT 28
#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28)
 
/* Surface state DW5 */
#define GEN6_SURFACE_X_OFFSET_SHIFT 25
#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25)
#define GEN6_SURFACE_Y_OFFSET_SHIFT 20
#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20)
 
struct gen6_cc_viewport {
float min_depth;
float max_depth;
};
 
typedef enum {
SAMPLER_FILTER_NEAREST = 0,
SAMPLER_FILTER_BILINEAR,
FILTER_COUNT
} sampler_filter_t;
 
typedef enum {
SAMPLER_EXTEND_NONE = 0,
SAMPLER_EXTEND_REPEAT,
SAMPLER_EXTEND_PAD,
SAMPLER_EXTEND_REFLECT,
EXTEND_COUNT
} sampler_extend_t;
 
#endif
/drivers/video/Intel-2D/i915_drm.h
0,0 → 1,944
/*
* Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
 
#ifndef _I915_DRM_H_
#define _I915_DRM_H_
 
#include "drm.h"
 
/* Please note that modifications to all structs defined here are
* subject to backwards-compatibility constraints.
*/
 
 
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
* of chars for next/prev indices */
#define I915_LOG_MIN_TEX_REGION_SIZE 14
 
typedef struct _drm_i915_init {
enum {
I915_INIT_DMA = 0x01,
I915_CLEANUP_DMA = 0x02,
I915_RESUME_DMA = 0x03
} func;
unsigned int mmio_offset;
int sarea_priv_offset;
unsigned int ring_start;
unsigned int ring_end;
unsigned int ring_size;
unsigned int front_offset;
unsigned int back_offset;
unsigned int depth_offset;
unsigned int w;
unsigned int h;
unsigned int pitch;
unsigned int pitch_bits;
unsigned int back_pitch;
unsigned int depth_pitch;
unsigned int cpp;
unsigned int chipset;
} drm_i915_init_t;
 
typedef struct _drm_i915_sarea {
struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1];
int last_upload; /* last time texture was uploaded */
int last_enqueue; /* last time a buffer was enqueued */
int last_dispatch; /* age of the most recently dispatched buffer */
int ctxOwner; /* last context to upload state */
int texAge;
int pf_enabled; /* is pageflipping allowed? */
int pf_active;
int pf_current_page; /* which buffer is being displayed? */
int perf_boxes; /* performance boxes to be displayed */
int width, height; /* screen size in pixels */
 
drm_handle_t front_handle;
int front_offset;
int front_size;
 
drm_handle_t back_handle;
int back_offset;
int back_size;
 
drm_handle_t depth_handle;
int depth_offset;
int depth_size;
 
drm_handle_t tex_handle;
int tex_offset;
int tex_size;
int log_tex_granularity;
int pitch;
int rotation; /* 0, 90, 180 or 270 */
int rotated_offset;
int rotated_size;
int rotated_pitch;
int virtualX, virtualY;
 
unsigned int front_tiled;
unsigned int back_tiled;
unsigned int depth_tiled;
unsigned int rotated_tiled;
unsigned int rotated2_tiled;
 
int pipeA_x;
int pipeA_y;
int pipeA_w;
int pipeA_h;
int pipeB_x;
int pipeB_y;
int pipeB_w;
int pipeB_h;
 
/* fill out some space for old userspace triple buffer */
drm_handle_t unused_handle;
__u32 unused1, unused2, unused3;
 
/* buffer object handles for static buffers. May change
* over the lifetime of the client.
*/
__u32 front_bo_handle;
__u32 back_bo_handle;
__u32 unused_bo_handle;
__u32 depth_bo_handle;
 
} drm_i915_sarea_t;
 
/* due to userspace building against these headers we need some compat here */
#define planeA_x pipeA_x
#define planeA_y pipeA_y
#define planeA_w pipeA_w
#define planeA_h pipeA_h
#define planeB_x pipeB_x
#define planeB_y pipeB_y
#define planeB_w pipeB_w
#define planeB_h pipeB_h
 
/* Flags for perf_boxes
*/
#define I915_BOX_RING_EMPTY 0x1
#define I915_BOX_FLIP 0x2
#define I915_BOX_WAIT 0x4
#define I915_BOX_TEXTURE_LOAD 0x8
#define I915_BOX_LOST_CONTEXT 0x10
 
/* I915 specific ioctls
* The device specific ioctl range is 0x40 to 0x79.
*/
#define DRM_I915_INIT 0x00
#define DRM_I915_FLUSH 0x01
#define DRM_I915_FLIP 0x02
#define DRM_I915_BATCHBUFFER 0x03
#define DRM_I915_IRQ_EMIT 0x04
#define DRM_I915_IRQ_WAIT 0x05
#define DRM_I915_GETPARAM 0x06
#define DRM_I915_SETPARAM 0x07
#define DRM_I915_ALLOC 0x08
#define DRM_I915_FREE 0x09
#define DRM_I915_INIT_HEAP 0x0a
#define DRM_I915_CMDBUFFER 0x0b
#define DRM_I915_DESTROY_HEAP 0x0c
#define DRM_I915_SET_VBLANK_PIPE 0x0d
#define DRM_I915_GET_VBLANK_PIPE 0x0e
#define DRM_I915_VBLANK_SWAP 0x0f
#define DRM_I915_HWS_ADDR 0x11
#define DRM_I915_GEM_INIT 0x13
#define DRM_I915_GEM_EXECBUFFER 0x14
#define DRM_I915_GEM_PIN 0x15
#define DRM_I915_GEM_UNPIN 0x16
#define DRM_I915_GEM_BUSY 0x17
#define DRM_I915_GEM_THROTTLE 0x18
#define DRM_I915_GEM_ENTERVT 0x19
#define DRM_I915_GEM_LEAVEVT 0x1a
#define DRM_I915_GEM_CREATE 0x1b
#define DRM_I915_GEM_PREAD 0x1c
#define DRM_I915_GEM_PWRITE 0x1d
#define DRM_I915_GEM_MMAP 0x1e
#define DRM_I915_GEM_SET_DOMAIN 0x1f
#define DRM_I915_GEM_SW_FINISH 0x20
#define DRM_I915_GEM_SET_TILING 0x21
#define DRM_I915_GEM_GET_TILING 0x22
#define DRM_I915_GEM_GET_APERTURE 0x23
#define DRM_I915_GEM_MMAP_GTT 0x24
#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
#define DRM_I915_GEM_MADVISE 0x26
#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
#define DRM_I915_OVERLAY_ATTRS 0x28
#define DRM_I915_GEM_EXECBUFFER2 0x29
#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
#define DRM_I915_GEM_WAIT 0x2c
#define DRM_I915_GEM_CONTEXT_CREATE 0x2d
#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e
#define DRM_I915_GEM_SET_CACHEING 0x2f
#define DRM_I915_GEM_GET_CACHEING 0x30
#define DRM_I915_REG_READ 0x31
 
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP)
#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t)
#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t)
#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t)
#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t)
#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t)
#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t)
#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t)
#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t)
#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t)
#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t)
#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)
#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
#define DRM_IOCTL_I915_GEM_SET_CACHEING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHEING, struct drm_i915_gem_cacheing)
#define DRM_IOCTL_I915_GEM_GET_CACHEING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHEING, struct drm_i915_gem_cacheing)
#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE)
#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
*/
typedef struct drm_i915_batchbuffer {
int start; /* agp offset */
int used; /* nr bytes in use */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */
} drm_i915_batchbuffer_t;
 
/* As above, but pass a pointer to userspace buffer which can be
* validated by the kernel prior to sending to hardware.
*/
typedef struct _drm_i915_cmdbuffer {
char *buf; /* pointer to userspace command buffer */
int sz; /* nr bytes in buf */
int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
int num_cliprects; /* mulitpass with multiple cliprects? */
struct drm_clip_rect *cliprects; /* pointer to userspace cliprects */
} drm_i915_cmdbuffer_t;
 
/* Userspace can request & wait on irq's:
*/
typedef struct drm_i915_irq_emit {
int *irq_seq;
} drm_i915_irq_emit_t;
 
typedef struct drm_i915_irq_wait {
int irq_seq;
} drm_i915_irq_wait_t;
 
/* Ioctl to query kernel params:
*/
#define I915_PARAM_IRQ_ACTIVE 1
#define I915_PARAM_ALLOW_BATCHBUFFER 2
#define I915_PARAM_LAST_DISPATCH 3
#define I915_PARAM_CHIPSET_ID 4
#define I915_PARAM_HAS_GEM 5
#define I915_PARAM_NUM_FENCES_AVAIL 6
#define I915_PARAM_HAS_OVERLAY 7
#define I915_PARAM_HAS_PAGEFLIPPING 8
#define I915_PARAM_HAS_EXECBUF2 9
#define I915_PARAM_HAS_BSD 10
#define I915_PARAM_HAS_BLT 11
#define I915_PARAM_HAS_RELAXED_FENCING 12
#define I915_PARAM_HAS_COHERENT_RINGS 13
#define I915_PARAM_HAS_EXEC_CONSTANTS 14
#define I915_PARAM_HAS_RELAXED_DELTA 15
#define I915_PARAM_HAS_GEN7_SOL_RESET 16
#define I915_PARAM_HAS_LLC 17
#define I915_PARAM_HAS_ALIASING_PPGTT 18
#define I915_PARAM_HAS_WAIT_TIMEOUT 19
 
typedef struct drm_i915_getparam {
int param;
int *value;
} drm_i915_getparam_t;
 
/* Ioctl to set kernel params:
*/
#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
#define I915_SETPARAM_ALLOW_BATCHBUFFER 3
#define I915_SETPARAM_NUM_USED_FENCES 4
 
typedef struct drm_i915_setparam {
int param;
int value;
} drm_i915_setparam_t;
 
/* A memory manager for regions of shared memory:
*/
#define I915_MEM_REGION_AGP 1
 
typedef struct drm_i915_mem_alloc {
int region;
int alignment;
int size;
int *region_offset; /* offset from start of fb or agp */
} drm_i915_mem_alloc_t;
 
typedef struct drm_i915_mem_free {
int region;
int region_offset;
} drm_i915_mem_free_t;
 
typedef struct drm_i915_mem_init_heap {
int region;
int size;
int start;
} drm_i915_mem_init_heap_t;
 
/* Allow memory manager to be torn down and re-initialized (eg on
* rotate):
*/
typedef struct drm_i915_mem_destroy_heap {
int region;
} drm_i915_mem_destroy_heap_t;
 
/* Allow X server to configure which pipes to monitor for vblank signals
*/
#define DRM_I915_VBLANK_PIPE_A 1
#define DRM_I915_VBLANK_PIPE_B 2
 
typedef struct drm_i915_vblank_pipe {
int pipe;
} drm_i915_vblank_pipe_t;
 
/* Schedule buffer swap at given vertical blank:
*/
typedef struct drm_i915_vblank_swap {
drm_drawable_t drawable;
enum drm_vblank_seq_type seqtype;
unsigned int sequence;
} drm_i915_vblank_swap_t;
 
typedef struct drm_i915_hws_addr {
__u64 addr;
} drm_i915_hws_addr_t;
 
struct drm_i915_gem_init {
/**
* Beginning offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_start;
/**
* Ending offset in the GTT to be managed by the DRM memory
* manager.
*/
__u64 gtt_end;
};
 
struct drm_i915_gem_create {
/**
* Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*/
__u64 size;
/**
* Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
__u32 pad;
};
 
struct drm_i915_gem_pread {
/** Handle for the object being read. */
__u32 handle;
__u32 pad;
/** Offset into the object to read from */
__u64 offset;
/** Length of data to read */
__u64 size;
/**
* Pointer to write the data into.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
 
struct drm_i915_gem_pwrite {
/** Handle for the object being written to. */
__u32 handle;
__u32 pad;
/** Offset into the object to write to */
__u64 offset;
/** Length of data to write */
__u64 size;
/**
* Pointer to read the data from.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 data_ptr;
};
 
struct drm_i915_gem_mmap {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/** Offset in the object to map. */
__u64 offset;
/**
* Length of data to map.
*
* The value will be page-aligned.
*/
__u64 size;
/**
* Returned pointer the data was mapped at.
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 addr_ptr;
};
 
struct drm_i915_gem_mmap_gtt {
/** Handle for the object being mapped. */
__u32 handle;
__u32 pad;
/**
* Fake offset to use for subsequent mmap call
*
* This is a fixed-size type for 32/64 compatibility.
*/
__u64 offset;
};
 
struct drm_i915_gem_set_domain {
/** Handle for the object */
__u32 handle;
 
/** New read domains */
__u32 read_domains;
 
/** New write domain */
__u32 write_domain;
};
 
struct drm_i915_gem_sw_finish {
/** Handle for the object */
__u32 handle;
};
 
struct drm_i915_gem_relocation_entry {
/**
* Handle of the buffer being pointed to by this relocation entry.
*
* It's appealing to make this be an index into the mm_validate_entry
* list to refer to the buffer, but this allows the driver to create
* a relocation list for state buffers and not re-write it per
* exec using the buffer.
*/
__u32 target_handle;
 
/**
* Value to be added to the offset of the target buffer to make up
* the relocation entry.
*/
__u32 delta;
 
/** Offset in the buffer the relocation entry will be written into */
__u64 offset;
 
/**
* Offset value of the target buffer that the relocation entry was last
* written as.
*
* If the buffer has the same offset as last time, we can skip syncing
* and writing the relocation. This value is written back out by
* the execbuffer ioctl when the relocation is written.
*/
__u64 presumed_offset;
 
/**
* Target memory domains read by this operation.
*/
__u32 read_domains;
 
/**
* Target memory domains written by this operation.
*
* Note that only one domain may be written by the whole
* execbuffer operation, so that where there are conflicts,
* the application will get -EINVAL back.
*/
__u32 write_domain;
};
 
/** @{
* Intel memory domains
*
* Most of these just align with the various caches in
* the system and are used to flush and invalidate as
* objects end up cached in different domains.
*/
/** CPU cache */
#define I915_GEM_DOMAIN_CPU 0x00000001
/** Render cache, used by 2D and 3D drawing */
#define I915_GEM_DOMAIN_RENDER 0x00000002
/** Sampler cache, used by texture engine */
#define I915_GEM_DOMAIN_SAMPLER 0x00000004
/** Command queue, used to load batch buffers */
#define I915_GEM_DOMAIN_COMMAND 0x00000008
/** Instruction cache, used by shader programs */
#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010
/** Vertex address cache */
#define I915_GEM_DOMAIN_VERTEX 0x00000020
/** GTT domain - aperture and scanout */
#define I915_GEM_DOMAIN_GTT 0x00000040
/** @} */
 
struct drm_i915_gem_exec_object {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
 
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
 
/** Required alignment in graphics aperture */
__u64 alignment;
 
/**
* Returned value of the updated offset of the object, for future
* presumed_offset writes.
*/
__u64 offset;
};
 
struct drm_i915_gem_execbuffer {
/**
* List of buffers to be validated with their relocations to be
* performend on them.
*
* This is a pointer to an array of struct drm_i915_gem_validate_entry.
*
* These buffers must be listed in an order such that all relocations
* a buffer is performing refer to buffers that have already appeared
* in the validate list.
*/
__u64 buffers_ptr;
__u32 buffer_count;
 
/** Offset in the batchbuffer to start execution from. */
__u32 batch_start_offset;
/** Bytes used in batchbuffer from batch_start_offset */
__u32 batch_len;
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
};
 
struct drm_i915_gem_exec_object2 {
/**
* User's handle for a buffer to be bound into the GTT for this
* operation.
*/
__u32 handle;
 
/** Number of relocations to be performed on this buffer */
__u32 relocation_count;
/**
* Pointer to array of struct drm_i915_gem_relocation_entry containing
* the relocations to be performed in this buffer.
*/
__u64 relocs_ptr;
 
/** Required alignment in graphics aperture */
__u64 alignment;
 
/**
* Returned value of the updated offset of the object, for future
* presumed_offset writes.
*/
__u64 offset;
 
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
__u64 flags;
__u64 rsvd1;
__u64 rsvd2;
};
 
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
*/
__u64 buffers_ptr;
__u32 buffer_count;
 
/** Offset in the batchbuffer to start execution from. */
__u32 batch_start_offset;
/** Bytes used in batchbuffer from batch_start_offset */
__u32 batch_len;
__u32 DR1;
__u32 DR4;
__u32 num_cliprects;
/** This is a struct drm_clip_rect *cliprects */
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (7<<0)
#define I915_EXEC_DEFAULT (0<<0)
#define I915_EXEC_RENDER (1<<0)
#define I915_EXEC_BSD (2<<0)
#define I915_EXEC_BLT (3<<0)
 
/* Used for switching the constants addressing mode on gen4+ RENDER ring.
* Gen6+ only supports relative addressing to dynamic state (default) and
* absolute addressing.
*
* These flags are ignored for the BSD and BLT rings.
*/
#define I915_EXEC_CONSTANTS_MASK (3<<6)
#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
__u64 flags;
__u64 rsvd1; /* now used for context info */
__u64 rsvd2;
};
 
/** Resets the SO write offset registers for transform feedback on gen7. */
#define I915_EXEC_GEN7_SOL_RESET (1<<8)
 
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
#define i915_execbuffer2_get_context_id(eb2) \
((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
__u32 pad;
 
/** alignment required within the aperture */
__u64 alignment;
 
/** Returned GTT offset of the buffer. */
__u64 offset;
};
 
struct drm_i915_gem_unpin {
/** Handle of the buffer to be unpinned. */
__u32 handle;
__u32 pad;
};
 
struct drm_i915_gem_busy {
/** Handle of the buffer to check for busy */
__u32 handle;
 
/** Return busy status (1 if busy, 0 if idle).
* The high word is used to indicate on which rings the object
* currently resides:
* 16:31 - busy (r or r/w) rings (16 render, 17 bsd, 18 blt, etc)
*/
__u32 busy;
};
 
#define I915_CACHEING_NONE 0
#define I915_CACHEING_CACHED 1
 
struct drm_i915_gem_cacheing {
/**
* Handle of the buffer to set/get the cacheing level of. */
__u32 handle;
 
/**
* Cacheing level to apply or return value
*
* bits0-15 are for generic cacheing control (i.e. the above defined
* values). bits16-31 are reserved for platform-specific variations
* (e.g. l3$ caching on gen7). */
__u32 cacheing;
};
 
#define I915_TILING_NONE 0
#define I915_TILING_X 1
#define I915_TILING_Y 2
 
#define I915_BIT_6_SWIZZLE_NONE 0
#define I915_BIT_6_SWIZZLE_9 1
#define I915_BIT_6_SWIZZLE_9_10 2
#define I915_BIT_6_SWIZZLE_9_11 3
#define I915_BIT_6_SWIZZLE_9_10_11 4
/* Not seen by userland */
#define I915_BIT_6_SWIZZLE_UNKNOWN 5
/* Seen by userland. */
#define I915_BIT_6_SWIZZLE_9_17 6
#define I915_BIT_6_SWIZZLE_9_10_17 7
 
struct drm_i915_gem_set_tiling {
/** Handle of the buffer to have its tiling state updated */
__u32 handle;
 
/**
* Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*
* This value is to be set on request, and will be updated by the
* kernel on successful return with the actual chosen tiling layout.
*
* The tiling mode may be demoted to I915_TILING_NONE when the system
* has bit 6 swizzling that can't be managed correctly by GEM.
*
* Buffer contents become undefined when changing tiling_mode.
*/
__u32 tiling_mode;
 
/**
* Stride in bytes for the object when in I915_TILING_X or
* I915_TILING_Y.
*/
__u32 stride;
 
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
};
 
struct drm_i915_gem_get_tiling {
/** Handle of the buffer to get tiling state for. */
__u32 handle;
 
/**
* Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
* I915_TILING_Y).
*/
__u32 tiling_mode;
 
/**
* Returned address bit 6 swizzling required for CPU access through
* mmap mapping.
*/
__u32 swizzle_mode;
};
 
struct drm_i915_gem_get_aperture {
/** Total size of the aperture used by i915_gem_execbuffer, in bytes */
__u64 aper_size;
 
/**
* Available space in the aperture used by i915_gem_execbuffer, in
* bytes
*/
__u64 aper_available_size;
};
 
struct drm_i915_get_pipe_from_crtc_id {
/** ID of CRTC being requested **/
__u32 crtc_id;
 
/** pipe of requested CRTC **/
__u32 pipe;
};
 
#define I915_MADV_WILLNEED 0
#define I915_MADV_DONTNEED 1
#define __I915_MADV_PURGED 2 /* internal state */
 
struct drm_i915_gem_madvise {
/** Handle of the buffer to change the backing store advice */
__u32 handle;
 
/* Advice: either the buffer will be needed again in the near future,
* or wont be and could be discarded under memory pressure.
*/
__u32 madv;
 
/** Whether the backing store still exists. */
__u32 retained;
};
 
/* flags */
#define I915_OVERLAY_TYPE_MASK 0xff
#define I915_OVERLAY_YUV_PLANAR 0x01
#define I915_OVERLAY_YUV_PACKED 0x02
#define I915_OVERLAY_RGB 0x03
 
#define I915_OVERLAY_DEPTH_MASK 0xff00
#define I915_OVERLAY_RGB24 0x1000
#define I915_OVERLAY_RGB16 0x2000
#define I915_OVERLAY_RGB15 0x3000
#define I915_OVERLAY_YUV422 0x0100
#define I915_OVERLAY_YUV411 0x0200
#define I915_OVERLAY_YUV420 0x0300
#define I915_OVERLAY_YUV410 0x0400
 
#define I915_OVERLAY_SWAP_MASK 0xff0000
#define I915_OVERLAY_NO_SWAP 0x000000
#define I915_OVERLAY_UV_SWAP 0x010000
#define I915_OVERLAY_Y_SWAP 0x020000
#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000
 
#define I915_OVERLAY_FLAGS_MASK 0xff000000
#define I915_OVERLAY_ENABLE 0x01000000
 
struct drm_intel_overlay_put_image {
/* various flags and src format description */
__u32 flags;
/* source picture description */
__u32 bo_handle;
/* stride values and offsets are in bytes, buffer relative */
__u16 stride_Y; /* stride for packed formats */
__u16 stride_UV;
__u32 offset_Y; /* offset for packet formats */
__u32 offset_U;
__u32 offset_V;
/* in pixels */
__u16 src_width;
__u16 src_height;
/* to compensate the scaling factors for partially covered surfaces */
__u16 src_scan_width;
__u16 src_scan_height;
/* output crtc description */
__u32 crtc_id;
__u16 dst_x;
__u16 dst_y;
__u16 dst_width;
__u16 dst_height;
};
 
/* flags */
#define I915_OVERLAY_UPDATE_ATTRS (1<<0)
#define I915_OVERLAY_UPDATE_GAMMA (1<<1)
struct drm_intel_overlay_attrs {
__u32 flags;
__u32 color_key;
__s32 brightness;
__u32 contrast;
__u32 saturation;
__u32 gamma0;
__u32 gamma1;
__u32 gamma2;
__u32 gamma3;
__u32 gamma4;
__u32 gamma5;
};
 
/*
* Intel sprite handling
*
* Color keying works with a min/mask/max tuple. Both source and destination
* color keying is allowed.
*
* Source keying:
* Sprite pixels within the min & max values, masked against the color channels
* specified in the mask field, will be transparent. All other pixels will
* be displayed on top of the primary plane. For RGB surfaces, only the min
* and mask fields will be used; ranged compares are not allowed.
*
* Destination keying:
* Primary plane pixels that match the min value, masked against the color
* channels specified in the mask field, will be replaced by corresponding
* pixels from the sprite plane.
*
* Note that source & destination keying are exclusive; only one can be
* active on a given plane.
*/
 
#define I915_SET_COLORKEY_NONE (1<<0) /* disable color key matching */
#define I915_SET_COLORKEY_DESTINATION (1<<1)
#define I915_SET_COLORKEY_SOURCE (1<<2)
struct drm_intel_sprite_colorkey {
__u32 plane_id;
__u32 min_value;
__u32 channel_mask;
__u32 max_value;
__u32 flags;
};
 
struct drm_i915_gem_wait {
/** Handle of BO we shall wait on */
__u32 bo_handle;
__u32 flags;
/** Number of nanoseconds to wait, Returns time remaining. */
__s64 timeout_ns;
};
 
struct drm_i915_gem_context_create {
/* output: id of new context*/
__u32 ctx_id;
__u32 pad;
};
 
struct drm_i915_gem_context_destroy {
__u32 ctx_id;
__u32 pad;
};
 
struct drm_i915_reg_read {
__u64 offset;
__u64 val; /* Return value */
};
#endif /* _I915_DRM_H_ */
/drivers/video/Intel-2D/intel.h
0,0 → 1,152
 
 
/** enumeration of 3d consumers so some can maintain invariant state. */
enum last_3d {
LAST_3D_OTHER,
LAST_3D_VIDEO,
LAST_3D_RENDER,
LAST_3D_ROTATION
};
 
 
 
typedef struct intel_screen_private {
int cpp;
 
#define RENDER_BATCH I915_EXEC_RENDER
#define BLT_BATCH I915_EXEC_BLT
 
unsigned int current_batch;
 
dri_bufmgr *bufmgr;
 
uint32_t batch_ptr[4096];
/** Byte offset in batch_ptr for the next dword to be emitted. */
unsigned int batch_used;
/** Position in batch_ptr at the start of the current BEGIN_BATCH */
unsigned int batch_emit_start;
/** Number of bytes to be emitted in the current BEGIN_BATCH. */
uint32_t batch_emitting;
dri_bo *batch_bo, *last_batch_bo[2];
/** Whether we're in a section of code that can't tolerate flushing */
Bool in_batch_atomic;
/** Ending batch_used that was verified by intel_start_batch_atomic() */
int batch_atomic_limit;
struct list batch_pixmaps;
drm_intel_bo *wa_scratch_bo;
 
unsigned int tiling;
 
#define INTEL_TILING_FB 0x1
#define INTEL_TILING_2D 0x2
#define INTEL_TILING_3D 0x4
#define INTEL_TILING_ALL (~0)
 
Bool has_relaxed_fencing;
 
int Chipset;
 
unsigned int BR[20];
 
void (*vertex_flush) (struct intel_screen_private *intel);
void (*batch_flush) (struct intel_screen_private *intel);
void (*batch_commit_notify) (struct intel_screen_private *intel);
 
Bool need_sync;
 
int accel_pixmap_offset_alignment;
int accel_max_x;
int accel_max_y;
int max_bo_size;
int max_gtt_map_size;
int max_tiling_size;
 
struct {
drm_intel_bo *gen4_vs_bo;
drm_intel_bo *gen4_sf_bo;
drm_intel_bo *gen4_wm_packed_bo;
drm_intel_bo *gen4_wm_planar_bo;
drm_intel_bo *gen4_cc_bo;
drm_intel_bo *gen4_cc_vp_bo;
drm_intel_bo *gen4_sampler_bo;
drm_intel_bo *gen4_sip_kernel_bo;
drm_intel_bo *wm_prog_packed_bo;
drm_intel_bo *wm_prog_planar_bo;
drm_intel_bo *gen6_blend_bo;
drm_intel_bo *gen6_depth_stencil_bo;
} video;
 
/* Render accel state */
float scale_units[2][2];
/** Transform pointers for src/mask, or NULL if identity */
PictTransform *transform[2];
 
PixmapPtr render_source, render_mask, render_dest;
PicturePtr render_source_picture, render_mask_picture, render_dest_picture;
Bool needs_3d_invariant;
Bool needs_render_state_emit;
Bool needs_render_vertex_emit;
 
/* i830 render accel state */
uint32_t render_dest_format;
uint32_t cblend, ablend, s8_blendctl;
 
/* i915 render accel state */
PixmapPtr texture[2];
uint32_t mapstate[6];
uint32_t samplerstate[6];
 
struct {
int op;
uint32_t dst_format;
} i915_render_state;
 
struct {
int num_sf_outputs;
int drawrect;
uint32_t blend;
dri_bo *samplers;
dri_bo *kernel;
} gen6_render_state;
 
uint32_t prim_offset;
void (*prim_emit)(struct intel_screen_private *intel,
int srcX, int srcY,
int maskX, int maskY,
int dstX, int dstY,
int w, int h);
int floats_per_vertex;
int last_floats_per_vertex;
uint16_t vertex_offset;
uint16_t vertex_count;
uint16_t vertex_index;
uint16_t vertex_used;
uint32_t vertex_id;
float vertex_ptr[4*1024];
dri_bo *vertex_bo;
 
uint8_t surface_data[16*1024];
uint16_t surface_used;
uint16_t surface_table;
uint32_t surface_reloc;
dri_bo *surface_bo;
 
/* 965 render acceleration state */
struct gen4_render_state *gen4_render_state;
 
Bool use_pageflipping;
Bool use_triple_buffer;
Bool force_fallback;
Bool has_kernel_flush;
Bool needs_flush;
 
enum last_3d last_3d;
 
/**
* User option to print acceleration fallback info to the server log.
*/
Bool fallback_debug;
unsigned debug_flush;
Bool has_prime_vmap_flush;
} intel_screen_private;
 
/drivers/video/Intel-2D/intel_driver.h
0,0 → 1,288
#ifndef INTEL_DRIVER_H
#define INTEL_DRIVER_H
 
#define INTEL_VERSION 4000
#define INTEL_NAME "intel"
#define INTEL_DRIVER_NAME "intel"
 
#define INTEL_VERSION_MAJOR PACKAGE_VERSION_MAJOR
#define INTEL_VERSION_MINOR PACKAGE_VERSION_MINOR
#define INTEL_VERSION_PATCH PACKAGE_VERSION_PATCHLEVEL
 
#ifndef PCI_CHIP_I810
#define PCI_CHIP_I810 0x7121
#define PCI_CHIP_I810_DC100 0x7123
#define PCI_CHIP_I810_E 0x7125
#define PCI_CHIP_I815 0x1132
#define PCI_CHIP_I810_BRIDGE 0x7120
#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
#define PCI_CHIP_I810_E_BRIDGE 0x7124
#define PCI_CHIP_I815_BRIDGE 0x1130
#endif
 
#ifndef PCI_CHIP_I830_M
#define PCI_CHIP_I830_M 0x3577
#define PCI_CHIP_I830_M_BRIDGE 0x3575
#endif
 
#ifndef PCI_CHIP_845_G
#define PCI_CHIP_845_G 0x2562
#define PCI_CHIP_845_G_BRIDGE 0x2560
#endif
 
#ifndef PCI_CHIP_I854
#define PCI_CHIP_I854 0x358E
#define PCI_CHIP_I854_BRIDGE 0x358C
#endif
 
#ifndef PCI_CHIP_I855_GM
#define PCI_CHIP_I855_GM 0x3582
#define PCI_CHIP_I855_GM_BRIDGE 0x3580
#endif
 
#ifndef PCI_CHIP_I865_G
#define PCI_CHIP_I865_G 0x2572
#define PCI_CHIP_I865_G_BRIDGE 0x2570
#endif
 
#ifndef PCI_CHIP_I915_G
#define PCI_CHIP_I915_G 0x2582
#define PCI_CHIP_I915_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I915_GM
#define PCI_CHIP_I915_GM 0x2592
#define PCI_CHIP_I915_GM_BRIDGE 0x2590
#endif
 
#ifndef PCI_CHIP_E7221_G
#define PCI_CHIP_E7221_G 0x258A
/* Same as I915_G_BRIDGE */
#define PCI_CHIP_E7221_G_BRIDGE 0x2580
#endif
 
#ifndef PCI_CHIP_I945_G
#define PCI_CHIP_I945_G 0x2772
#define PCI_CHIP_I945_G_BRIDGE 0x2770
#endif
 
#ifndef PCI_CHIP_I945_GM
#define PCI_CHIP_I945_GM 0x27A2
#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
#endif
 
#ifndef PCI_CHIP_I945_GME
#define PCI_CHIP_I945_GME 0x27AE
#define PCI_CHIP_I945_GME_BRIDGE 0x27AC
#endif
 
#ifndef PCI_CHIP_PINEVIEW_M
#define PCI_CHIP_PINEVIEW_M 0xA011
#define PCI_CHIP_PINEVIEW_M_BRIDGE 0xA010
#define PCI_CHIP_PINEVIEW_G 0xA001
#define PCI_CHIP_PINEVIEW_G_BRIDGE 0xA000
#endif
 
#ifndef PCI_CHIP_G35_G
#define PCI_CHIP_G35_G 0x2982
#define PCI_CHIP_G35_G_BRIDGE 0x2980
#endif
 
#ifndef PCI_CHIP_I965_Q
#define PCI_CHIP_I965_Q 0x2992
#define PCI_CHIP_I965_Q_BRIDGE 0x2990
#endif
 
#ifndef PCI_CHIP_I965_G
#define PCI_CHIP_I965_G 0x29A2
#define PCI_CHIP_I965_G_BRIDGE 0x29A0
#endif
 
#ifndef PCI_CHIP_I946_GZ
#define PCI_CHIP_I946_GZ 0x2972
#define PCI_CHIP_I946_GZ_BRIDGE 0x2970
#endif
 
#ifndef PCI_CHIP_I965_GM
#define PCI_CHIP_I965_GM 0x2A02
#define PCI_CHIP_I965_GM_BRIDGE 0x2A00
#endif
 
#ifndef PCI_CHIP_I965_GME
#define PCI_CHIP_I965_GME 0x2A12
#define PCI_CHIP_I965_GME_BRIDGE 0x2A10
#endif
 
#ifndef PCI_CHIP_G33_G
#define PCI_CHIP_G33_G 0x29C2
#define PCI_CHIP_G33_G_BRIDGE 0x29C0
#endif
 
#ifndef PCI_CHIP_Q35_G
#define PCI_CHIP_Q35_G 0x29B2
#define PCI_CHIP_Q35_G_BRIDGE 0x29B0
#endif
 
#ifndef PCI_CHIP_Q33_G
#define PCI_CHIP_Q33_G 0x29D2
#define PCI_CHIP_Q33_G_BRIDGE 0x29D0
#endif
 
#ifndef PCI_CHIP_GM45_GM
#define PCI_CHIP_GM45_GM 0x2A42
#define PCI_CHIP_GM45_BRIDGE 0x2A40
#endif
 
#ifndef PCI_CHIP_G45_E_G
#define PCI_CHIP_G45_E_G 0x2E02
#define PCI_CHIP_G45_E_G_BRIDGE 0x2E00
#endif
 
#ifndef PCI_CHIP_G45_G
#define PCI_CHIP_G45_G 0x2E22
#define PCI_CHIP_G45_G_BRIDGE 0x2E20
#endif
 
#ifndef PCI_CHIP_Q45_G
#define PCI_CHIP_Q45_G 0x2E12
#define PCI_CHIP_Q45_G_BRIDGE 0x2E10
#endif
 
#ifndef PCI_CHIP_G41_G
#define PCI_CHIP_G41_G 0x2E32
#define PCI_CHIP_G41_G_BRIDGE 0x2E30
#endif
 
#ifndef PCI_CHIP_B43_G
#define PCI_CHIP_B43_G 0x2E42
#define PCI_CHIP_B43_G_BRIDGE 0x2E40
#endif
 
#ifndef PCI_CHIP_B43_G1
#define PCI_CHIP_B43_G1 0x2E92
#define PCI_CHIP_B43_G1_BRIDGE 0x2E90
#endif
 
#ifndef PCI_CHIP_IRONLAKE_D_G
#define PCI_CHIP_IRONLAKE_D_G 0x0042
#define PCI_CHIP_IRONLAKE_D_G_BRIDGE 0x0040
#endif
 
#ifndef PCI_CHIP_IRONLAKE_M_G
#define PCI_CHIP_IRONLAKE_M_G 0x0046
#define PCI_CHIP_IRONLAKE_M_G_BRIDGE 0x0044
#endif
 
#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
 
#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156
#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
#define PCI_CHIP_IVYBRIDGE_D_GT1 0x0152
#define PCI_CHIP_IVYBRIDGE_D_GT2 0x0162
#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a
#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
 
#define PCI_CHIP_HASWELL_D_GT1 0x0402
#define PCI_CHIP_HASWELL_D_GT2 0x0412
#define PCI_CHIP_HASWELL_D_GT2_PLUS 0x0422
#define PCI_CHIP_HASWELL_M_GT1 0x0406
#define PCI_CHIP_HASWELL_M_GT2 0x0416
#define PCI_CHIP_HASWELL_M_GT2_PLUS 0x0426
#define PCI_CHIP_HASWELL_S_GT1 0x040A
#define PCI_CHIP_HASWELL_S_GT2 0x041A
#define PCI_CHIP_HASWELL_S_GT2_PLUS 0x042A
#define PCI_CHIP_HASWELL_SDV_D_GT1 0x0C02
#define PCI_CHIP_HASWELL_SDV_D_GT2 0x0C12
#define PCI_CHIP_HASWELL_SDV_D_GT2_PLUS 0x0C22
#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06
#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16
#define PCI_CHIP_HASWELL_SDV_M_GT2_PLUS 0x0C26
#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A
#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A
#define PCI_CHIP_HASWELL_SDV_S_GT2_PLUS 0x0C2A
#define PCI_CHIP_HASWELL_ULT_D_GT1 0x0A02
#define PCI_CHIP_HASWELL_ULT_D_GT2 0x0A12
#define PCI_CHIP_HASWELL_ULT_D_GT2_PLUS 0x0A22
#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06
#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16
#define PCI_CHIP_HASWELL_ULT_M_GT2_PLUS 0x0A26
#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A
#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A
#define PCI_CHIP_HASWELL_ULT_S_GT2_PLUS 0x0A2A
#define PCI_CHIP_HASWELL_CRW_D_GT1 0x0D12
#define PCI_CHIP_HASWELL_CRW_D_GT2 0x0D22
#define PCI_CHIP_HASWELL_CRW_D_GT2_PLUS 0x0D32
#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D16
#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D26
#define PCI_CHIP_HASWELL_CRW_M_GT2_PLUS 0x0D36
#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D1A
#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D2A
#define PCI_CHIP_HASWELL_CRW_S_GT2_PLUS 0x0D3A
 
#define PCI_CHIP_VALLEYVIEW_PO 0x0f30
#define PCI_CHIP_VALLEYVIEW_1 0x0f31
#define PCI_CHIP_VALLEYVIEW_2 0x0f32
#define PCI_CHIP_VALLEYVIEW_3 0x0f33
 
#endif
 
#define I85X_CAPID 0x44
#define I85X_VARIANT_MASK 0x7
#define I85X_VARIANT_SHIFT 5
#define I855_GME 0x0
#define I855_GM 0x4
#define I852_GME 0x2
#define I852_GM 0x5
 
#define I810_MEMBASE(p,n) (p)->regions[(n)].base_addr
#define VENDOR_ID(p) (p)->vendor_id
#define DEVICE_ID(p) (p)->device_id
#define SUBVENDOR_ID(p) (p)->subvendor_id
#define SUBSYS_ID(p) (p)->subdevice_id
#define CHIP_REVISION(p) (p)->revision
 
#define INTEL_INFO(intel) ((intel)->info)
#define IS_GENx(intel, X) (INTEL_INFO(intel)->gen >= 8*(X) && INTEL_INFO(intel)->gen < 8*((X)+1))
#define IS_GEN1(intel) IS_GENx(intel, 1)
#define IS_GEN2(intel) IS_GENx(intel, 2)
#define IS_GEN3(intel) IS_GENx(intel, 3)
#define IS_GEN4(intel) IS_GENx(intel, 4)
#define IS_GEN5(intel) IS_GENx(intel, 5)
#define IS_GEN6(intel) IS_GENx(intel, 6)
#define IS_GEN7(intel) IS_GENx(intel, 7)
#define IS_HSW(intel) (INTEL_INFO(intel)->gen == 075)
 
/* Some chips have specific errata (or limits) that we need to workaround. */
#define IS_I830(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I830_M)
#define IS_845G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_845_G)
#define IS_I865G(intel) (DEVICE_ID((intel)->PciInfo) == PCI_CHIP_I865_G)
 
#define IS_I915G(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_G || DEVICE_ID(pI810->PciInfo) == PCI_CHIP_E7221_G)
#define IS_I915GM(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I915_GM)
 
#define IS_965_Q(pI810) (DEVICE_ID(pI810->PciInfo) == PCI_CHIP_I965_Q)
 
/* supports Y tiled surfaces (pre-965 Mesa isn't ready yet) */
#define SUPPORTS_YTILING(pI810) (INTEL_INFO(intel)->gen >= 040)
#define HAS_BLT(pI810) (INTEL_INFO(intel)->gen >= 060)
 
struct intel_device_info {
int gen;
};
 
//void intel_detect_chipset(ScrnInfoPtr scrn,
// EntityInfoPtr ent,
// struct pci_device *pci);
 
 
#endif /* INTEL_DRIVER_H */
/drivers/video/Intel-2D/intel_list.h
0,0 → 1,353
/*
* Copyright © 2010-2012 Intel Corporation
* Copyright © 2010 Francisco Jerez <currojerez@riseup.net>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
 
#ifndef _INTEL_LIST_H_
#define _INTEL_LIST_H_
 
#include <stdbool.h>
 
/**
* @file Classic doubly-link circular list implementation.
* For real usage examples of the linked list, see the file test/list.c
*
* Example:
* We need to keep a list of struct foo in the parent struct bar, i.e. what
* we want is something like this.
*
* struct bar {
* ...
* struct foo *list_of_foos; -----> struct foo {}, struct foo {}, struct foo{}
* ...
* }
*
* We need one list head in bar and a list element in all list_of_foos (both are of
* data type 'struct list').
*
* struct bar {
* ...
* struct list list_of_foos;
* ...
* }
*
* struct foo {
* ...
* struct list entry;
* ...
* }
*
* Now we initialize the list head:
*
* struct bar bar;
* ...
* list_init(&bar.list_of_foos);
*
* Then we create the first element and add it to this list:
*
* struct foo *foo = malloc(...);
* ....
* list_add(&foo->entry, &bar.list_of_foos);
*
* Repeat the above for each element you want to add to the list. Deleting
* works with the element itself.
* list_del(&foo->entry);
* free(foo);
*
* Note: calling list_del(&bar.list_of_foos) will set bar.list_of_foos to an empty
* list again.
*
* Looping through the list requires a 'struct foo' as iterator and the
* name of the field the subnodes use.
*
* struct foo *iterator;
* list_for_each_entry(iterator, &bar.list_of_foos, entry) {
* if (iterator->something == ...)
* ...
* }
*
* Note: You must not call list_del() on the iterator if you continue the
* loop. You need to run the safe for-each loop instead:
*
* struct foo *iterator, *next;
* list_for_each_entry_safe(iterator, next, &bar.list_of_foos, entry) {
* if (...)
* list_del(&iterator->entry);
* }
*
*/
 
/**
* The linkage struct for list nodes. This struct must be part of your
* to-be-linked struct. struct list is required for both the head of the
* list and for each list node.
*
* Position and name of the struct list field is irrelevant.
* There are no requirements that elements of a list are of the same type.
* There are no requirements for a list head, any struct list can be a list
* head.
*/
struct list {
struct list *next, *prev;
};
 
/**
* Initialize the list as an empty list.
*
* Example:
* list_init(&bar->list_of_foos);
*
* @param The list to initialized.
*/
static void
list_init(struct list *list)
{
list->next = list->prev = list;
}
 
static inline void
__list_add(struct list *entry,
struct list *prev,
struct list *next)
{
next->prev = entry;
entry->next = next;
entry->prev = prev;
prev->next = entry;
}
 
/**
* Insert a new element after the given list head. The new element does not
* need to be initialised as empty list.
* The list changes from:
* head → some element → ...
* to
* head → new element → older element → ...
*
* Example:
* struct foo *newfoo = malloc(...);
* list_add(&newfoo->entry, &bar->list_of_foos);
*
* @param entry The new element to prepend to the list.
* @param head The existing list.
*/
static inline void
list_add(struct list *entry, struct list *head)
{
__list_add(entry, head, head->next);
}
 
static inline void
list_add_tail(struct list *entry, struct list *head)
{
__list_add(entry, head->prev, head);
}
 
static inline void list_replace(struct list *old,
struct list *new)
{
new->next = old->next;
new->next->prev = new;
new->prev = old->prev;
new->prev->next = new;
}
 
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
 
#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)
 
/**
* Append a new element to the end of the list given with this list head.
*
* The list changes from:
* head → some element → ... → lastelement
* to
* head → some element → ... → lastelement → new element
*
* Example:
* struct foo *newfoo = malloc(...);
* list_append(&newfoo->entry, &bar->list_of_foos);
*
* @param entry The new element to prepend to the list.
* @param head The existing list.
*/
static inline void
list_append(struct list *entry, struct list *head)
{
__list_add(entry, head->prev, head);
}
 
 
static inline void
__list_del(struct list *prev, struct list *next)
{
assert(next->prev == prev->next);
next->prev = prev;
prev->next = next;
}
 
static inline void
_list_del(struct list *entry)
{
assert(entry->prev->next == entry);
assert(entry->next->prev == entry);
__list_del(entry->prev, entry->next);
}
 
/**
* Remove the element from the list it is in. Using this function will reset
* the pointers to/from this element so it is removed from the list. It does
* NOT free the element itself or manipulate it otherwise.
*
* Using list_del on a pure list head (like in the example at the top of
* this file) will NOT remove the first element from
* the list but rather reset the list as empty list.
*
* Example:
* list_del(&foo->entry);
*
* @param entry The element to remove.
*/
static inline void
list_del(struct list *entry)
{
_list_del(entry);
list_init(entry);
}
 
static inline void list_move(struct list *list, struct list *head)
{
if (list->prev != head) {
_list_del(list);
list_add(list, head);
}
}
 
static inline void list_move_tail(struct list *list, struct list *head)
{
_list_del(list);
list_add_tail(list, head);
}
 
/**
* Check if the list is empty.
*
* Example:
* list_is_empty(&bar->list_of_foos);
*
* @return True if the list contains one or more elements or False otherwise.
*/
static inline bool
list_is_empty(struct list *head)
{
return head->next == head;
}
 
/**
* Alias of container_of
*/
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
 
/**
* Retrieve the first list entry for the given list pointer.
*
* Example:
* struct foo *first;
* first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
*
* @param ptr The list head
* @param type Data type of the list element to retrieve
* @param member Member name of the struct list field in the list element.
* @return A pointer to the first list element.
*/
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
 
/**
* Retrieve the last list entry for the given listpointer.
*
* Example:
* struct foo *first;
* first = list_last_entry(&bar->list_of_foos, struct foo, list_of_foos);
*
* @param ptr The list head
* @param type Data type of the list element to retrieve
* @param member Member name of the struct list field in the list element.
* @return A pointer to the last list element.
*/
#define list_last_entry(ptr, type, member) \
list_entry((ptr)->prev, type, member)
 
#define __container_of(ptr, sample, member) \
(void *)((char *)(ptr) \
- ((char *)&(sample)->member - (char *)(sample)))
/**
* Loop through the list given by head and set pos to struct in the list.
*
* Example:
* struct foo *iterator;
* list_for_each_entry(iterator, &bar->list_of_foos, entry) {
* [modify iterator]
* }
*
* This macro is not safe for node deletion. Use list_for_each_entry_safe
* instead.
*
* @param pos Iterator variable of the type of the list elements.
* @param head List head
* @param member Member name of the struct list in the list elements.
*
*/
#define list_for_each_entry(pos, head, member) \
for (pos = __container_of((head)->next, pos, member); \
&pos->member != (head); \
pos = __container_of(pos->member.next, pos, member))
 
#define list_for_each_entry_reverse(pos, head, member) \
for (pos = __container_of((head)->prev, pos, member); \
&pos->member != (head); \
pos = __container_of(pos->member.prev, pos, member))
 
/**
* Loop through the list, keeping a backup pointer to the element. This
* macro allows for the deletion of a list element while looping through the
* list.
*
* See list_for_each_entry for more details.
*/
#define list_for_each_entry_safe(pos, tmp, head, member) \
for (pos = __container_of((head)->next, pos, member), \
tmp = __container_of(pos->member.next, pos, member); \
&pos->member != (head); \
pos = tmp, tmp = __container_of(pos->member.next, tmp, member))
 
 
#undef container_of
#define container_of(ptr, type, member) \
((type *)((char *)(ptr) - (char *) &((type *)0)->member))
 
#endif /* _INTEL_LIST_H_ */
 
/drivers/video/Intel-2D/kgem-sna.c
0,0 → 1,559
/*
* Copyright (c) 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Chris Wilson <chris@chris-wilson.co.uk>
*
*/
 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
 
#include "sna.h"
#include "sna_reg.h"
 
#define DBG_NO_HW 0
#define DBG_NO_TILING 1
#define DBG_NO_CACHE 0
#define DBG_NO_CACHE_LEVEL 0
#define DBG_NO_CPU 0
#define DBG_NO_USERPTR 0
#define DBG_NO_LLC 0
#define DBG_NO_SEMAPHORES 0
#define DBG_NO_MADV 0
#define DBG_NO_UPLOAD_CACHE 0
#define DBG_NO_UPLOAD_ACTIVE 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
 
#define MAX_GTT_VMA_CACHE 512
#define MAX_CPU_VMA_CACHE INT16_MAX
#define MAP_PRESERVE_TIME 10
 
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
#define MAKE_USER_MAP(ptr) ((void*)((uintptr_t)(ptr) | 3))
#define IS_USER_MAP(ptr) ((uintptr_t)(ptr) & 2)
#define __MAP_TYPE(ptr) ((uintptr_t)(ptr) & 3)
 
#define MAKE_REQUEST(rq, ring) ((struct kgem_request *)((uintptr_t)(rq) | (ring)))
 
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
 
 
 
static int gem_param(struct kgem *kgem, int name)
{
ioctl_t io;
 
drm_i915_getparam_t gp;
int v = -1; /* No param uses the sign bit, reserve it for errors */
 
VG_CLEAR(gp);
gp.param = name;
gp.value = &v;
 
io.handle = kgem->fd;
io.io_code = SRV_GET_PARAM;
io.input = &gp;
io.inp_size = sizeof(gp);
io.output = NULL;
io.out_size = 0;
 
if (call_service(&io)!=0)
return -1;
 
VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
return v;
}
 
static bool test_has_no_reloc(struct kgem *kgem)
{
if (DBG_NO_FAST_RELOC)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
 
static bool test_has_handle_lut(struct kgem *kgem)
{
if (DBG_NO_HANDLE_LUT)
return false;
 
return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;
}
 
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
bool detected = false;
int ret;
 
if (DBG_NO_SEMAPHORES)
return false;
 
ret = gem_param(kgem, LOCAL_I915_PARAM_HAS_SEMAPHORES);
if (ret != -1)
return ret > 0;
 
return detected;
}