/drivers/video/Intel-2D/brw/brw.h |
---|
0,0 → 1,17 |
#include "brw_eu.h" |
/*
 * Entry points for the SF and WM kernels.  Each one takes the brw_compile
 * context to work on; the WM variants also take a dispatch width.
 *
 * NOTE(review): the bool result presumably reports whether the kernel was
 * assembled successfully, and dispatch_width is presumably 8 or 16 -- the
 * implementations are not visible from this header, confirm against them.
 */

/* SF kernels, without and with a mask. */
bool brw_sf_kernel__nomask(struct brw_compile *p);
bool brw_sf_kernel__mask(struct brw_compile *p);

/* WM kernels, affine variants. */
bool brw_wm_kernel__affine(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch_width);

/* WM kernels, projective variants. */
bool brw_wm_kernel__projective(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch_width);

/* WM kernels, opacity variants. */
bool brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch_width);
bool brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch_width);
/drivers/video/Intel-2D/brw/brw_eu.c |
---|
0,0 → 1,150 |
/* |
Copyright (C) Intel Corp. 2006. All Rights Reserved. |
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
develop this 3D driver. |
Permission is hereby granted, free of charge, to any person obtaining |
a copy of this software and associated documentation files (the |
"Software"), to deal in the Software without restriction, including |
without limitation the rights to use, copy, modify, merge, publish, |
distribute, sublicense, and/or sell copies of the Software, and to |
permit persons to whom the Software is furnished to do so, subject to |
the following conditions: |
The above copyright notice and this permission notice (including the |
next paragraph) shall be included in all copies or substantial |
portions of the Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
**********************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "brw_eu.h" |
#include <string.h> |
#include <stdlib.h> |
/* Returns the corresponding conditional mod for swapping src0 and |
* src1 in e.g. CMP. |
*/ |
uint32_t |
brw_swap_cmod(uint32_t cmod) |
{ |
switch (cmod) { |
case BRW_CONDITIONAL_Z: |
case BRW_CONDITIONAL_NZ: |
return cmod; |
case BRW_CONDITIONAL_G: |
return BRW_CONDITIONAL_LE; |
case BRW_CONDITIONAL_GE: |
return BRW_CONDITIONAL_L; |
case BRW_CONDITIONAL_L: |
return BRW_CONDITIONAL_GE; |
case BRW_CONDITIONAL_LE: |
return BRW_CONDITIONAL_G; |
default: |
return ~0; |
} |
} |
/* How does predicate control work when execution_size != 8? Do I |
* need to test/set for 0xffff when execution_size is 16? |
*/ |
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) |
{ |
p->current->header.predicate_control = BRW_PREDICATE_NONE; |
if (value != 0xff) { |
if (value != p->flag_value) { |
brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); |
p->flag_value = value; |
} |
p->current->header.predicate_control = BRW_PREDICATE_NORMAL; |
} |
} |
void brw_set_compression_control(struct brw_compile *p, |
enum brw_compression compression_control) |
{ |
p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); |
if (p->gen >= 060) { |
/* Since we don't use the 32-wide support in gen6, we translate |
* the pre-gen6 compression control here. |
*/ |
switch (compression_control) { |
case BRW_COMPRESSION_NONE: |
/* This is the "use the first set of bits of dmask/vmask/arf |
* according to execsize" option. |
*/ |
p->current->header.compression_control = GEN6_COMPRESSION_1Q; |
break; |
case BRW_COMPRESSION_2NDHALF: |
/* For 8-wide, this is "use the second set of 8 bits." */ |
p->current->header.compression_control = GEN6_COMPRESSION_2Q; |
break; |
case BRW_COMPRESSION_COMPRESSED: |
/* For 16-wide instruction compression, use the first set of 16 bits |
* since we don't do 32-wide dispatch. |
*/ |
p->current->header.compression_control = GEN6_COMPRESSION_1H; |
break; |
default: |
assert(!"not reached"); |
p->current->header.compression_control = GEN6_COMPRESSION_1H; |
break; |
} |
} else { |
p->current->header.compression_control = compression_control; |
} |
} |
void brw_push_insn_state( struct brw_compile *p ) |
{ |
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); |
memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); |
p->compressed_stack[p->current - p->stack] = p->compressed; |
p->current++; |
} |
void brw_pop_insn_state( struct brw_compile *p ) |
{ |
assert(p->current != p->stack); |
p->current--; |
p->compressed = p->compressed_stack[p->current - p->stack]; |
} |
void brw_compile_init(struct brw_compile *p, int gen, void *store) |
{ |
assert(gen); |
p->gen = gen; |
p->store = store; |
p->nr_insn = 0; |
p->current = p->stack; |
p->compressed = false; |
memset(p->current, 0, sizeof(p->current[0])); |
/* Some defaults? |
*/ |
brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */ |
brw_set_saturate(p, 0); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_predicate_control_flag_value(p, 0xff); |
p->if_stack_depth = 0; |
p->if_stack_array_size = 0; |
p->if_stack = NULL; |
} |
/drivers/video/Intel-2D/brw/brw_eu.h |
---|
0,0 → 1,2266 |
/* |
Copyright (C) Intel Corp. 2006. All Rights Reserved. |
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
develop this 3D driver. |
Permission is hereby granted, free of charge, to any person obtaining |
a copy of this software and associated documentation files (the |
"Software"), to deal in the Software without restriction, including |
without limitation the rights to use, copy, modify, merge, publish, |
distribute, sublicense, and/or sell copies of the Software, and to |
permit persons to whom the Software is furnished to do so, subject to |
the following conditions: |
The above copyright notice and this permission notice (including the |
next paragraph) shall be included in all copies or substantial |
portions of the Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
**********************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#ifndef BRW_EU_H |
#define BRW_EU_H |
#include <stdbool.h> |
#include <stdint.h> |
#include <stdio.h> |
#include <assert.h> |
/*
 * Swizzles: four 2-bit selectors packed into one value, selector idx
 * sitting at bit 2*idx.  BRW_GET_SWZ extracts selector idx again.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)

/* Write masks: one bit per channel, combinable with |. */
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8

#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/** Number of message register file registers */
#define BRW_MAX_MRF 16

/* Access mode encodings. */
#define BRW_ALIGN_1 0
#define BRW_ALIGN_16 1

/* Address mode encodings. */
#define BRW_ADDRESS_DIRECT 0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1

/* Channel indices. */
#define BRW_CHANNEL_X 0
#define BRW_CHANNEL_Y 1
#define BRW_CHANNEL_Z 2
#define BRW_CHANNEL_W 3
/* Pre-gen6 compression control requests, numbered 0..2 in this order. */
enum brw_compression {
	BRW_COMPRESSION_NONE,
	BRW_COMPRESSION_2NDHALF,
	BRW_COMPRESSION_COMPRESSED,
};

/*
 * Gen6 compression control values: the four quarter (Q) selections and
 * the two half (H) selections.  1H shares its value with 1Q and 2H with 3Q.
 */
#define GEN6_COMPRESSION_1Q 0
#define GEN6_COMPRESSION_2Q 1
#define GEN6_COMPRESSION_3Q 2
#define GEN6_COMPRESSION_4Q 3
#define GEN6_COMPRESSION_1H 0
#define GEN6_COMPRESSION_2H 2
/* Conditional modifier encodings.  EQ and NEQ are aliases of Z and NZ. */
#define BRW_CONDITIONAL_NONE 0
#define BRW_CONDITIONAL_Z 1
#define BRW_CONDITIONAL_NZ 2
#define BRW_CONDITIONAL_EQ 1 /* Z */
#define BRW_CONDITIONAL_NEQ 2 /* NZ */
#define BRW_CONDITIONAL_G 3
#define BRW_CONDITIONAL_GE 4
#define BRW_CONDITIONAL_L 5
#define BRW_CONDITIONAL_LE 6
#define BRW_CONDITIONAL_R 7
#define BRW_CONDITIONAL_O 8
#define BRW_CONDITIONAL_U 9

/* Debug control encodings. */
#define BRW_DEBUG_NONE 0
#define BRW_DEBUG_BREAKPOINT 1

/* Dependency control encodings. */
#define BRW_DEPENDENCY_NORMAL 0
#define BRW_DEPENDENCY_NOTCLEARED 1
#define BRW_DEPENDENCY_NOTCHECKED 2
#define BRW_DEPENDENCY_DISABLE 3

/* Execution size encodings: log2 of the size given in the name. */
#define BRW_EXECUTE_1 0
#define BRW_EXECUTE_2 1
#define BRW_EXECUTE_4 2
#define BRW_EXECUTE_8 3
#define BRW_EXECUTE_16 4
#define BRW_EXECUTE_32 5

/* Horizontal stride encodings. */
#define BRW_HORIZONTAL_STRIDE_0 0
#define BRW_HORIZONTAL_STRIDE_1 1
#define BRW_HORIZONTAL_STRIDE_2 2
#define BRW_HORIZONTAL_STRIDE_4 3

/* Saturate encodings. */
#define BRW_INSTRUCTION_NORMAL 0
#define BRW_INSTRUCTION_SATURATE 1

/* Mask control encodings. */
#define BRW_MASK_ENABLE 0
#define BRW_MASK_DISABLE 1

/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, there
 * are two contributors ANDed together to whether channels are
 * executed: The predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL 0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL 1
/** @} */
/*
 * Instruction opcodes.  Values up to BRW_OPCODE_NOP are hardware
 * encodings and are pinned explicitly; the backend opcodes count up
 * implicitly from FS_OPCODE_FB_WRITE = 128, so their order matters.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV = 1,
	BRW_OPCODE_SEL = 2,
	BRW_OPCODE_NOT = 4,
	BRW_OPCODE_AND = 5,
	BRW_OPCODE_OR = 6,
	BRW_OPCODE_XOR = 7,
	BRW_OPCODE_SHR = 8,
	BRW_OPCODE_SHL = 9,
	BRW_OPCODE_RSR = 10,
	BRW_OPCODE_RSL = 11,
	BRW_OPCODE_ASR = 12,
	BRW_OPCODE_CMP = 16,
	BRW_OPCODE_CMPN = 17,
	BRW_OPCODE_JMPI = 32,
	BRW_OPCODE_IF = 34,
	BRW_OPCODE_IFF = 35,
	BRW_OPCODE_ELSE = 36,
	BRW_OPCODE_ENDIF = 37,
	BRW_OPCODE_DO = 38,
	BRW_OPCODE_WHILE = 39,
	BRW_OPCODE_BREAK = 40,
	BRW_OPCODE_CONTINUE = 41,
	BRW_OPCODE_HALT = 42,
	BRW_OPCODE_MSAVE = 44,
	BRW_OPCODE_MRESTORE = 45,
	BRW_OPCODE_PUSH = 46,
	BRW_OPCODE_POP = 47,
	BRW_OPCODE_WAIT = 48,
	BRW_OPCODE_SEND = 49,
	BRW_OPCODE_SENDC = 50,
	BRW_OPCODE_MATH = 56,
	BRW_OPCODE_ADD = 64,
	BRW_OPCODE_MUL = 65,
	BRW_OPCODE_AVG = 66,
	BRW_OPCODE_FRC = 67,
	BRW_OPCODE_RNDU = 68,
	BRW_OPCODE_RNDD = 69,
	BRW_OPCODE_RNDE = 70,
	BRW_OPCODE_RNDZ = 71,
	BRW_OPCODE_MAC = 72,
	BRW_OPCODE_MACH = 73,
	BRW_OPCODE_LZD = 74,
	BRW_OPCODE_SAD2 = 80,
	BRW_OPCODE_SADA2 = 81,
	BRW_OPCODE_DP4 = 84,
	BRW_OPCODE_DPH = 85,
	BRW_OPCODE_DP3 = 86,
	BRW_OPCODE_DP2 = 87,
	BRW_OPCODE_DPA2 = 88,
	BRW_OPCODE_LINE = 89,
	BRW_OPCODE_PLN = 90,
	BRW_OPCODE_NOP = 126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};
/*
 * Predicate control encodings.  NONE and NORMAL are common; the ALIGN1
 * and ALIGN16 sets reuse the same numeric range starting at 2.
 */
#define BRW_PREDICATE_NONE 0
#define BRW_PREDICATE_NORMAL 1
#define BRW_PREDICATE_ALIGN1_ANYV 2
#define BRW_PREDICATE_ALIGN1_ALLV 3
#define BRW_PREDICATE_ALIGN1_ANY2H 4
#define BRW_PREDICATE_ALIGN1_ALL2H 5
#define BRW_PREDICATE_ALIGN1_ANY4H 6
#define BRW_PREDICATE_ALIGN1_ALL4H 7
#define BRW_PREDICATE_ALIGN1_ANY8H 8
#define BRW_PREDICATE_ALIGN1_ALL8H 9
#define BRW_PREDICATE_ALIGN1_ANY16H 10
#define BRW_PREDICATE_ALIGN1_ALL16H 11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5
#define BRW_PREDICATE_ALIGN16_ANY4H 6
#define BRW_PREDICATE_ALIGN16_ALL4H 7

/* Register file encodings. */
#define BRW_ARCHITECTURE_REGISTER_FILE 0
#define BRW_GENERAL_REGISTER_FILE 1
#define BRW_MESSAGE_REGISTER_FILE 2
#define BRW_IMMEDIATE_VALUE 3

/* Register type encodings.  VF shares 5 with B, and V shares 6 with HF. */
#define BRW_REGISTER_TYPE_UD 0
#define BRW_REGISTER_TYPE_D 1
#define BRW_REGISTER_TYPE_UW 2
#define BRW_REGISTER_TYPE_W 3
#define BRW_REGISTER_TYPE_UB 4
#define BRW_REGISTER_TYPE_B 5
#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF 6
#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F 7

/* Architecture register file numbers; each one has its own high nibble. */
#define BRW_ARF_NULL 0x00
#define BRW_ARF_ADDRESS 0x10
#define BRW_ARF_ACCUMULATOR 0x20
#define BRW_ARF_FLAG 0x30
#define BRW_ARF_MASK 0x40
#define BRW_ARF_MASK_STACK 0x50
#define BRW_ARF_MASK_STACK_DEPTH 0x60
#define BRW_ARF_STATE 0x70
#define BRW_ARF_CONTROL 0x80
#define BRW_ARF_NOTIFICATION_COUNT 0x90
#define BRW_ARF_IP 0xA0

/* Flag bit 7 for a message register number. */
#define BRW_MRF_COMPR4 (1 << 7)

/* Mask identifiers. */
#define BRW_AMASK 0
#define BRW_IMASK 1
#define BRW_LMASK 2
#define BRW_CMASK 3

/* Thread control encodings. */
#define BRW_THREAD_NORMAL 0
#define BRW_THREAD_ATOMIC 1
#define BRW_THREAD_SWITCH 2

/* Vertical stride encodings. */
#define BRW_VERTICAL_STRIDE_0 0
#define BRW_VERTICAL_STRIDE_1 1
#define BRW_VERTICAL_STRIDE_2 2
#define BRW_VERTICAL_STRIDE_4 3
#define BRW_VERTICAL_STRIDE_8 4
#define BRW_VERTICAL_STRIDE_16 5
#define BRW_VERTICAL_STRIDE_32 6
#define BRW_VERTICAL_STRIDE_64 7
#define BRW_VERTICAL_STRIDE_128 8
#define BRW_VERTICAL_STRIDE_256 9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF

/* Width encodings: log2 of the width given in the name. */
#define BRW_WIDTH_1 0
#define BRW_WIDTH_2 1
#define BRW_WIDTH_4 2
#define BRW_WIDTH_8 3
#define BRW_WIDTH_16 4

/* Stateless buffer boundary encodings, 1K through 2M in powers of two. */
#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11

/* Polygon facing encodings. */
#define BRW_POLYGON_FACING_FRONT 0
#define BRW_POLYGON_FACING_BACK 1
/* Message target encodings. */
#define BRW_MESSAGE_TARGET_NULL 0
#define BRW_MESSAGE_TARGET_MATH 1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER 2
#define BRW_MESSAGE_TARGET_GATEWAY 3
#define BRW_MESSAGE_TARGET_DATAPORT_READ 4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5
#define BRW_MESSAGE_TARGET_URB 6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7

/* GEN6 names; values 4 and 5 coincide with DATAPORT_READ/WRITE above. */
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE 4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE 5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE 9

/* Sampler return format encodings. */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3

/*
 * Sampler message type encodings.  Several names deliberately share a
 * value; the name carries the SIMD mode the value is meant for.
 */
#define BRW_SAMPLER_MESSAGE_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3

/* GEN5 sampler message type encodings. */
#define GEN5_SAMPLER_MESSAGE_SAMPLE 0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS 1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD 2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE 3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD 7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
#define BRW_SAMPLER_SIMD_MODE_SIMD8 1
#define BRW_SAMPLER_SIMD_MODE_SIMD16 2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3

/* Dataport block size encodings. */
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4

#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2

#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3

/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ 3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ 1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ 4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ 5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 6

/* Dataport read target cache encodings. */
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2

/* Render target write message control encodings. */
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4
/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * The GEN6 dataport cache IDs 4 and 5 intentionally reuse the values of
 * BRW_SFID_DATAPORT_READ and BRW_SFID_DATAPORT_WRITE.
 */
enum brw_message_target {
	BRW_SFID_NULL = 0,
	BRW_SFID_MATH = 1, /* Only valid on Gen4-5 */
	BRW_SFID_SAMPLER = 2,
	BRW_SFID_MESSAGE_GATEWAY = 3,
	BRW_SFID_DATAPORT_READ = 4,
	BRW_SFID_DATAPORT_WRITE = 5,
	BRW_SFID_URB = 6,
	BRW_SFID_THREAD_SPAWNER = 7,

	GEN6_SFID_DATAPORT_SAMPLER_CACHE = 4,
	GEN6_SFID_DATAPORT_RENDER_CACHE = 5,
	GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

	GEN7_SFID_DATAPORT_DATA_CACHE = 10,
};

/* Macro spelling of the same value as GEN7_SFID_DATAPORT_DATA_CACHE. */
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10
/* Dataport write message type encodings. */
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7

/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE 7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE 10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE 13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE 14

/* Math function encodings.  Value 9 is TAN on gen4 and FDIV on gen6+. */
#define BRW_MATH_FUNCTION_INV 1
#define BRW_MATH_FUNCTION_LOG 2
#define BRW_MATH_FUNCTION_EXP 3
#define BRW_MATH_FUNCTION_SQRT 4
#define BRW_MATH_FUNCTION_RSQ 5
#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */
#define BRW_MATH_FUNCTION_COS 7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN 9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV 9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW 10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13

/* Math message modifiers. */
#define BRW_MATH_INTEGER_UNSIGNED 0
#define BRW_MATH_INTEGER_SIGNED 1

#define BRW_MATH_PRECISION_FULL 0
#define BRW_MATH_PRECISION_PARTIAL 1

#define BRW_MATH_SATURATE_NONE 0
#define BRW_MATH_SATURATE_SATURATE 1

#define BRW_MATH_DATA_VECTOR 0
#define BRW_MATH_DATA_SCALAR 1

/* URB message encodings. */
#define BRW_URB_OPCODE_WRITE 0

#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
#define BRW_URB_SWIZZLE_TRANSPOSE 2

/* Scratch space size encodings, 1K through 2M in powers of two. */
#define BRW_SCRATCH_SPACE_SIZE_1K 0
#define BRW_SCRATCH_SPACE_SIZE_2K 1
#define BRW_SCRATCH_SPACE_SIZE_4K 2
#define BRW_SCRATCH_SPACE_SIZE_8K 3
#define BRW_SCRATCH_SPACE_SIZE_16K 4
#define BRW_SCRATCH_SPACE_SIZE_32K 5
#define BRW_SCRATCH_SPACE_SIZE_64K 6
#define BRW_SCRATCH_SPACE_SIZE_128K 7
#define BRW_SCRATCH_SPACE_SIZE_256K 8
#define BRW_SCRATCH_SPACE_SIZE_512K 9
#define BRW_SCRATCH_SPACE_SIZE_1M 10
#define BRW_SCRATCH_SPACE_SIZE_2M 11

/* NOTE(review): presumably the register size in bytes (8 x 4) -- confirm. */
#define REG_SIZE (8*4)
struct brw_instruction { |
struct { |
unsigned opcode:7; |
unsigned pad:1; |
unsigned access_mode:1; |
unsigned mask_control:1; |
unsigned dependency_control:2; |
unsigned compression_control:2; /* gen6: quater control */ |
unsigned thread_control:2; |
unsigned predicate_control:4; |
unsigned predicate_inverse:1; |
unsigned execution_size:3; |
/** |
* Conditional Modifier for most instructions. On Gen6+, this is also |
* used for the SEND instruction's Message Target/SFID. |
*/ |
unsigned destreg__conditionalmod:4; |
unsigned acc_wr_control:1; |
unsigned cmpt_control:1; |
unsigned debug_control:1; |
unsigned saturate:1; |
} header; |
union { |
struct { |
unsigned dest_reg_file:2; |
unsigned dest_reg_type:3; |
unsigned src0_reg_file:2; |
unsigned src0_reg_type:3; |
unsigned src1_reg_file:2; |
unsigned src1_reg_type:3; |
unsigned pad:1; |
unsigned dest_subreg_nr:5; |
unsigned dest_reg_nr:8; |
unsigned dest_horiz_stride:2; |
unsigned dest_address_mode:1; |
} da1; |
struct { |
unsigned dest_reg_file:2; |
unsigned dest_reg_type:3; |
unsigned src0_reg_file:2; |
unsigned src0_reg_type:3; |
unsigned src1_reg_file:2; /* 0x00000c00 */ |
unsigned src1_reg_type:3; /* 0x00007000 */ |
unsigned pad:1; |
int dest_indirect_offset:10; /* offset against the deref'd address reg */ |
unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ |
unsigned dest_horiz_stride:2; |
unsigned dest_address_mode:1; |
} ia1; |
struct { |
unsigned dest_reg_file:2; |
unsigned dest_reg_type:3; |
unsigned src0_reg_file:2; |
unsigned src0_reg_type:3; |
unsigned src1_reg_file:2; |
unsigned src1_reg_type:3; |
unsigned pad:1; |
unsigned dest_writemask:4; |
unsigned dest_subreg_nr:1; |
unsigned dest_reg_nr:8; |
unsigned dest_horiz_stride:2; |
unsigned dest_address_mode:1; |
} da16; |
struct { |
unsigned dest_reg_file:2; |
unsigned dest_reg_type:3; |
unsigned src0_reg_file:2; |
unsigned src0_reg_type:3; |
unsigned pad0:6; |
unsigned dest_writemask:4; |
int dest_indirect_offset:6; |
unsigned dest_subreg_nr:3; |
unsigned dest_horiz_stride:2; |
unsigned dest_address_mode:1; |
} ia16; |
struct { |
unsigned dest_reg_file:2; |
unsigned dest_reg_type:3; |
unsigned src0_reg_file:2; |
unsigned src0_reg_type:3; |
unsigned src1_reg_file:2; |
unsigned src1_reg_type:3; |
unsigned pad:1; |
int jump_count:16; |
} branch_gen6; |
struct { |
unsigned dest_reg_file:1; |
unsigned flag_subreg_num:1; |
unsigned pad0:2; |
unsigned src0_abs:1; |
unsigned src0_negate:1; |
unsigned src1_abs:1; |
unsigned src1_negate:1; |
unsigned src2_abs:1; |
unsigned src2_negate:1; |
unsigned pad1:7; |
unsigned dest_writemask:4; |
unsigned dest_subreg_nr:3; |
unsigned dest_reg_nr:8; |
} da3src; |
} bits1; |
union { |
struct { |
unsigned src0_subreg_nr:5; |
unsigned src0_reg_nr:8; |
unsigned src0_abs:1; |
unsigned src0_negate:1; |
unsigned src0_address_mode:1; |
unsigned src0_horiz_stride:2; |
unsigned src0_width:3; |
unsigned src0_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad:5; |
} da1; |
struct { |
int src0_indirect_offset:10; |
unsigned src0_subreg_nr:3; |
unsigned src0_abs:1; |
unsigned src0_negate:1; |
unsigned src0_address_mode:1; |
unsigned src0_horiz_stride:2; |
unsigned src0_width:3; |
unsigned src0_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad:5; |
} ia1; |
struct { |
unsigned src0_swz_x:2; |
unsigned src0_swz_y:2; |
unsigned src0_subreg_nr:1; |
unsigned src0_reg_nr:8; |
unsigned src0_abs:1; |
unsigned src0_negate:1; |
unsigned src0_address_mode:1; |
unsigned src0_swz_z:2; |
unsigned src0_swz_w:2; |
unsigned pad0:1; |
unsigned src0_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad1:5; |
} da16; |
struct { |
unsigned src0_swz_x:2; |
unsigned src0_swz_y:2; |
int src0_indirect_offset:6; |
unsigned src0_subreg_nr:3; |
unsigned src0_abs:1; |
unsigned src0_negate:1; |
unsigned src0_address_mode:1; |
unsigned src0_swz_z:2; |
unsigned src0_swz_w:2; |
unsigned pad0:1; |
unsigned src0_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad1:5; |
} ia16; |
/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction. |
* |
* Does not apply to Gen6+. The SFID/message target moved to bits |
* 27:24 of the header (destreg__conditionalmod); EOT is in bits3. |
*/ |
struct { |
unsigned pad:26; |
unsigned end_of_thread:1; |
unsigned pad1:1; |
unsigned sfid:4; |
} send_gen5; /* for Ironlake only */ |
struct { |
unsigned src0_rep_ctrl:1; |
unsigned src0_swizzle:8; |
unsigned src0_subreg_nr:3; |
unsigned src0_reg_nr:8; |
unsigned pad0:1; |
unsigned src1_rep_ctrl:1; |
unsigned src1_swizzle:8; |
unsigned src1_subreg_nr_low:2; |
} da3src; |
} bits2; |
union { |
struct { |
unsigned src1_subreg_nr:5; |
unsigned src1_reg_nr:8; |
unsigned src1_abs:1; |
unsigned src1_negate:1; |
unsigned src1_address_mode:1; |
unsigned src1_horiz_stride:2; |
unsigned src1_width:3; |
unsigned src1_vert_stride:4; |
unsigned pad0:7; |
} da1; |
struct { |
unsigned src1_swz_x:2; |
unsigned src1_swz_y:2; |
unsigned src1_subreg_nr:1; |
unsigned src1_reg_nr:8; |
unsigned src1_abs:1; |
unsigned src1_negate:1; |
unsigned src1_address_mode:1; |
unsigned src1_swz_z:2; |
unsigned src1_swz_w:2; |
unsigned pad1:1; |
unsigned src1_vert_stride:4; |
unsigned pad2:7; |
} da16; |
struct { |
int src1_indirect_offset:10; |
unsigned src1_subreg_nr:3; |
unsigned src1_abs:1; |
unsigned src1_negate:1; |
unsigned src1_address_mode:1; |
unsigned src1_horiz_stride:2; |
unsigned src1_width:3; |
unsigned src1_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad1:5; |
} ia1; |
struct { |
unsigned src1_swz_x:2; |
unsigned src1_swz_y:2; |
int src1_indirect_offset:6; |
unsigned src1_subreg_nr:3; |
unsigned src1_abs:1; |
unsigned src1_negate:1; |
unsigned pad0:1; |
unsigned src1_swz_z:2; |
unsigned src1_swz_w:2; |
unsigned pad1:1; |
unsigned src1_vert_stride:4; |
unsigned flag_subreg_nr:1; |
unsigned flag_reg_nr:1; |
unsigned pad2:5; |
} ia16; |
struct { |
int jump_count:16; /* note: signed */ |
unsigned pop_count:4; |
unsigned pad0:12; |
} if_else; |
/* This is also used for gen7 IF/ELSE instructions */ |
struct { |
/* Signed jump distance to the ip to jump to if all channels |
* are disabled after the break or continue. It should point |
* to the end of the innermost control flow block, as that's |
* where some channel could get re-enabled. |
*/ |
int jip:16; |
/* Signed jump distance to the location to resume execution |
* of this channel if it's enabled for the break or continue. |
*/ |
int uip:16; |
} break_cont; |
/** |
* \defgroup SEND instructions / Message Descriptors |
* |
* @{ |
*/ |
/** |
* Generic Message Descriptor for Gen4 SEND instructions. The structs |
* below expand function_control to something specific for their |
* message. Due to struct packing issues, they duplicate these bits. |
* |
* See the G45 PRM, Volume 4, Table 14-15. |
*/ |
struct { |
unsigned function_control:16; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} generic; |
/** |
* Generic Message Descriptor for Gen5-7 SEND instructions. |
* |
* See the Sandybridge PRM, Volume 2 Part 2, Table 8-15. (Sadly, most |
* of the information on the SEND instruction is missing from the public |
* Ironlake PRM.) |
* |
* The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies. |
* According to the SEND instruction description: |
* "The MSb of the message description, the EOT field, always comes from |
* bit 127 of the instruction word"...which is bit 31 of this field. |
*/ |
struct { |
unsigned function_control:19; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} generic_gen5; |
/** G45 PRM, Volume 4, Section 6.1.1.1 */ |
struct { |
unsigned function:4; |
unsigned int_type:1; |
unsigned precision:1; |
unsigned saturate:1; |
unsigned data_type:1; |
unsigned pad0:8; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} math; |
/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */ |
struct { |
unsigned function:4; |
unsigned int_type:1; |
unsigned precision:1; |
unsigned saturate:1; |
unsigned data_type:1; |
unsigned snapshot:1; |
unsigned pad0:10; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} math_gen5; |
/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */ |
struct { |
unsigned binding_table_index:8; |
unsigned sampler:4; |
unsigned return_format:2; |
unsigned msg_type:2; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} sampler; |
/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */ |
struct { |
unsigned binding_table_index:8; |
unsigned sampler:4; |
unsigned msg_type:4; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} sampler_g4x; |
/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */ |
struct { |
unsigned binding_table_index:8; |
unsigned sampler:4; |
unsigned msg_type:4; |
unsigned simd_mode:2; |
unsigned pad0:1; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} sampler_gen5; |
struct { |
unsigned binding_table_index:8; |
unsigned sampler:4; |
unsigned msg_type:5; |
unsigned simd_mode:2; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} sampler_gen7; |
struct brw_urb_immediate { |
unsigned opcode:4; |
unsigned offset:6; |
unsigned swizzle_control:2; |
unsigned pad:1; |
unsigned allocate:1; |
unsigned used:1; |
unsigned complete:1; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} urb; |
struct { |
unsigned opcode:4; |
unsigned offset:6; |
unsigned swizzle_control:2; |
unsigned pad:1; |
unsigned allocate:1; |
unsigned used:1; |
unsigned complete:1; |
unsigned pad0:3; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} urb_gen5; |
struct { |
unsigned opcode:3; |
unsigned offset:11; |
unsigned swizzle_control:1; |
unsigned complete:1; |
unsigned per_slot_offset:1; |
unsigned pad0:2; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} urb_gen7; |
/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:4; |
unsigned msg_type:2; |
unsigned target_cache:2; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} dp_read; |
/** G45 PRM, Volume 4, Section 5.10.1.1.2 */ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned msg_type:3; |
unsigned target_cache:2; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} dp_read_g4x; |
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned msg_type:3; |
unsigned target_cache:2; |
unsigned pad0:3; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} dp_read_gen5; |
/** G45 PRM, Volume 4, Section 5.10.1.1.2. For both Gen4 and G45. */ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned last_render_target:1; |
unsigned msg_type:3; |
unsigned send_commit_msg:1; |
unsigned response_length:4; |
unsigned msg_length:4; |
unsigned msg_target:4; |
unsigned pad1:3; |
unsigned end_of_thread:1; |
} dp_write; |
/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned last_render_target:1; |
unsigned msg_type:3; |
unsigned send_commit_msg:1; |
unsigned pad0:3; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} dp_write_gen5; |
/** |
* Message for the Sandybridge Sampler Cache or Constant Cache Data Port. |
* |
* See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1. |
**/ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:5; |
unsigned msg_type:3; |
unsigned pad0:3; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} gen6_dp_sampler_const_cache; |
/** |
* Message for the Sandybridge Render Cache Data Port. |
* |
* Most fields are defined in the Sandybridge PRM, Volume 4 Part 1, |
* Section 3.9.2.1.1: Message Descriptor. |
* |
* "Slot Group Select" and "Last Render Target" are part of the |
* 5-bit message control for Render Target Write messages. See |
* Section 3.9.9.2.1 of the same volume. |
*/ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned slot_group_select:1; |
unsigned last_render_target:1; |
unsigned msg_type:4; |
unsigned send_commit_msg:1; |
unsigned pad0:1; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad1:2; |
unsigned end_of_thread:1; |
} gen6_dp; |
/** |
* Message for any of the Gen7 Data Port caches. |
* |
* Most fields are defined in BSpec volume 5c.2 Data Port / Messages / |
* Data Port Messages / Message Descriptor. Once again, "Slot Group |
* Select" and "Last Render Target" are part of the 6-bit message |
* control for Render Target Writes. |
*/ |
struct { |
unsigned binding_table_index:8; |
unsigned msg_control:3; |
unsigned slot_group_select:1; |
unsigned last_render_target:1; |
unsigned msg_control_pad:1; |
unsigned msg_type:4; |
unsigned pad1:1; |
unsigned header_present:1; |
unsigned response_length:5; |
unsigned msg_length:4; |
unsigned pad2:2; |
unsigned end_of_thread:1; |
} gen7_dp; |
/** @} */ |
struct { |
unsigned src1_subreg_nr_high:1; |
unsigned src1_reg_nr:8; |
unsigned pad0:1; |
unsigned src2_rep_ctrl:1; |
unsigned src2_swizzle:8; |
unsigned src2_subreg_nr:3; |
unsigned src2_reg_nr:8; |
unsigned pad1:2; |
} da3src; |
int d; |
unsigned ud; |
float f; |
} bits3; |
}; |
/* These aren't hardware structs, just something useful for us to pass around: |
* |
* Align1 operation has a lot of control over input ranges. Used in |
* WM programs to implement shaders decomposed into "channel serial" |
* or "structure of array" form: |
* |
* The leading bitfields add up to 32 bits and dw1 is a second dword. |
* dw1 carries either the align16/indirect fields (bits) or, for the |
* registers built by the brw_imm_*() helpers, the immediate payload |
* (f, d or ud). |
*/ |
struct brw_reg { |
unsigned type:4; /* one of BRW_REGISTER_TYPE_x */ |
unsigned file:2; /* one of the BRW_x_REGISTER_FILE values */ |
unsigned nr:8; /* register number/index */ |
unsigned subnr:5; /* byte offset (brw_reg() stores subnr * type_sz); :1 in align16 */ |
unsigned negate:1; /* source only */ |
unsigned abs:1; /* source only */ |
unsigned vstride:4; /* source only */ |
unsigned width:3; /* src only, align1 only */ |
unsigned hstride:2; /* align1 only */ |
unsigned address_mode:1; /* relative addressing, hopefully! */ |
unsigned pad0:1; /* always written as 0 by brw_reg() */ |
union { |
struct { |
unsigned swizzle:8; /* src only, align16 only */ |
unsigned writemask:4; /* dest only, align16 only */ |
int indirect_offset:10; /* relative addressing offset */ |
unsigned pad1:10; /* two dwords total */ |
} bits; |
float f; /* immediate value, see brw_imm_f() */ |
int d; /* immediate value, see brw_imm_d() */ |
unsigned ud; /* immediate value, see brw_imm_ud() */ |
} dw1; |
}; |
/* Handle for register-indirect addressing: names an address-register
 * sub-register plus a signed offset. The deref_*() helpers turn it into
 * a brw_reg and get_addr_reg() returns the address register itself.
 */
struct brw_indirect { |
unsigned addr_subnr:4; /* sub-register passed to brw_address_reg() */ |
int addr_offset:10; /* signed offset, summed into indirect_offset by deref_*() */ |
unsigned pad:18; /* zeroed by brw_indirect() */ |
}; |
/* Limits for the assembler state below: depth of the push/pop state
 * stack and the instruction count asserted in brw_next_insn().
 */
#define BRW_EU_MAX_INSN_STACK 5 |
#define BRW_EU_MAX_INSN 10000 |
/* Assembler context: the output instruction array plus the default
 * instruction state that brw_next_insn() copies into each new slot.
 */
struct brw_compile { |
struct brw_instruction *store; /* output array, indexed by nr_insn */ |
unsigned nr_insn; /* number of instructions emitted so far */ |
int gen; /* hardware generation; compared against octal values such as 060 */ |
/* Allow clients to push/pop instruction state: |
*/ |
struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; |
bool compressed_stack[BRW_EU_MAX_INSN_STACK]; |
struct brw_instruction *current; /* template copied into every new instruction */ |
unsigned flag_value; |
bool single_program_flow; |
bool compressed; |
/* Control flow stacks: |
* - if_stack contains IF and ELSE instructions which must be patched |
* (and popped) once the matching ENDIF instruction is encountered. |
*/ |
struct brw_instruction **if_stack; |
int if_stack_depth; |
int if_stack_array_size; |
}; |
static inline int type_sz(unsigned type) |
{ |
switch (type) { |
case BRW_REGISTER_TYPE_UD: |
case BRW_REGISTER_TYPE_D: |
case BRW_REGISTER_TYPE_F: |
return 4; |
case BRW_REGISTER_TYPE_HF: |
case BRW_REGISTER_TYPE_UW: |
case BRW_REGISTER_TYPE_W: |
return 2; |
case BRW_REGISTER_TYPE_UB: |
case BRW_REGISTER_TYPE_B: |
return 1; |
default: |
return 0; |
} |
} |
/** |
* Construct a brw_reg. |
* \param file one of the BRW_x_REGISTER_FILE values |
* \param nr register number/index |
* \param subnr register sub number |
* \param type one of BRW_REGISTER_TYPE_x |
* \param vstride one of BRW_VERTICAL_STRIDE_x |
* \param width one of BRW_WIDTH_x |
* \param hstride one of BRW_HORIZONTAL_STRIDE_x |
* \param swizzle one of BRW_SWIZZLE_x |
* \param writemask WRITEMASK_X/Y/Z/W bitfield |
*/ |
static inline struct brw_reg brw_reg(unsigned file, |
unsigned nr, |
unsigned subnr, |
unsigned type, |
unsigned vstride, |
unsigned width, |
unsigned hstride, |
unsigned swizzle, |
unsigned writemask) |
{ |
struct brw_reg reg; |
if (file == BRW_GENERAL_REGISTER_FILE) |
assert(nr < BRW_MAX_GRF); |
else if (file == BRW_MESSAGE_REGISTER_FILE) |
assert((nr & ~(1 << 7)) < BRW_MAX_MRF); |
else if (file == BRW_ARCHITECTURE_REGISTER_FILE) |
assert(nr <= BRW_ARF_IP); |
reg.type = type; |
reg.file = file; |
reg.nr = nr; |
reg.subnr = subnr * type_sz(type); |
reg.negate = 0; |
reg.abs = 0; |
reg.vstride = vstride; |
reg.width = width; |
reg.hstride = hstride; |
reg.address_mode = BRW_ADDRESS_DIRECT; |
reg.pad0 = 0; |
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to |
* set swizzle and writemask to W, as the lower bits of subnr will |
* be lost when converted to align16. This is probably too much to |
* keep track of as you'd want it adjusted by suboffset(), etc. |
* Perhaps fix up when converting to align16? |
*/ |
reg.dw1.bits.swizzle = swizzle; |
reg.dw1.bits.writemask = writemask; |
reg.dw1.bits.indirect_offset = 0; |
reg.dw1.bits.pad1 = 0; |
return reg; |
} |
/** Construct float[16] register */ |
static inline struct brw_reg brw_vec16_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return brw_reg(file, |
nr, |
subnr, |
BRW_REGISTER_TYPE_F, |
BRW_VERTICAL_STRIDE_16, |
BRW_WIDTH_16, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYZW, |
WRITEMASK_XYZW); |
} |
/** Construct float[8] register */ |
static inline struct brw_reg brw_vec8_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return brw_reg(file, |
nr, |
subnr, |
BRW_REGISTER_TYPE_F, |
BRW_VERTICAL_STRIDE_8, |
BRW_WIDTH_8, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYZW, |
WRITEMASK_XYZW); |
} |
/** Construct float[4] register */ |
static inline struct brw_reg brw_vec4_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return brw_reg(file, |
nr, |
subnr, |
BRW_REGISTER_TYPE_F, |
BRW_VERTICAL_STRIDE_4, |
BRW_WIDTH_4, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYZW, |
WRITEMASK_XYZW); |
} |
/** Construct float[2] register */ |
static inline struct brw_reg brw_vec2_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return brw_reg(file, |
nr, |
subnr, |
BRW_REGISTER_TYPE_F, |
BRW_VERTICAL_STRIDE_2, |
BRW_WIDTH_2, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYXY, |
WRITEMASK_XY); |
} |
/** Construct float[1] register */ |
static inline struct brw_reg brw_vec1_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return brw_reg(file, |
nr, |
subnr, |
BRW_REGISTER_TYPE_F, |
BRW_VERTICAL_STRIDE_0, |
BRW_WIDTH_1, |
BRW_HORIZONTAL_STRIDE_0, |
BRW_SWIZZLE_XXXX, |
WRITEMASK_X); |
} |
static inline struct brw_reg __retype(struct brw_reg reg, |
unsigned type) |
{ |
reg.type = type; |
return reg; |
} |
static inline struct brw_reg __retype_d(struct brw_reg reg) |
{ |
return __retype(reg, BRW_REGISTER_TYPE_D); |
} |
static inline struct brw_reg __retype_ud(struct brw_reg reg) |
{ |
return __retype(reg, BRW_REGISTER_TYPE_UD); |
} |
static inline struct brw_reg __retype_uw(struct brw_reg reg) |
{ |
return __retype(reg, BRW_REGISTER_TYPE_UW); |
} |
/**
 * Step to the following register number, unless the region has a zero
 * vertical stride, in which case reg is returned unchanged.
 */
static inline struct brw_reg __sechalf(struct brw_reg reg)
{
    if (reg.vstride == 0)
        return reg;

    reg.nr += 1;
    return reg;
}
static inline struct brw_reg __suboffset(struct brw_reg reg, |
unsigned delta) |
{ |
reg.subnr += delta * type_sz(reg.type); |
return reg; |
} |
static inline struct brw_reg __offset(struct brw_reg reg, |
unsigned delta) |
{ |
reg.nr += delta; |
return reg; |
} |
static inline struct brw_reg byte_offset(struct brw_reg reg, |
unsigned bytes) |
{ |
unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; |
reg.nr = newoffset / REG_SIZE; |
reg.subnr = newoffset % REG_SIZE; |
return reg; |
} |
/** Construct unsigned word[16] register */ |
static inline struct brw_reg brw_uw16_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); |
} |
/** Construct unsigned word[8] register */ |
static inline struct brw_reg brw_uw8_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); |
} |
/** Construct unsigned word[1] register */ |
static inline struct brw_reg brw_uw1_reg(unsigned file, |
unsigned nr, |
unsigned subnr) |
{ |
return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); |
} |
static inline struct brw_reg brw_imm_reg(unsigned type) |
{ |
return brw_reg( BRW_IMMEDIATE_VALUE, |
0, |
0, |
type, |
BRW_VERTICAL_STRIDE_0, |
BRW_WIDTH_1, |
BRW_HORIZONTAL_STRIDE_0, |
0, |
0); |
} |
/** Construct float immediate register */ |
static inline struct brw_reg brw_imm_f(float f) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); |
imm.dw1.f = f; |
return imm; |
} |
/** Construct integer immediate register */ |
static inline struct brw_reg brw_imm_d(int d) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); |
imm.dw1.d = d; |
return imm; |
} |
/** Construct uint immediate register */ |
static inline struct brw_reg brw_imm_ud(unsigned ud) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); |
imm.dw1.ud = ud; |
return imm; |
} |
/** Construct ushort immediate register */ |
static inline struct brw_reg brw_imm_uw(uint16_t uw) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); |
imm.dw1.ud = uw | (uw << 16); |
return imm; |
} |
/** Construct short immediate register */ |
static inline struct brw_reg brw_imm_w(int16_t w) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); |
imm.dw1.d = w | (w << 16); |
return imm; |
} |
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type |
* numbers alias with _V and _VF below: |
*/ |
/** Construct vector of eight signed half-byte values */ |
static inline struct brw_reg brw_imm_v(unsigned v) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); |
imm.vstride = BRW_VERTICAL_STRIDE_0; |
imm.width = BRW_WIDTH_8; |
imm.hstride = BRW_HORIZONTAL_STRIDE_1; |
imm.dw1.ud = v; |
return imm; |
} |
/** Construct vector of four 8-bit float values */ |
static inline struct brw_reg brw_imm_vf(unsigned v) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); |
imm.vstride = BRW_VERTICAL_STRIDE_0; |
imm.width = BRW_WIDTH_4; |
imm.hstride = BRW_HORIZONTAL_STRIDE_1; |
imm.dw1.ud = v; |
return imm; |
} |
#define VF_ZERO 0x0 |
#define VF_ONE 0x30 |
#define VF_NEG (1<<7) |
static inline struct brw_reg brw_imm_vf4(unsigned v0, |
unsigned v1, |
unsigned v2, |
unsigned v3) |
{ |
struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); |
imm.vstride = BRW_VERTICAL_STRIDE_0; |
imm.width = BRW_WIDTH_4; |
imm.hstride = BRW_HORIZONTAL_STRIDE_1; |
imm.dw1.ud = ((v0 << 0) | |
(v1 << 8) | |
(v2 << 16) | |
(v3 << 24)); |
return imm; |
} |
static inline struct brw_reg brw_address(struct brw_reg reg) |
{ |
return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); |
} |
/** Construct float[1] general-purpose register */ |
static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr) |
{ |
return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
/** Construct float[2] general-purpose register */ |
static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr) |
{ |
return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
/** Construct float[4] general-purpose register */ |
static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr) |
{ |
return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
/** Construct float[8] general-purpose register */ |
static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr) |
{ |
return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr) |
{ |
return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr) |
{ |
return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); |
} |
/** Construct null register (usually used for setting condition codes) */ |
static inline struct brw_reg brw_null_reg(void) |
{ |
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_NULL, |
0); |
} |
static inline struct brw_reg brw_address_reg(unsigned subnr) |
{ |
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_ADDRESS, |
subnr); |
} |
/* If/else instructions break in align16 mode if writemask & swizzle |
* aren't xyzw. This goes against the convention for other scalar |
* regs: |
*/ |
static inline struct brw_reg brw_ip_reg(void) |
{ |
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_IP, |
0, |
BRW_REGISTER_TYPE_UD, |
BRW_VERTICAL_STRIDE_4, /* ? */ |
BRW_WIDTH_1, |
BRW_HORIZONTAL_STRIDE_0, |
BRW_SWIZZLE_XYZW, /* NOTE! */ |
WRITEMASK_XYZW); /* NOTE! */ |
} |
static inline struct brw_reg brw_acc_reg(void) |
{ |
return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_ACCUMULATOR, |
0); |
} |
static inline struct brw_reg brw_notification_1_reg(void) |
{ |
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_NOTIFICATION_COUNT, |
1, |
BRW_REGISTER_TYPE_UD, |
BRW_VERTICAL_STRIDE_0, |
BRW_WIDTH_1, |
BRW_HORIZONTAL_STRIDE_0, |
BRW_SWIZZLE_XXXX, |
WRITEMASK_X); |
} |
static inline struct brw_reg brw_flag_reg(void) |
{ |
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_FLAG, |
0); |
} |
static inline struct brw_reg brw_mask_reg(unsigned subnr) |
{ |
return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, |
BRW_ARF_MASK, |
subnr); |
} |
static inline struct brw_reg brw_message_reg(unsigned nr) |
{ |
assert((nr & ~(1 << 7)) < BRW_MAX_MRF); |
return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); |
} |
static inline struct brw_reg brw_message4_reg(unsigned nr, |
int subnr) |
{ |
assert((nr & ~(1 << 7)) < BRW_MAX_MRF); |
return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr); |
} |
/* Translate an element count into its encoded field value:
 * 0 -> 0, then 1, 2, 4, 8, 16, 32 -> 1..6. Anything else yields 0.
 *
 * This is almost always called with a numeric constant argument, so
 * it is written as straight-line comparisons that fold at compile time.
 */
static inline unsigned cvt(unsigned val)
{
    if (val <= 2)
        return val;
    if (val == 4)
        return 3;
    if (val == 8)
        return 4;
    if (val == 16)
        return 5;
    if (val == 32)
        return 6;
    return 0;
}
static inline struct brw_reg __stride(struct brw_reg reg, |
unsigned vstride, |
unsigned width, |
unsigned hstride) |
{ |
reg.vstride = cvt(vstride); |
reg.width = cvt(width) - 1; |
reg.hstride = cvt(hstride); |
return reg; |
} |
/** Same register with a <16;16,1> region. */
static inline struct brw_reg vec16(struct brw_reg reg)
{
    const unsigned elems = 16;

    return __stride(reg, elems, elems, 1);
}
/** Same register with an <8;8,1> region. */
static inline struct brw_reg vec8(struct brw_reg reg)
{
    const unsigned elems = 8;

    return __stride(reg, elems, elems, 1);
}
/** Same register with a <4;4,1> region. */
static inline struct brw_reg vec4(struct brw_reg reg)
{
    const unsigned elems = 4;

    return __stride(reg, elems, elems, 1);
}
/** Same register with a <2;2,1> region. */
static inline struct brw_reg vec2(struct brw_reg reg)
{
    const unsigned elems = 2;

    return __stride(reg, elems, elems, 1);
}
/** Same register with a scalar <0;1,0> region. */
static inline struct brw_reg vec1(struct brw_reg reg)
{
    const unsigned no_stride = 0;

    return __stride(reg, no_stride, 1, no_stride);
}
static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt) |
{ |
return vec1(__suboffset(reg, elt)); |
} |
static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt) |
{ |
return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt)); |
} |
static inline struct brw_reg brw_swizzle(struct brw_reg reg, |
unsigned x, |
unsigned y, |
unsigned z, |
unsigned w) |
{ |
assert(reg.file != BRW_IMMEDIATE_VALUE); |
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), |
BRW_GET_SWZ(reg.dw1.bits.swizzle, y), |
BRW_GET_SWZ(reg.dw1.bits.swizzle, z), |
BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); |
return reg; |
} |
static inline struct brw_reg brw_swizzle1(struct brw_reg reg, |
unsigned x) |
{ |
return brw_swizzle(reg, x, x, x, x); |
} |
static inline struct brw_reg brw_writemask(struct brw_reg reg, |
unsigned mask) |
{ |
assert(reg.file != BRW_IMMEDIATE_VALUE); |
reg.dw1.bits.writemask &= mask; |
return reg; |
} |
static inline struct brw_reg brw_set_writemask(struct brw_reg reg, |
unsigned mask) |
{ |
assert(reg.file != BRW_IMMEDIATE_VALUE); |
reg.dw1.bits.writemask = mask; |
return reg; |
} |
/** Toggle the negate source modifier; applying it twice cancels out. */
static inline struct brw_reg brw_negate(struct brw_reg reg)
{
    reg.negate = !reg.negate;
    return reg;
}
/**
 * Return reg with the absolute-value source modifier applied.
 *
 * Any negate modifier already present on reg is cleared: the absolute
 * value of a negated operand is the plain absolute value, and leaving
 * the flag set would keep a negation on the result, so that
 * brw_abs(brw_negate(r)) would no longer describe |r|.
 */
static inline struct brw_reg brw_abs(struct brw_reg reg)
{
    reg.abs = 1;
    reg.negate = 0;
    return reg;
}
/*********************************************************************** |
*/ |
static inline struct brw_reg brw_vec4_indirect(unsigned subnr, |
int offset) |
{ |
struct brw_reg reg = brw_vec4_grf(0, 0); |
reg.subnr = subnr; |
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; |
reg.dw1.bits.indirect_offset = offset; |
return reg; |
} |
static inline struct brw_reg brw_vec1_indirect(unsigned subnr, |
int offset) |
{ |
struct brw_reg reg = brw_vec1_grf(0, 0); |
reg.subnr = subnr; |
reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; |
reg.dw1.bits.indirect_offset = offset; |
return reg; |
} |
/** 4-wide float operand at ptr plus an additional offset. */
static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
    const int total = ptr.addr_offset + offset;

    return brw_vec4_indirect(ptr.addr_subnr, total);
}
/** Scalar float operand at ptr plus an additional offset. */
static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
    const int total = ptr.addr_offset + offset;

    return brw_vec1_indirect(ptr.addr_subnr, total);
}
static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) |
{ |
return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); |
} |
static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) |
{ |
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); |
} |
static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset) |
{ |
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); |
} |
static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) |
{ |
return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); |
} |
/** The address sub-register that ptr is based on. */
static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
    const unsigned subnr = ptr.addr_subnr;

    return brw_address_reg(subnr);
}
/** Copy of ptr with offset added to its stored address offset. */
static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
    ptr.addr_offset = ptr.addr_offset + offset;
    return ptr;
}
static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset) |
{ |
struct brw_indirect ptr; |
ptr.addr_subnr = addr_subnr; |
ptr.addr_offset = offset; |
ptr.pad = 0; |
return ptr; |
} |
/** Do two brw_regs refer to the same register? */ |
static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2) |
{ |
return r1.file == r2.file && r1.nr == r2.nr; |
} |
static inline struct brw_instruction *current_insn( struct brw_compile *p) |
{ |
return &p->store[p->nr_insn]; |
} |
static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) |
{ |
p->current->header.predicate_control = pc; |
} |
static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse) |
{ |
p->current->header.predicate_inverse = predicate_inverse; |
} |
static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) |
{ |
p->current->header.destreg__conditionalmod = conditional; |
} |
static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode) |
{ |
p->current->header.access_mode = access_mode; |
} |
static inline void brw_set_mask_control(struct brw_compile *p, unsigned value) |
{ |
p->current->header.mask_control = value; |
} |
static inline void brw_set_saturate(struct brw_compile *p, unsigned value) |
{ |
p->current->header.saturate = value; |
} |
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value) |
{ |
if (p->gen >= 060) |
p->current->header.acc_wr_control = value; |
} |
void brw_pop_insn_state(struct brw_compile *p); |
void brw_push_insn_state(struct brw_compile *p); |
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control); |
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); |
void brw_compile_init(struct brw_compile *p, int gen, void *store); |
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, |
struct brw_reg dest); |
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, |
struct brw_reg reg); |
void brw_set_src1(struct brw_compile *p, |
struct brw_instruction *insn, |
struct brw_reg reg); |
void gen6_resolve_implied_move(struct brw_compile *p, |
struct brw_reg *src, |
unsigned msg_reg_nr); |
static inline struct brw_instruction * |
brw_next_insn(struct brw_compile *p, unsigned opcode) |
{ |
struct brw_instruction *insn; |
assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); |
insn = &p->store[p->nr_insn++]; |
*insn = *p->current; |
if (p->current->header.destreg__conditionalmod) { |
p->current->header.destreg__conditionalmod = 0; |
p->current->header.predicate_control = BRW_PREDICATE_NORMAL; |
} |
insn->header.opcode = opcode; |
return insn; |
} |
/* Helpers for regular instructions: |
* |
* ALU1(OP) defines brw_OP(p, dest, src0), a one-source emitter that |
* forwards to brw_alu1() with BRW_OPCODE_OP and returns the new |
* instruction. |
*/ |
#define ALU1(OP) \ |
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ |
struct brw_reg dest, \ |
struct brw_reg src0) \ |
{ \ |
return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ |
} |
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), a two-source emitter
 * that forwards to brw_alu2() with BRW_OPCODE_OP and returns the new
 * instruction.
 */
#define ALU2(OP) \ |
static inline struct brw_instruction *brw_##OP(struct brw_compile *p, \ |
struct brw_reg dest, \ |
struct brw_reg src0, \ |
struct brw_reg src1) \ |
{ \ |
return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ |
} |
/* Rounding operations (other than RNDD) require two instructions - the first |
* stores a rounded value (possibly the wrong way) in the dest register, but |
* also sets a per-channel "increment bit" in the flag register. A predicated |
* add of 1.0 fixes dest to contain the desired result. |
* |
* Sandybridge and later appear to round correctly without an ADD. |
* |
* ROUND(OP) defines brw_OP(p, dest, src), which returns nothing. When |
* p->gen < 060 the rounding instruction gets BRW_CONDITIONAL_R and is |
* followed by an ADD of 1.0f under BRW_PREDICATE_NORMAL. |
*/ |
#define ROUND(OP) \ |
static inline void brw_##OP(struct brw_compile *p, \ |
struct brw_reg dest, \ |
struct brw_reg src) \ |
{ \ |
struct brw_instruction *rnd, *add; \ |
rnd = brw_next_insn(p, BRW_OPCODE_##OP); \ |
brw_set_dest(p, rnd, dest); \ |
brw_set_src0(p, rnd, src); \ |
if (p->gen < 060) { \ |
/* turn on round-increments */ \ |
rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ |
add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ |
add->header.predicate_control = BRW_PREDICATE_NORMAL; \ |
} \ |
} |
static inline struct brw_instruction *brw_alu1(struct brw_compile *p, |
unsigned opcode, |
struct brw_reg dest, |
struct brw_reg src) |
{ |
struct brw_instruction *insn = brw_next_insn(p, opcode); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
return insn; |
} |
static inline struct brw_instruction *brw_alu2(struct brw_compile *p, |
unsigned opcode, |
struct brw_reg dest, |
struct brw_reg src0, |
struct brw_reg src1 ) |
{ |
struct brw_instruction *insn = brw_next_insn(p, opcode); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, src1); |
return insn; |
} |
static inline struct brw_instruction *brw_ADD(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg src0, |
struct brw_reg src1) |
{ |
/* 6.2.2: add */ |
if (src0.type == BRW_REGISTER_TYPE_F || |
(src0.file == BRW_IMMEDIATE_VALUE && |
src0.type == BRW_REGISTER_TYPE_VF)) { |
assert(src1.type != BRW_REGISTER_TYPE_UD); |
assert(src1.type != BRW_REGISTER_TYPE_D); |
} |
if (src1.type == BRW_REGISTER_TYPE_F || |
(src1.file == BRW_IMMEDIATE_VALUE && |
src1.type == BRW_REGISTER_TYPE_VF)) { |
assert(src0.type != BRW_REGISTER_TYPE_UD); |
assert(src0.type != BRW_REGISTER_TYPE_D); |
} |
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); |
} |
static inline struct brw_instruction *brw_MUL(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg src0, |
struct brw_reg src1) |
{ |
/* 6.32.38: mul */ |
if (src0.type == BRW_REGISTER_TYPE_D || |
src0.type == BRW_REGISTER_TYPE_UD || |
src1.type == BRW_REGISTER_TYPE_D || |
src1.type == BRW_REGISTER_TYPE_UD) { |
assert(dest.type != BRW_REGISTER_TYPE_F); |
} |
if (src0.type == BRW_REGISTER_TYPE_F || |
(src0.file == BRW_IMMEDIATE_VALUE && |
src0.type == BRW_REGISTER_TYPE_VF)) { |
assert(src1.type != BRW_REGISTER_TYPE_UD); |
assert(src1.type != BRW_REGISTER_TYPE_D); |
} |
if (src1.type == BRW_REGISTER_TYPE_F || |
(src1.file == BRW_IMMEDIATE_VALUE && |
src1.type == BRW_REGISTER_TYPE_VF)) { |
assert(src0.type != BRW_REGISTER_TYPE_UD); |
assert(src0.type != BRW_REGISTER_TYPE_D); |
} |
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || |
src0.nr != BRW_ARF_ACCUMULATOR); |
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || |
src1.nr != BRW_ARF_ACCUMULATOR); |
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); |
} |
static inline struct brw_instruction *brw_JMPI(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg src0, |
struct brw_reg src1) |
{ |
struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); |
insn->header.execution_size = 1; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.mask_control = BRW_MASK_DISABLE; |
p->current->header.predicate_control = BRW_PREDICATE_NONE; |
return insn; |
} |
/* Instantiate the inline emitters: ALU1(OP) yields a one-source
 * brw_OP(), ALU2(OP) a two-source brw_OP(), and ROUND(OP) the rounding
 * helpers. The generator macros are undefined again afterwards.
 */
ALU1(MOV); |
ALU2(SEL); |
ALU1(NOT); |
ALU2(AND); |
ALU2(OR); |
ALU2(XOR); |
ALU2(SHR); |
ALU2(SHL); |
ALU2(RSR); |
ALU2(RSL); |
ALU2(ASR); |
ALU1(FRC); |
ALU1(RNDD); |
ALU2(MAC); |
ALU2(MACH); |
ALU1(LZD); |
ALU2(DP4); |
ALU2(DPH); |
ALU2(DP3); |
ALU2(DP2); |
ALU2(LINE); |
ALU2(PLN); |
ROUND(RNDZ); |
ROUND(RNDE); |
#undef ALU1 |
#undef ALU2 |
#undef ROUND |
/* Helpers for SEND instruction */ |
void brw_set_dp_read_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned binding_table_index, |
unsigned msg_control, |
unsigned msg_type, |
unsigned target_cache, |
unsigned msg_length, |
unsigned response_length); |
void brw_set_dp_write_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned binding_table_index, |
unsigned msg_control, |
unsigned msg_type, |
unsigned msg_length, |
bool header_present, |
bool last_render_target, |
unsigned response_length, |
bool end_of_thread, |
bool send_commit_msg); |
void brw_urb_WRITE(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
bool allocate, |
bool used, |
unsigned msg_length, |
unsigned response_length, |
bool eot, |
bool writes_complete, |
unsigned offset, |
unsigned swizzle); |
void brw_ff_sync(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
bool allocate, |
unsigned response_length, |
bool eot); |
void brw_fb_WRITE(struct brw_compile *p, |
int dispatch_width, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
unsigned msg_control, |
unsigned binding_table_index, |
unsigned msg_length, |
unsigned response_length, |
bool eot, |
bool header_present); |
void brw_SAMPLE(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
unsigned binding_table_index, |
unsigned sampler, |
unsigned writemask, |
unsigned msg_type, |
unsigned response_length, |
unsigned msg_length, |
bool header_present, |
unsigned simd_mode); |
void brw_math_16(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
unsigned saturate, |
unsigned msg_reg_nr, |
struct brw_reg src, |
unsigned precision); |
void brw_math(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
unsigned saturate, |
unsigned msg_reg_nr, |
struct brw_reg src, |
unsigned data_type, |
unsigned precision); |
void brw_math2(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
struct brw_reg src0, |
struct brw_reg src1); |
/*
 * Data-port read/write emitters.  NOTE(review): definitions are not
 * visible here; semantics are presumed from the names -- confirm.
 */
/* Presumably an OWord block read from the surface at bind_table_index. */
void brw_oword_block_read(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
uint32_t offset, |
uint32_t bind_table_index); |
/* Presumably reads num_regs registers back from scratch space at offset. */
void brw_oword_block_read_scratch(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
int num_regs, |
unsigned offset); |
/* Presumably writes num_regs registers to scratch space at offset. */
void brw_oword_block_write_scratch(struct brw_compile *p, |
struct brw_reg mrf, |
int num_regs, |
unsigned offset); |
/* Presumably a DWord scattered read from the surface at bind_table_index. */
void brw_dword_scattered_read(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
uint32_t bind_table_index); |
/* Presumably a vertex-shader data-port read at a fixed location. */
void brw_dp_READ_4_vs(struct brw_compile *p, |
struct brw_reg dest, |
unsigned location, |
unsigned bind_table_index); |
/* As above, but presumably addressed relative to addrReg plus offset. */
void brw_dp_READ_4_vs_relative(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg addrReg, |
unsigned offset, |
unsigned bind_table_index); |
/* If/else/endif. Works by manipulating the execution flags on each |
* channel. |
*/ |
/* Emits an IF with the given execution size, pushes it on the compile
 * state's if-stack and returns the new instruction. */
struct brw_instruction *brw_IF(struct brw_compile *p, |
unsigned execute_size); |
/* Gen6-style IF with an embedded comparison: `conditional` is stored as
 * the conditional modifier and src0/src1 are the compared operands. */
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional, |
struct brw_reg src0, struct brw_reg src1); |
/* Emits an ELSE and pushes it on the if-stack. */
void brw_ELSE(struct brw_compile *p); |
/* Pops the matching IF (and optional ELSE) from the if-stack; in
 * single-program-flow mode rewrites them as ADDs, otherwise emits an ENDIF
 * and patches the jump targets. */
void brw_ENDIF(struct brw_compile *p); |
/* DO/WHILE loops: |
*/ |
/* NOTE(review): only the prototypes are visible in this part of the file;
 * presumably brw_DO() opens a loop and the instruction it returns is what
 * brw_WHILE() expects as patch_insn -- confirm against the definitions. */
struct brw_instruction *brw_DO(struct brw_compile *p, |
unsigned execute_size); |
struct brw_instruction *brw_WHILE(struct brw_compile *p, |
struct brw_instruction *patch_insn); |
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); |
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); |
/* Gen6 variant of CONT; takes the loop's DO instruction instead of a pop
 * count. */
struct brw_instruction *gen6_CONT(struct brw_compile *p, |
struct brw_instruction *do_insn); |
/* Forward jumps: |
*/ |
/* NOTE(review): definition not visible here; presumably patches jmp_insn
 * so that it targets the next instruction to be emitted -- confirm. */
void brw_land_fwd_jump(struct brw_compile *p, |
struct brw_instruction *jmp_insn); |
/* Emits a BRW_OPCODE_NOP instruction. */
void brw_NOP(struct brw_compile *p); |
/* NOTE(review): definition not visible here; presumably emits a WAIT. */
void brw_WAIT(struct brw_compile *p); |
/* Special case: there is never a destination, execution size will be |
* taken from src0: |
*/ |
/* NOTE(review): the comment above says there is never a destination, yet
 * the prototype takes `dest`; check the definition to see how it is used. */
void brw_CMP(struct brw_compile *p, |
struct brw_reg dest, |
unsigned conditional, |
struct brw_reg src0, |
struct brw_reg src1); |
/* Debug helper; presumably prints a description of reg -- confirm. */
void brw_print_reg(struct brw_reg reg); |
static inline void brw_math_invert(struct brw_compile *p, |
struct brw_reg dst, |
struct brw_reg src) |
{ |
brw_math(p, |
dst, |
BRW_MATH_FUNCTION_INV, |
BRW_MATH_SATURATE_NONE, |
0, |
src, |
BRW_MATH_PRECISION_FULL, |
BRW_MATH_DATA_VECTOR); |
} |
/* NOTE(review): definitions are not visible in this part of the file; the
 * notes below are presumed from the names -- confirm. */
/* Presumably fills in the UIP/JIP jump fields of already-emitted
 * instructions. */
void brw_set_uip_jip(struct brw_compile *p); |
/* Presumably returns the conditional modifier to use when the two compared
 * operands are swapped. */
uint32_t brw_swap_cmod(uint32_t cmod); |
/* Presumably writes a disassembly of inst for hardware generation `gen`
 * to `file`. */
void brw_disasm(FILE *file, |
const struct brw_instruction *inst, |
int gen); |
#endif |
/drivers/video/Intel-2D/brw/brw_eu_emit.c |
---|
0,0 → 1,2002 |
/* |
Copyright (C) Intel Corp. 2006. All Rights Reserved. |
Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
develop this 3D driver. |
Permission is hereby granted, free of charge, to any person obtaining |
a copy of this software and associated documentation files (the |
"Software"), to deal in the Software without restriction, including |
without limitation the rights to use, copy, modify, merge, publish, |
distribute, sublicense, and/or sell copies of the Software, and to |
permit persons to whom the Software is furnished to do so, subject to |
the following conditions: |
The above copyright notice and this permission notice (including the |
next paragraph) shall be included in all copies or substantial |
portions of the Software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
**********************************************************************/ |
/* |
* Authors: |
* Keith Whitwell <keith@tungstengraphics.com> |
*/ |
#include "brw_eu.h" |
#include <string.h> |
#include <stdlib.h> |
/*********************************************************************** |
* Internal helper for constructing instructions |
*/ |
static void guess_execution_size(struct brw_compile *p, |
struct brw_instruction *insn, |
struct brw_reg reg) |
{ |
if (reg.width == BRW_WIDTH_8 && p->compressed) |
insn->header.execution_size = BRW_EXECUTE_16; |
else |
insn->header.execution_size = reg.width; |
} |
/** |
* Prior to Sandybridge, the SEND instruction accepted non-MRF source |
* registers, implicitly moving the operand to a message register. |
* |
* On Sandybridge, this is no longer the case. This function performs the |
* explicit move; it should be called before emitting a SEND instruction. |
*/ |
void |
gen6_resolve_implied_move(struct brw_compile *p, |
struct brw_reg *src, |
unsigned msg_reg_nr) |
{ |
if (p->gen < 060) |
return; |
if (src->file == BRW_MESSAGE_REGISTER_FILE) |
return; |
if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { |
brw_push_insn_state(p); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); |
brw_pop_insn_state(p); |
} |
*src = brw_message_reg(msg_reg_nr); |
} |
static void |
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) |
{ |
/* From the BSpec / ISA Reference / send - [DevIVB+]: |
* "The send with EOT should use register space R112-R127 for <src>. This is |
* to enable loading of a new thread into the same slot while the message |
* with EOT for current thread is pending dispatch." |
* |
* Since we're pretending to have 16 MRFs anyway, we may as well use the |
* registers required for messages with EOT. |
*/ |
if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { |
reg->file = BRW_GENERAL_REGISTER_FILE; |
reg->nr += 111; |
} |
} |
void |
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, |
struct brw_reg dest) |
{ |
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && |
dest.file != BRW_MESSAGE_REGISTER_FILE) |
assert(dest.nr < 128); |
gen7_convert_mrf_to_grf(p, &dest); |
insn->bits1.da1.dest_reg_file = dest.file; |
insn->bits1.da1.dest_reg_type = dest.type; |
insn->bits1.da1.dest_address_mode = dest.address_mode; |
if (dest.address_mode == BRW_ADDRESS_DIRECT) { |
insn->bits1.da1.dest_reg_nr = dest.nr; |
if (insn->header.access_mode == BRW_ALIGN_1) { |
insn->bits1.da1.dest_subreg_nr = dest.subnr; |
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) |
dest.hstride = BRW_HORIZONTAL_STRIDE_1; |
insn->bits1.da1.dest_horiz_stride = dest.hstride; |
} else { |
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; |
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; |
/* even ignored in da16, still need to set as '01' */ |
insn->bits1.da16.dest_horiz_stride = 1; |
} |
} else { |
insn->bits1.ia1.dest_subreg_nr = dest.subnr; |
/* These are different sizes in align1 vs align16: |
*/ |
if (insn->header.access_mode == BRW_ALIGN_1) { |
insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; |
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) |
dest.hstride = BRW_HORIZONTAL_STRIDE_1; |
insn->bits1.ia1.dest_horiz_stride = dest.hstride; |
} |
else { |
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; |
/* even ignored in da16, still need to set as '01' */ |
insn->bits1.ia16.dest_horiz_stride = 1; |
} |
} |
guess_execution_size(p, insn, dest); |
} |
/*
 * Size lookup with eight slots; validate_reg() indexes it with the
 * destination register type field (bits1.da1.dest_reg_type).  There an
 * entry is multiplied by a horizontal stride and compared against 2 for a
 * "word", so the values appear to be sizes in bytes.  Slot 6 has no
 * initializer and is therefore 0.
 * NOTE(review): which register type each index stands for is not visible
 * here -- confirm against the BRW_REGISTER_TYPE_* definitions.
 */
static const int reg_type_size[8] = { |
[0] = 4, |
[1] = 4, |
[2] = 2, |
[3] = 2, |
[4] = 1, |
[5] = 1, |
[7] = 4 |
}; |
static void |
validate_reg(struct brw_instruction *insn, struct brw_reg reg) |
{ |
int hstride_for_reg[] = {0, 1, 2, 4}; |
int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; |
int width_for_reg[] = {1, 2, 4, 8, 16}; |
int execsize_for_reg[] = {1, 2, 4, 8, 16}; |
int width, hstride, vstride, execsize; |
if (reg.file == BRW_IMMEDIATE_VALUE) { |
/* 3.3.6: Region Parameters. Restriction: Immediate vectors |
* mean the destination has to be 128-bit aligned and the |
* destination horiz stride has to be a word. |
*/ |
if (reg.type == BRW_REGISTER_TYPE_V) { |
assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * |
reg_type_size[insn->bits1.da1.dest_reg_type] == 2); |
} |
return; |
} |
if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && |
reg.file == BRW_ARF_NULL) |
return; |
hstride = hstride_for_reg[reg.hstride]; |
if (reg.vstride == 0xf) { |
vstride = -1; |
} else { |
vstride = vstride_for_reg[reg.vstride]; |
} |
width = width_for_reg[reg.width]; |
execsize = execsize_for_reg[insn->header.execution_size]; |
/* Restrictions from 3.3.10: Register Region Restrictions. */ |
/* 3. */ |
assert(execsize >= width); |
/* 4. */ |
if (execsize == width && hstride != 0) { |
assert(vstride == -1 || vstride == width * hstride); |
} |
/* 5. */ |
if (execsize == width && hstride == 0) { |
/* no restriction on vstride. */ |
} |
/* 6. */ |
if (width == 1) { |
assert(hstride == 0); |
} |
/* 7. */ |
if (execsize == 1 && width == 1) { |
assert(hstride == 0); |
assert(vstride == 0); |
} |
/* 8. */ |
if (vstride == 0 && hstride == 0) { |
assert(width == 1); |
} |
/* 10. Check destination issues. */ |
} |
void |
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, |
struct brw_reg reg) |
{ |
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) |
assert(reg.nr < 128); |
gen7_convert_mrf_to_grf(p, ®); |
validate_reg(insn, reg); |
insn->bits1.da1.src0_reg_file = reg.file; |
insn->bits1.da1.src0_reg_type = reg.type; |
insn->bits2.da1.src0_abs = reg.abs; |
insn->bits2.da1.src0_negate = reg.negate; |
insn->bits2.da1.src0_address_mode = reg.address_mode; |
if (reg.file == BRW_IMMEDIATE_VALUE) { |
insn->bits3.ud = reg.dw1.ud; |
/* Required to set some fields in src1 as well: |
*/ |
insn->bits1.da1.src1_reg_file = 0; /* arf */ |
insn->bits1.da1.src1_reg_type = reg.type; |
} else { |
if (reg.address_mode == BRW_ADDRESS_DIRECT) { |
if (insn->header.access_mode == BRW_ALIGN_1) { |
insn->bits2.da1.src0_subreg_nr = reg.subnr; |
insn->bits2.da1.src0_reg_nr = reg.nr; |
} else { |
insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; |
insn->bits2.da16.src0_reg_nr = reg.nr; |
} |
} else { |
insn->bits2.ia1.src0_subreg_nr = reg.subnr; |
if (insn->header.access_mode == BRW_ALIGN_1) { |
insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; |
} else { |
insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; |
} |
} |
if (insn->header.access_mode == BRW_ALIGN_1) { |
if (reg.width == BRW_WIDTH_1 && |
insn->header.execution_size == BRW_EXECUTE_1) { |
insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; |
insn->bits2.da1.src0_width = BRW_WIDTH_1; |
insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; |
} else { |
insn->bits2.da1.src0_horiz_stride = reg.hstride; |
insn->bits2.da1.src0_width = reg.width; |
insn->bits2.da1.src0_vert_stride = reg.vstride; |
} |
} else { |
insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); |
insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); |
insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); |
insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); |
/* This is an oddity of the fact we're using the same |
* descriptions for registers in align_16 as align_1: |
*/ |
if (reg.vstride == BRW_VERTICAL_STRIDE_8) |
insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; |
else |
insn->bits2.da16.src0_vert_stride = reg.vstride; |
} |
} |
} |
void brw_set_src1(struct brw_compile *p, |
struct brw_instruction *insn, |
struct brw_reg reg) |
{ |
assert(reg.file != BRW_MESSAGE_REGISTER_FILE); |
assert(reg.nr < 128); |
gen7_convert_mrf_to_grf(p, ®); |
validate_reg(insn, reg); |
insn->bits1.da1.src1_reg_file = reg.file; |
insn->bits1.da1.src1_reg_type = reg.type; |
insn->bits3.da1.src1_abs = reg.abs; |
insn->bits3.da1.src1_negate = reg.negate; |
/* Only src1 can be immediate in two-argument instructions. */ |
assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); |
if (reg.file == BRW_IMMEDIATE_VALUE) { |
insn->bits3.ud = reg.dw1.ud; |
} else { |
/* This is a hardware restriction, which may or may not be lifted |
* in the future: |
*/ |
assert (reg.address_mode == BRW_ADDRESS_DIRECT); |
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ |
if (insn->header.access_mode == BRW_ALIGN_1) { |
insn->bits3.da1.src1_subreg_nr = reg.subnr; |
insn->bits3.da1.src1_reg_nr = reg.nr; |
} else { |
insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; |
insn->bits3.da16.src1_reg_nr = reg.nr; |
} |
if (insn->header.access_mode == BRW_ALIGN_1) { |
if (reg.width == BRW_WIDTH_1 && |
insn->header.execution_size == BRW_EXECUTE_1) { |
insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; |
insn->bits3.da1.src1_width = BRW_WIDTH_1; |
insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; |
} else { |
insn->bits3.da1.src1_horiz_stride = reg.hstride; |
insn->bits3.da1.src1_width = reg.width; |
insn->bits3.da1.src1_vert_stride = reg.vstride; |
} |
} else { |
insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); |
insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); |
insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); |
insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); |
/* This is an oddity of the fact we're using the same |
* descriptions for registers in align_16 as align_1: |
*/ |
if (reg.vstride == BRW_VERTICAL_STRIDE_8) |
insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; |
else |
insn->bits3.da16.src1_vert_stride = reg.vstride; |
} |
} |
} |
/** |
* Set the Message Descriptor and Extended Message Descriptor fields |
* for SEND messages. |
* |
* \note This zeroes out the Function Control bits, so it must be called |
* \b before filling out any message-specific data. Callers can |
* choose not to fill in irrelevant bits; they will be zero. |
*/ |
static void |
brw_set_message_descriptor(struct brw_compile *p, |
struct brw_instruction *inst, |
enum brw_message_target sfid, |
unsigned msg_length, |
unsigned response_length, |
bool header_present, |
bool end_of_thread) |
{ |
brw_set_src1(p, inst, brw_imm_d(0)); |
if (p->gen >= 050) { |
inst->bits3.generic_gen5.header_present = header_present; |
inst->bits3.generic_gen5.response_length = response_length; |
inst->bits3.generic_gen5.msg_length = msg_length; |
inst->bits3.generic_gen5.end_of_thread = end_of_thread; |
if (p->gen >= 060) { |
/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ |
inst->header.destreg__conditionalmod = sfid; |
} else { |
/* Set Extended Message Descriptor (ex_desc) */ |
inst->bits2.send_gen5.sfid = sfid; |
inst->bits2.send_gen5.end_of_thread = end_of_thread; |
} |
} else { |
inst->bits3.generic.response_length = response_length; |
inst->bits3.generic.msg_length = msg_length; |
inst->bits3.generic.msg_target = sfid; |
inst->bits3.generic.end_of_thread = end_of_thread; |
} |
} |
static void brw_set_math_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned function, |
unsigned integer_type, |
bool low_precision, |
bool saturate, |
unsigned dataType) |
{ |
unsigned msg_length; |
unsigned response_length; |
/* Infer message length from the function */ |
switch (function) { |
case BRW_MATH_FUNCTION_POW: |
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: |
case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: |
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
msg_length = 2; |
break; |
default: |
msg_length = 1; |
break; |
} |
/* Infer response length from the function */ |
switch (function) { |
case BRW_MATH_FUNCTION_SINCOS: |
case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
response_length = 2; |
break; |
default: |
response_length = 1; |
break; |
} |
brw_set_message_descriptor(p, insn, BRW_SFID_MATH, |
msg_length, response_length, |
false, false); |
if (p->gen == 050) { |
insn->bits3.math_gen5.function = function; |
insn->bits3.math_gen5.int_type = integer_type; |
insn->bits3.math_gen5.precision = low_precision; |
insn->bits3.math_gen5.saturate = saturate; |
insn->bits3.math_gen5.data_type = dataType; |
insn->bits3.math_gen5.snapshot = 0; |
} else { |
insn->bits3.math.function = function; |
insn->bits3.math.int_type = integer_type; |
insn->bits3.math.precision = low_precision; |
insn->bits3.math.saturate = saturate; |
insn->bits3.math.data_type = dataType; |
} |
} |
static void brw_set_ff_sync_message(struct brw_compile *p, |
struct brw_instruction *insn, |
bool allocate, |
unsigned response_length, |
bool end_of_thread) |
{ |
brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
1, response_length, |
true, end_of_thread); |
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ |
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ |
insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ |
insn->bits3.urb_gen5.allocate = allocate; |
insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ |
insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ |
} |
static void brw_set_urb_message(struct brw_compile *p, |
struct brw_instruction *insn, |
bool allocate, |
bool used, |
unsigned msg_length, |
unsigned response_length, |
bool end_of_thread, |
bool complete, |
unsigned offset, |
unsigned swizzle_control) |
{ |
brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
msg_length, response_length, true, end_of_thread); |
if (p->gen >= 070) { |
insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ |
insn->bits3.urb_gen7.offset = offset; |
assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); |
insn->bits3.urb_gen7.swizzle_control = swizzle_control; |
/* per_slot_offset = 0 makes it ignore offsets in message header */ |
insn->bits3.urb_gen7.per_slot_offset = 0; |
insn->bits3.urb_gen7.complete = complete; |
} else if (p->gen >= 050) { |
insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ |
insn->bits3.urb_gen5.offset = offset; |
insn->bits3.urb_gen5.swizzle_control = swizzle_control; |
insn->bits3.urb_gen5.allocate = allocate; |
insn->bits3.urb_gen5.used = used; /* ? */ |
insn->bits3.urb_gen5.complete = complete; |
} else { |
insn->bits3.urb.opcode = 0; /* ? */ |
insn->bits3.urb.offset = offset; |
insn->bits3.urb.swizzle_control = swizzle_control; |
insn->bits3.urb.allocate = allocate; |
insn->bits3.urb.used = used; /* ? */ |
insn->bits3.urb.complete = complete; |
} |
} |
void |
brw_set_dp_write_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned binding_table_index, |
unsigned msg_control, |
unsigned msg_type, |
unsigned msg_length, |
bool header_present, |
bool last_render_target, |
unsigned response_length, |
bool end_of_thread, |
bool send_commit_msg) |
{ |
unsigned sfid; |
if (p->gen >= 070) { |
/* Use the Render Cache for RT writes; otherwise use the Data Cache */ |
if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) |
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
else |
sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
} else if (p->gen >= 060) { |
/* Use the render cache for all write messages. */ |
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
} else { |
sfid = BRW_SFID_DATAPORT_WRITE; |
} |
brw_set_message_descriptor(p, insn, sfid, |
msg_length, response_length, |
header_present, end_of_thread); |
if (p->gen >= 070) { |
insn->bits3.gen7_dp.binding_table_index = binding_table_index; |
insn->bits3.gen7_dp.msg_control = msg_control; |
insn->bits3.gen7_dp.last_render_target = last_render_target; |
insn->bits3.gen7_dp.msg_type = msg_type; |
} else if (p->gen >= 060) { |
insn->bits3.gen6_dp.binding_table_index = binding_table_index; |
insn->bits3.gen6_dp.msg_control = msg_control; |
insn->bits3.gen6_dp.last_render_target = last_render_target; |
insn->bits3.gen6_dp.msg_type = msg_type; |
insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; |
} else if (p->gen >= 050) { |
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; |
insn->bits3.dp_write_gen5.msg_control = msg_control; |
insn->bits3.dp_write_gen5.last_render_target = last_render_target; |
insn->bits3.dp_write_gen5.msg_type = msg_type; |
insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; |
} else { |
insn->bits3.dp_write.binding_table_index = binding_table_index; |
insn->bits3.dp_write.msg_control = msg_control; |
insn->bits3.dp_write.last_render_target = last_render_target; |
insn->bits3.dp_write.msg_type = msg_type; |
insn->bits3.dp_write.send_commit_msg = send_commit_msg; |
} |
} |
void |
brw_set_dp_read_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned binding_table_index, |
unsigned msg_control, |
unsigned msg_type, |
unsigned target_cache, |
unsigned msg_length, |
unsigned response_length) |
{ |
unsigned sfid; |
if (p->gen >= 070) { |
sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
} else if (p->gen >= 060) { |
if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) |
sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
else |
sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; |
} else { |
sfid = BRW_SFID_DATAPORT_READ; |
} |
brw_set_message_descriptor(p, insn, sfid, |
msg_length, response_length, |
true, false); |
if (p->gen >= 070) { |
insn->bits3.gen7_dp.binding_table_index = binding_table_index; |
insn->bits3.gen7_dp.msg_control = msg_control; |
insn->bits3.gen7_dp.last_render_target = 0; |
insn->bits3.gen7_dp.msg_type = msg_type; |
} else if (p->gen >= 060) { |
insn->bits3.gen6_dp.binding_table_index = binding_table_index; |
insn->bits3.gen6_dp.msg_control = msg_control; |
insn->bits3.gen6_dp.last_render_target = 0; |
insn->bits3.gen6_dp.msg_type = msg_type; |
insn->bits3.gen6_dp.send_commit_msg = 0; |
} else if (p->gen >= 050) { |
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; |
insn->bits3.dp_read_gen5.msg_control = msg_control; |
insn->bits3.dp_read_gen5.msg_type = msg_type; |
insn->bits3.dp_read_gen5.target_cache = target_cache; |
} else if (p->gen >= 045) { |
insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ |
insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ |
insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ |
insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ |
} else { |
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ |
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ |
insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ |
insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ |
} |
} |
static void brw_set_sampler_message(struct brw_compile *p, |
struct brw_instruction *insn, |
unsigned binding_table_index, |
unsigned sampler, |
unsigned msg_type, |
unsigned response_length, |
unsigned msg_length, |
bool header_present, |
unsigned simd_mode) |
{ |
brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, |
msg_length, response_length, |
header_present, false); |
if (p->gen >= 070) { |
insn->bits3.sampler_gen7.binding_table_index = binding_table_index; |
insn->bits3.sampler_gen7.sampler = sampler; |
insn->bits3.sampler_gen7.msg_type = msg_type; |
insn->bits3.sampler_gen7.simd_mode = simd_mode; |
} else if (p->gen >= 050) { |
insn->bits3.sampler_gen5.binding_table_index = binding_table_index; |
insn->bits3.sampler_gen5.sampler = sampler; |
insn->bits3.sampler_gen5.msg_type = msg_type; |
insn->bits3.sampler_gen5.simd_mode = simd_mode; |
} else if (p->gen >= 045) { |
insn->bits3.sampler_g4x.binding_table_index = binding_table_index; |
insn->bits3.sampler_g4x.sampler = sampler; |
insn->bits3.sampler_g4x.msg_type = msg_type; |
} else { |
insn->bits3.sampler.binding_table_index = binding_table_index; |
insn->bits3.sampler.sampler = sampler; |
insn->bits3.sampler.msg_type = msg_type; |
insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; |
} |
} |
void brw_NOP(struct brw_compile *p) |
{ |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); |
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
brw_set_src1(p, insn, brw_imm_ud(0x0)); |
} |
/*********************************************************************** |
* Comparisons, if/else/endif |
*/ |
static void |
push_if_stack(struct brw_compile *p, struct brw_instruction *inst) |
{ |
p->if_stack[p->if_stack_depth] = inst; |
p->if_stack_depth++; |
if (p->if_stack_array_size <= p->if_stack_depth) { |
p->if_stack_array_size *= 2; |
p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); |
} |
} |
/* EU takes the value from the flag register and pushes it onto some |
* sort of a stack (presumably merging with any flag value already on |
* the stack). Within an if block, the flags at the top of the stack |
* control execution on each channel of the unit, eg. on each of the |
* 16 pixel values in our wm programs. |
* |
* When the matching 'else' instruction is reached (presumably by |
* countdown of the instruction count patched in by our ELSE/ENDIF |
* functions), the relevent flags are inverted. |
* |
* When the matching 'endif' instruction is reached, the flags are |
* popped off. If the stack is now empty, normal execution resumes. |
*/ |
struct brw_instruction * |
brw_IF(struct brw_compile *p, unsigned execute_size) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_IF); |
/* Override the defaults for this instruction: */ |
if (p->gen < 060) { |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
} else if (p->gen < 070) { |
brw_set_dest(p, insn, brw_imm_w(0)); |
insn->bits1.branch_gen6.jump_count = 0; |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
} else { |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, brw_imm_ud(0)); |
insn->bits3.break_cont.jip = 0; |
insn->bits3.break_cont.uip = 0; |
} |
insn->header.execution_size = execute_size; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.predicate_control = BRW_PREDICATE_NORMAL; |
insn->header.mask_control = BRW_MASK_ENABLE; |
if (!p->single_program_flow) |
insn->header.thread_control = BRW_THREAD_SWITCH; |
p->current->header.predicate_control = BRW_PREDICATE_NONE; |
push_if_stack(p, insn); |
return insn; |
} |
/* This function is only used for gen6-style IF instructions with an |
* embedded comparison (conditional modifier). It is not used on gen7. |
*/ |
struct brw_instruction * |
gen6_IF(struct brw_compile *p, uint32_t conditional, |
struct brw_reg src0, struct brw_reg src1) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_IF); |
brw_set_dest(p, insn, brw_imm_w(0)); |
if (p->compressed) { |
insn->header.execution_size = BRW_EXECUTE_16; |
} else { |
insn->header.execution_size = BRW_EXECUTE_8; |
} |
insn->bits1.branch_gen6.jump_count = 0; |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, src1); |
assert(insn->header.compression_control == BRW_COMPRESSION_NONE); |
assert(insn->header.predicate_control == BRW_PREDICATE_NONE); |
insn->header.destreg__conditionalmod = conditional; |
if (!p->single_program_flow) |
insn->header.thread_control = BRW_THREAD_SWITCH; |
push_if_stack(p, insn); |
return insn; |
} |
/** |
* In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. |
*/ |
static void |
convert_IF_ELSE_to_ADD(struct brw_compile *p, |
struct brw_instruction *if_inst, |
struct brw_instruction *else_inst) |
{ |
/* The next instruction (where the ENDIF would be, if it existed) */ |
struct brw_instruction *next_inst = &p->store[p->nr_insn]; |
assert(p->single_program_flow); |
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); |
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); |
assert(if_inst->header.execution_size == BRW_EXECUTE_1); |
/* Convert IF to an ADD instruction that moves the instruction pointer |
* to the first instruction of the ELSE block. If there is no ELSE |
* block, point to where ENDIF would be. Reverse the predicate. |
* |
* There's no need to execute an ENDIF since we don't need to do any |
* stack operations, and if we're currently executing, we just want to |
* continue normally. |
*/ |
if_inst->header.opcode = BRW_OPCODE_ADD; |
if_inst->header.predicate_inverse = 1; |
if (else_inst != NULL) { |
/* Convert ELSE to an ADD instruction that points where the ENDIF |
* would be. |
*/ |
else_inst->header.opcode = BRW_OPCODE_ADD; |
if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; |
else_inst->bits3.ud = (next_inst - else_inst) * 16; |
} else { |
if_inst->bits3.ud = (next_inst - if_inst) * 16; |
} |
} |
/** |
* Patch IF and ELSE instructions with appropriate jump targets. |
*/ |
static void |
patch_IF_ELSE(struct brw_compile *p, |
struct brw_instruction *if_inst, |
struct brw_instruction *else_inst, |
struct brw_instruction *endif_inst) |
{ |
unsigned br = 1; |
assert(!p->single_program_flow); |
assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); |
assert(endif_inst != NULL); |
assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); |
/* Jump count is for 64bit data chunk each, so one 128bit instruction |
* requires 2 chunks. |
*/ |
if (p->gen >= 050) |
br = 2; |
assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); |
endif_inst->header.execution_size = if_inst->header.execution_size; |
if (else_inst == NULL) { |
/* Patch IF -> ENDIF */ |
if (p->gen < 060) { |
/* Turn it into an IFF, which means no mask stack operations for |
* all-false and jumping past the ENDIF. |
*/ |
if_inst->header.opcode = BRW_OPCODE_IFF; |
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); |
if_inst->bits3.if_else.pop_count = 0; |
if_inst->bits3.if_else.pad0 = 0; |
} else if (p->gen < 070) { |
/* As of gen6, there is no IFF and IF must point to the ENDIF. */ |
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); |
} else { |
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); |
if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); |
} |
} else { |
else_inst->header.execution_size = if_inst->header.execution_size; |
/* Patch IF -> ELSE */ |
if (p->gen < 060) { |
if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); |
if_inst->bits3.if_else.pop_count = 0; |
if_inst->bits3.if_else.pad0 = 0; |
} else if (p->gen <= 070) { |
if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); |
} |
/* Patch ELSE -> ENDIF */ |
if (p->gen < 060) { |
/* BRW_OPCODE_ELSE pre-gen6 should point just past the |
* matching ENDIF. |
*/ |
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); |
else_inst->bits3.if_else.pop_count = 1; |
else_inst->bits3.if_else.pad0 = 0; |
} else if (p->gen < 070) { |
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ |
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); |
} else { |
/* The IF instruction's JIP should point just past the ELSE */ |
if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); |
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ |
if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); |
else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); |
} |
} |
} |
void |
brw_ELSE(struct brw_compile *p) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_ELSE); |
if (p->gen < 060) { |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
} else if (p->gen < 070) { |
brw_set_dest(p, insn, brw_imm_w(0)); |
insn->bits1.branch_gen6.jump_count = 0; |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
} else { |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, brw_imm_ud(0)); |
insn->bits3.break_cont.jip = 0; |
insn->bits3.break_cont.uip = 0; |
} |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.mask_control = BRW_MASK_ENABLE; |
if (!p->single_program_flow) |
insn->header.thread_control = BRW_THREAD_SWITCH; |
push_if_stack(p, insn); |
} |
void |
brw_ENDIF(struct brw_compile *p) |
{ |
struct brw_instruction *insn; |
struct brw_instruction *else_inst = NULL; |
struct brw_instruction *if_inst = NULL; |
/* Pop the IF and (optional) ELSE instructions from the stack */ |
p->if_stack_depth--; |
if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { |
else_inst = p->if_stack[p->if_stack_depth]; |
p->if_stack_depth--; |
} |
if_inst = p->if_stack[p->if_stack_depth]; |
if (p->single_program_flow) { |
/* ENDIF is useless; don't bother emitting it. */ |
convert_IF_ELSE_to_ADD(p, if_inst, else_inst); |
return; |
} |
insn = brw_next_insn(p, BRW_OPCODE_ENDIF); |
if (p->gen < 060) { |
brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
} else if (p->gen < 070) { |
brw_set_dest(p, insn, brw_imm_w(0)); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
} else { |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, brw_imm_ud(0)); |
} |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.mask_control = BRW_MASK_ENABLE; |
insn->header.thread_control = BRW_THREAD_SWITCH; |
/* Also pop item off the stack in the endif instruction: */ |
if (p->gen < 060) { |
insn->bits3.if_else.jump_count = 0; |
insn->bits3.if_else.pop_count = 1; |
insn->bits3.if_else.pad0 = 0; |
} else if (p->gen < 070) { |
insn->bits1.branch_gen6.jump_count = 2; |
} else { |
insn->bits3.break_cont.jip = 2; |
} |
patch_IF_ELSE(p, if_inst, else_inst, insn); |
} |
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_BREAK); |
if (p->gen >= 060) { |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
} else { |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
insn->bits3.if_else.pad0 = 0; |
insn->bits3.if_else.pop_count = pop_count; |
} |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.execution_size = BRW_EXECUTE_8; |
return insn; |
} |
struct brw_instruction *gen6_CONT(struct brw_compile *p, |
struct brw_instruction *do_insn) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.execution_size = BRW_EXECUTE_8; |
return insn; |
} |
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) |
{ |
struct brw_instruction *insn; |
insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0x0)); |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.execution_size = BRW_EXECUTE_8; |
/* insn->header.mask_control = BRW_MASK_DISABLE; */ |
insn->bits3.if_else.pad0 = 0; |
insn->bits3.if_else.pop_count = pop_count; |
return insn; |
} |
/* DO/WHILE loop: |
* |
* The DO/WHILE is just an unterminated loop -- break or continue are |
* used for control within the loop. We have a few ways they can be |
* done. |
* |
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip, |
* jip and no DO instruction. |
* |
* For non-uniform control flow pre-gen6, there's a DO instruction to |
* push the mask, and a WHILE to jump back, and BREAK to get out and |
* pop the mask. |
* |
* For gen6, there's no more mask stack, so no need for DO. WHILE |
* just points back to the first instruction of the loop. |
*/ |
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) |
{ |
if (p->gen >= 060 || p->single_program_flow) { |
return &p->store[p->nr_insn]; |
} else { |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); |
/* Override the defaults for this instruction: |
*/ |
brw_set_dest(p, insn, brw_null_reg()); |
brw_set_src0(p, insn, brw_null_reg()); |
brw_set_src1(p, insn, brw_null_reg()); |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.execution_size = execute_size; |
insn->header.predicate_control = BRW_PREDICATE_NONE; |
/* insn->header.mask_control = BRW_MASK_ENABLE; */ |
/* insn->header.mask_control = BRW_MASK_DISABLE; */ |
return insn; |
} |
} |
struct brw_instruction *brw_WHILE(struct brw_compile *p, |
struct brw_instruction *do_insn) |
{ |
struct brw_instruction *insn; |
unsigned br = 1; |
if (p->gen >= 050) |
br = 2; |
if (p->gen >= 070) { |
insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, brw_imm_ud(0)); |
insn->bits3.break_cont.jip = br * (do_insn - insn); |
insn->header.execution_size = BRW_EXECUTE_8; |
} else if (p->gen >= 060) { |
insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
brw_set_dest(p, insn, brw_imm_w(0)); |
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); |
brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
insn->header.execution_size = BRW_EXECUTE_8; |
} else { |
if (p->single_program_flow) { |
insn = brw_next_insn(p, BRW_OPCODE_ADD); |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); |
insn->header.execution_size = BRW_EXECUTE_1; |
} else { |
insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
assert(do_insn->header.opcode == BRW_OPCODE_DO); |
brw_set_dest(p, insn, brw_ip_reg()); |
brw_set_src0(p, insn, brw_ip_reg()); |
brw_set_src1(p, insn, brw_imm_d(0)); |
insn->header.execution_size = do_insn->header.execution_size; |
insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); |
insn->bits3.if_else.pop_count = 0; |
insn->bits3.if_else.pad0 = 0; |
} |
} |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
p->current->header.predicate_control = BRW_PREDICATE_NONE; |
return insn; |
} |
/* FORWARD JUMPS: |
*/ |
void brw_land_fwd_jump(struct brw_compile *p, |
struct brw_instruction *jmp_insn) |
{ |
struct brw_instruction *landing = &p->store[p->nr_insn]; |
unsigned jmpi = 1; |
if (p->gen >= 050) |
jmpi = 2; |
assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); |
assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); |
jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); |
} |
/* To integrate with the above, it makes sense that the comparison |
* instruction should populate the flag register. It might be simpler |
* just to use the flag reg for most WM tasks? |
*/ |
void brw_CMP(struct brw_compile *p, |
struct brw_reg dest, |
unsigned conditional, |
struct brw_reg src0, |
struct brw_reg src1) |
{ |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP); |
insn->header.destreg__conditionalmod = conditional; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, src1); |
/* Make it so that future instructions will use the computed flag |
* value until brw_set_predicate_control_flag_value() is called |
* again. |
*/ |
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && |
dest.nr == 0) { |
p->current->header.predicate_control = BRW_PREDICATE_NORMAL; |
p->flag_value = 0xff; |
} |
} |
/* Issue 'wait' instruction for n1, host could program MMIO |
to wake up thread. */ |
void brw_WAIT(struct brw_compile *p) |
{ |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT); |
struct brw_reg src = brw_notification_1_reg(); |
brw_set_dest(p, insn, src); |
brw_set_src0(p, insn, src); |
brw_set_src1(p, insn, brw_null_reg()); |
insn->header.execution_size = 0; /* must */ |
insn->header.predicate_control = 0; |
insn->header.compression_control = 0; |
} |
/*********************************************************************** |
* Helpers for the various SEND message types: |
*/ |
/** Extended math function, float[8]. |
*/ |
void brw_math(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
unsigned saturate, |
unsigned msg_reg_nr, |
struct brw_reg src, |
unsigned data_type, |
unsigned precision) |
{ |
if (p->gen >= 060) { |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); |
assert(dest.file == BRW_GENERAL_REGISTER_FILE); |
assert(src.file == BRW_GENERAL_REGISTER_FILE); |
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); |
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); |
/* Source modifiers are ignored for extended math instructions. */ |
assert(!src.negate); |
assert(!src.abs); |
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && |
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { |
assert(src.type == BRW_REGISTER_TYPE_F); |
} |
/* Math is the same ISA format as other opcodes, except that CondModifier |
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
*/ |
insn->header.destreg__conditionalmod = function; |
insn->header.saturate = saturate; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
brw_set_src1(p, insn, brw_null_reg()); |
} else { |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
/* Example code doesn't set predicate_control for send |
* instructions. |
*/ |
insn->header.predicate_control = 0; |
insn->header.destreg__conditionalmod = msg_reg_nr; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
brw_set_math_message(p, insn, function, |
src.type == BRW_REGISTER_TYPE_D, |
precision, |
saturate, |
data_type); |
} |
} |
/** Extended math function, float[8]. |
*/ |
void brw_math2(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
struct brw_reg src0, |
struct brw_reg src1) |
{ |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); |
assert(dest.file == BRW_GENERAL_REGISTER_FILE); |
assert(src0.file == BRW_GENERAL_REGISTER_FILE); |
assert(src1.file == BRW_GENERAL_REGISTER_FILE); |
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); |
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); |
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); |
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && |
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { |
assert(src0.type == BRW_REGISTER_TYPE_F); |
assert(src1.type == BRW_REGISTER_TYPE_F); |
} |
/* Source modifiers are ignored for extended math instructions. */ |
assert(!src0.negate); |
assert(!src0.abs); |
assert(!src1.negate); |
assert(!src1.abs); |
/* Math is the same ISA format as other opcodes, except that CondModifier |
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
*/ |
insn->header.destreg__conditionalmod = function; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, src1); |
} |
/** |
* Extended math function, float[16]. |
* Use 2 send instructions. |
*/ |
void brw_math_16(struct brw_compile *p, |
struct brw_reg dest, |
unsigned function, |
unsigned saturate, |
unsigned msg_reg_nr, |
struct brw_reg src, |
unsigned precision) |
{ |
struct brw_instruction *insn; |
if (p->gen >= 060) { |
insn = brw_next_insn(p, BRW_OPCODE_MATH); |
/* Math is the same ISA format as other opcodes, except that CondModifier |
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
*/ |
insn->header.destreg__conditionalmod = function; |
insn->header.saturate = saturate; |
/* Source modifiers are ignored for extended math instructions. */ |
assert(!src.negate); |
assert(!src.abs); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
brw_set_src1(p, insn, brw_null_reg()); |
return; |
} |
/* First instruction: |
*/ |
brw_push_insn_state(p); |
brw_set_predicate_control_flag_value(p, 0xff); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.destreg__conditionalmod = msg_reg_nr; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
brw_set_math_message(p, insn, function, |
BRW_MATH_INTEGER_UNSIGNED, |
precision, |
saturate, |
BRW_MATH_DATA_VECTOR); |
/* Second instruction: |
*/ |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.compression_control = BRW_COMPRESSION_2NDHALF; |
insn->header.destreg__conditionalmod = msg_reg_nr+1; |
brw_set_dest(p, insn, __offset(dest,1)); |
brw_set_src0(p, insn, src); |
brw_set_math_message(p, insn, function, |
BRW_MATH_INTEGER_UNSIGNED, |
precision, |
saturate, |
BRW_MATH_DATA_VECTOR); |
brw_pop_insn_state(p); |
} |
/** |
* Write a block of OWORDs (half a GRF each) from the scratch buffer, |
* using a constant offset per channel. |
* |
* The offset must be aligned to oword size (16 bytes). Used for |
* register spilling. |
*/ |
void brw_oword_block_write_scratch(struct brw_compile *p, |
struct brw_reg mrf, |
int num_regs, |
unsigned offset) |
{ |
uint32_t msg_control, msg_type; |
int mlen; |
if (p->gen >= 060) |
offset /= 16; |
mrf = __retype_ud(mrf); |
if (num_regs == 1) { |
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; |
mlen = 2; |
} else { |
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; |
mlen = 3; |
} |
/* Set up the message header. This is g0, with g0.2 filled with |
* the offset. We don't want to leave our offset around in g0 or |
* it'll screw up texture samples, so set it up inside the message |
* reg. |
*/ |
{ |
brw_push_insn_state(p); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
/* set message header global offset field (reg 0, element 2) */ |
brw_MOV(p, |
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
brw_imm_ud(offset)); |
brw_pop_insn_state(p); |
} |
{ |
struct brw_reg dest; |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
int send_commit_msg; |
struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0)); |
if (insn->header.compression_control != BRW_COMPRESSION_NONE) { |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
src_header = vec16(src_header); |
} |
assert(insn->header.predicate_control == BRW_PREDICATE_NONE); |
insn->header.destreg__conditionalmod = mrf.nr; |
/* Until gen6, writes followed by reads from the same location |
* are not guaranteed to be ordered unless write_commit is set. |
* If set, then a no-op write is issued to the destination |
* register to set a dependency, and a read from the destination |
* can be used to ensure the ordering. |
* |
* For gen6, only writes between different threads need ordering |
* protection. Our use of DP writes is all about register |
* spilling within a thread. |
*/ |
if (p->gen >= 060) { |
dest = __retype_uw(vec16(brw_null_reg())); |
send_commit_msg = 0; |
} else { |
dest = src_header; |
send_commit_msg = 1; |
} |
brw_set_dest(p, insn, dest); |
if (p->gen >= 060) { |
brw_set_src0(p, insn, mrf); |
} else { |
brw_set_src0(p, insn, brw_null_reg()); |
} |
if (p->gen >= 060) |
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; |
else |
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; |
brw_set_dp_write_message(p, |
insn, |
255, /* binding table index (255=stateless) */ |
msg_control, |
msg_type, |
mlen, |
true, /* header_present */ |
0, /* pixel scoreboard */ |
send_commit_msg, /* response_length */ |
0, /* eot */ |
send_commit_msg); |
} |
} |
/** |
* Read a block of owords (half a GRF each) from the scratch buffer |
* using a constant index per channel. |
* |
* Offset must be aligned to oword size (16 bytes). Used for register |
* spilling. |
*/ |
void |
brw_oword_block_read_scratch(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
int num_regs, |
unsigned offset) |
{ |
uint32_t msg_control; |
int rlen; |
if (p->gen >= 060) |
offset /= 16; |
mrf = __retype_ud(mrf); |
dest = __retype_uw(dest); |
if (num_regs == 1) { |
msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; |
rlen = 1; |
} else { |
msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; |
rlen = 2; |
} |
{ |
brw_push_insn_state(p); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
/* set message header global offset field (reg 0, element 2) */ |
brw_MOV(p, |
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
brw_imm_ud(offset)); |
brw_pop_insn_state(p); |
} |
{ |
struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
assert(insn->header.predicate_control == 0); |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.destreg__conditionalmod = mrf.nr; |
brw_set_dest(p, insn, dest); /* UW? */ |
if (p->gen >= 060) { |
brw_set_src0(p, insn, mrf); |
} else { |
brw_set_src0(p, insn, brw_null_reg()); |
} |
brw_set_dp_read_message(p, |
insn, |
255, /* binding table index (255=stateless) */ |
msg_control, |
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ |
BRW_DATAPORT_READ_TARGET_RENDER_CACHE, |
1, /* msg_length */ |
rlen); |
} |
} |
/** |
* Read a float[4] vector from the data port Data Cache (const buffer). |
* Location (in buffer) should be a multiple of 16. |
* Used for fetching shader constants. |
*/ |
void brw_oword_block_read(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
uint32_t offset, |
uint32_t bind_table_index) |
{ |
struct brw_instruction *insn; |
/* On newer hardware, offset is in units of owords. */ |
if (p->gen >= 060) |
offset /= 16; |
mrf = __retype_ud(mrf); |
brw_push_insn_state(p); |
brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
/* set message header global offset field (reg 0, element 2) */ |
brw_MOV(p, |
__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
brw_imm_ud(offset)); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.destreg__conditionalmod = mrf.nr; |
/* cast dest to a uword[8] vector */ |
dest = __retype_uw(vec8(dest)); |
brw_set_dest(p, insn, dest); |
if (p->gen >= 060) { |
brw_set_src0(p, insn, mrf); |
} else { |
brw_set_src0(p, insn, brw_null_reg()); |
} |
brw_set_dp_read_message(p, |
insn, |
bind_table_index, |
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, |
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, |
BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
1, /* msg_length */ |
1); /* response_length (1 reg, 2 owords!) */ |
brw_pop_insn_state(p); |
} |
/** |
* Read a set of dwords from the data port Data Cache (const buffer). |
* |
* Location (in buffer) appears as UD offsets in the register after |
* the provided mrf header reg. |
*/ |
void brw_dword_scattered_read(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg mrf, |
uint32_t bind_table_index) |
{ |
struct brw_instruction *insn; |
mrf = __retype_ud(mrf); |
brw_push_insn_state(p); |
brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
brw_pop_insn_state(p); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.destreg__conditionalmod = mrf.nr; |
/* cast dest to a uword[8] vector */ |
dest = __retype_uw(vec8(dest)); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, brw_null_reg()); |
brw_set_dp_read_message(p, |
insn, |
bind_table_index, |
BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, |
BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, |
BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
2, /* msg_length */ |
1); /* response_length */ |
} |
/** |
* Read float[4] constant(s) from VS constant buffer. |
* For relative addressing, two float[4] constants will be read into 'dest'. |
* Otherwise, one float[4] constant will be read into the lower half of 'dest'. |
*/ |
void brw_dp_READ_4_vs(struct brw_compile *p, |
struct brw_reg dest, |
unsigned location, |
unsigned bind_table_index) |
{ |
struct brw_instruction *insn; |
unsigned msg_reg_nr = 1; |
if (p->gen >= 060) |
location /= 16; |
/* Setup MRF[1] with location/offset into const buffer */ |
brw_push_insn_state(p); |
brw_set_access_mode(p, BRW_ALIGN_1); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), |
brw_imm_ud(location)); |
brw_pop_insn_state(p); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.predicate_control = BRW_PREDICATE_NONE; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.destreg__conditionalmod = msg_reg_nr; |
insn->header.mask_control = BRW_MASK_DISABLE; |
brw_set_dest(p, insn, dest); |
if (p->gen >= 060) { |
brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); |
} else { |
brw_set_src0(p, insn, brw_null_reg()); |
} |
brw_set_dp_read_message(p, |
insn, |
bind_table_index, |
0, |
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ |
BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
1, /* msg_length */ |
1); /* response_length (1 Oword) */ |
} |
/** |
* Read a float[4] constant per vertex from VS constant buffer, with |
* relative addressing. |
*/ |
void brw_dp_READ_4_vs_relative(struct brw_compile *p, |
struct brw_reg dest, |
struct brw_reg addr_reg, |
unsigned offset, |
unsigned bind_table_index) |
{ |
struct brw_reg src = brw_vec8_grf(0, 0); |
struct brw_instruction *insn; |
int msg_type; |
/* Setup MRF[1] with offset into const buffer */ |
brw_push_insn_state(p); |
brw_set_access_mode(p, BRW_ALIGN_1); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
/* M1.0 is block offset 0, M1.4 is block offset 1, all other |
* fields ignored. |
*/ |
brw_ADD(p, __retype_d(brw_message_reg(1)), |
addr_reg, brw_imm_d(offset)); |
brw_pop_insn_state(p); |
gen6_resolve_implied_move(p, &src, 0); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.predicate_control = BRW_PREDICATE_NONE; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
insn->header.destreg__conditionalmod = 0; |
insn->header.mask_control = BRW_MASK_DISABLE; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src); |
if (p->gen >= 060) |
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
else if (p->gen >= 045) |
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
else |
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
brw_set_dp_read_message(p, |
insn, |
bind_table_index, |
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, |
msg_type, |
BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
2, /* msg_length */ |
1); /* response_length */ |
} |
void brw_fb_WRITE(struct brw_compile *p, |
int dispatch_width, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
unsigned msg_control, |
unsigned binding_table_index, |
unsigned msg_length, |
unsigned response_length, |
bool eot, |
bool header_present) |
{ |
struct brw_instruction *insn; |
unsigned msg_type; |
struct brw_reg dest; |
if (dispatch_width == 16) |
dest = __retype_uw(vec16(brw_null_reg())); |
else |
dest = __retype_uw(vec8(brw_null_reg())); |
if (p->gen >= 060 && binding_table_index == 0) { |
insn = brw_next_insn(p, BRW_OPCODE_SENDC); |
} else { |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
} |
/* The execution mask is ignored for render target writes. */ |
insn->header.predicate_control = 0; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
if (p->gen >= 060) { |
/* headerless version, just submit color payload */ |
src0 = brw_message_reg(msg_reg_nr); |
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
} else { |
insn->header.destreg__conditionalmod = msg_reg_nr; |
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
} |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_dp_write_message(p, |
insn, |
binding_table_index, |
msg_control, |
msg_type, |
msg_length, |
header_present, |
eot, |
response_length, |
eot, |
0 /* send_commit_msg */); |
} |
/** |
* Texture sample instruction. |
* Note: the msg_type plus msg_length values determine exactly what kind |
* of sampling operation is performed. See volume 4, page 161 of docs. |
*/ |
void brw_SAMPLE(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
unsigned binding_table_index, |
unsigned sampler, |
unsigned writemask, |
unsigned msg_type, |
unsigned response_length, |
unsigned msg_length, |
bool header_present, |
unsigned simd_mode) |
{ |
assert(writemask); |
if (p->gen < 050 || writemask != WRITEMASK_XYZW) { |
struct brw_reg m1 = brw_message_reg(msg_reg_nr); |
writemask = ~writemask & WRITEMASK_XYZW; |
brw_push_insn_state(p); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); |
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); |
brw_pop_insn_state(p); |
src0 = __retype_uw(brw_null_reg()); |
} |
{ |
struct brw_instruction *insn; |
gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.predicate_control = 0; /* XXX */ |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
if (p->gen < 060) |
insn->header.destreg__conditionalmod = msg_reg_nr; |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_sampler_message(p, insn, |
binding_table_index, |
sampler, |
msg_type, |
response_length, |
msg_length, |
header_present, |
simd_mode); |
} |
} |
/* All these variables are pretty confusing - we might be better off |
* using bitmasks and macros for this, in the old style. Or perhaps |
* just having the caller instantiate the fields in dword3 itself. |
*/ |
void brw_urb_WRITE(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
bool allocate, |
bool used, |
unsigned msg_length, |
unsigned response_length, |
bool eot, |
bool writes_complete, |
unsigned offset, |
unsigned swizzle) |
{ |
struct brw_instruction *insn; |
gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
if (p->gen >= 070) { |
/* Enable Channel Masks in the URB_WRITE_HWORD message header */ |
brw_push_insn_state(p); |
brw_set_access_mode(p, BRW_ALIGN_1); |
brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), |
__retype_ud(brw_vec1_grf(0, 5)), |
brw_imm_ud(0xff00)); |
brw_pop_insn_state(p); |
} |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
assert(msg_length < BRW_MAX_MRF); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, brw_imm_d(0)); |
if (p->gen <= 060) |
insn->header.destreg__conditionalmod = msg_reg_nr; |
brw_set_urb_message(p, |
insn, |
allocate, |
used, |
msg_length, |
response_length, |
eot, |
writes_complete, |
offset, |
swizzle); |
} |
static int |
brw_find_next_block_end(struct brw_compile *p, int start) |
{ |
int ip; |
for (ip = start + 1; ip < p->nr_insn; ip++) { |
struct brw_instruction *insn = &p->store[ip]; |
switch (insn->header.opcode) { |
case BRW_OPCODE_ENDIF: |
case BRW_OPCODE_ELSE: |
case BRW_OPCODE_WHILE: |
return ip; |
} |
} |
assert(!"not reached"); |
return start + 1; |
} |
/* There is no DO instruction on gen6, so to find the end of the loop |
* we have to see if the loop is jumping back before our start |
* instruction. |
*/ |
static int |
brw_find_loop_end(struct brw_compile *p, int start) |
{ |
int ip; |
int br = 2; |
for (ip = start + 1; ip < p->nr_insn; ip++) { |
struct brw_instruction *insn = &p->store[ip]; |
if (insn->header.opcode == BRW_OPCODE_WHILE) { |
int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count |
: insn->bits3.break_cont.jip; |
if (ip + jip / br <= start) |
return ip; |
} |
} |
assert(!"not reached"); |
return start + 1; |
} |
/* After program generation, go back and update the UIP and JIP of |
* BREAK and CONT instructions to their correct locations. |
*/ |
void |
brw_set_uip_jip(struct brw_compile *p) |
{ |
int ip; |
int br = 2; |
if (p->gen <= 060) |
return; |
for (ip = 0; ip < p->nr_insn; ip++) { |
struct brw_instruction *insn = &p->store[ip]; |
switch (insn->header.opcode) { |
case BRW_OPCODE_BREAK: |
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); |
/* Gen7 UIP points to WHILE; Gen6 points just after it */ |
insn->bits3.break_cont.uip = |
br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0)); |
break; |
case BRW_OPCODE_CONTINUE: |
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); |
insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); |
assert(insn->bits3.break_cont.uip != 0); |
assert(insn->bits3.break_cont.jip != 0); |
break; |
} |
} |
} |
void brw_ff_sync(struct brw_compile *p, |
struct brw_reg dest, |
unsigned msg_reg_nr, |
struct brw_reg src0, |
bool allocate, |
unsigned response_length, |
bool eot) |
{ |
struct brw_instruction *insn; |
gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
brw_set_dest(p, insn, dest); |
brw_set_src0(p, insn, src0); |
brw_set_src1(p, insn, brw_imm_d(0)); |
if (p->gen < 060) |
insn->header.destreg__conditionalmod = msg_reg_nr; |
brw_set_ff_sync_message(p, |
insn, |
allocate, |
response_length, |
eot); |
} |
/drivers/video/Intel-2D/brw/brw_wm.c |
---|
0,0 → 1,681 |
#include "brw.h" |
/* GRF register numbers used as scratch by the WM kernels: brw_wm_xy()
 * writes its X and Y results to brw_vec8_grf(X16, 0) and
 * brw_vec8_grf(Y16, 0), and the pre-gen6 path of brw_wm_affine_st() reads
 * them back as LINE/MAC sources.
 */
#define X16 8 |
#define Y16 10 |
static void brw_wm_xy(struct brw_compile *p, int dw) |
{ |
struct brw_reg r1 = brw_vec1_grf(1, 0); |
struct brw_reg r1_uw = __retype_uw(r1); |
struct brw_reg x_uw, y_uw; |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
if (dw == 16) { |
x_uw = brw_uw16_grf(30, 0); |
y_uw = brw_uw16_grf(28, 0); |
} else { |
x_uw = brw_uw8_grf(30, 0); |
y_uw = brw_uw8_grf(28, 0); |
} |
brw_ADD(p, |
x_uw, |
__stride(__suboffset(r1_uw, 4), 2, 4, 0), |
brw_imm_v(0x10101010)); |
brw_ADD(p, |
y_uw, |
__stride(__suboffset(r1_uw, 5), 2, 4, 0), |
brw_imm_v(0x11001100)); |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
brw_ADD(p, brw_vec8_grf(X16, 0), vec8(x_uw), brw_negate(r1)); |
brw_ADD(p, brw_vec8_grf(Y16, 0), vec8(y_uw), brw_negate(__suboffset(r1, 1))); |
} |
static void brw_wm_affine_st(struct brw_compile *p, int dw, |
int channel, int msg) |
{ |
int uv; |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
uv = p->gen >= 060 ? 6 : 3; |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
uv = p->gen >= 060 ? 4 : 3; |
} |
uv += 2*channel; |
msg++; |
if (p->gen >= 060) { |
brw_PLN(p, |
brw_message_reg(msg), |
brw_vec1_grf(uv, 0), |
brw_vec8_grf(2, 0)); |
msg += dw/8; |
brw_PLN(p, |
brw_message_reg(msg), |
brw_vec1_grf(uv, 4), |
brw_vec8_grf(2, 0)); |
} else { |
struct brw_reg r = brw_vec1_grf(uv, 0); |
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); |
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); |
msg += dw/8; |
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); |
brw_MAC(p, brw_message_reg(msg), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); |
} |
} |
static inline unsigned simd(int dw) |
{ |
return dw == 16 ? BRW_SAMPLER_SIMD_MODE_SIMD16 : BRW_SAMPLER_SIMD_MODE_SIMD8; |
} |
static inline struct brw_reg sample_result(int dw, int result) |
{ |
return brw_reg(BRW_GENERAL_REGISTER_FILE, result, 0, |
BRW_REGISTER_TYPE_UW, |
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, |
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYZW, |
WRITEMASK_XYZW); |
} |
static int brw_wm_sample(struct brw_compile *p, int dw, |
int channel, int msg, int result) |
{ |
struct brw_reg src0; |
bool header; |
int len; |
len = dw == 16 ? 4 : 2; |
if (p->gen >= 060) { |
header = false; |
src0 = brw_message_reg(++msg); |
} else { |
header = true; |
src0 = brw_vec8_grf(0, 0); |
} |
brw_SAMPLE(p, sample_result(dw, result), msg, src0, |
channel+1, channel, WRITEMASK_XYZW, 0, |
2*len, len+header, header, simd(dw)); |
return result; |
} |
static int brw_wm_sample__alpha(struct brw_compile *p, int dw, |
int channel, int msg, int result) |
{ |
struct brw_reg src0; |
int mlen, rlen; |
if (dw == 8) { |
/* SIMD8 sample return is not masked */ |
mlen = 3; |
rlen = 4; |
} else { |
mlen = 5; |
rlen = 2; |
} |
if (p->gen >= 060) |
src0 = brw_message_reg(msg); |
else |
src0 = brw_vec8_grf(0, 0); |
brw_SAMPLE(p, sample_result(dw, result), msg, src0, |
channel+1, channel, WRITEMASK_W, 0, |
rlen, mlen, true, simd(dw)); |
if (dw == 8) |
result += 3; |
return result; |
} |
/*
 * Compute affine texture coordinates for `channel`, then sample all four
 * components into `result`.  Returns the register index reported by
 * brw_wm_sample().
 */
static int brw_wm_affine(struct brw_compile *p, int dw,
                         int channel, int msg, int result)
{
    int sampled;

    brw_wm_affine_st(p, dw, channel, msg);
    sampled = brw_wm_sample(p, dw, channel, msg, result);

    return sampled;
}
/*
 * Compute affine texture coordinates for `channel`, then sample only the
 * alpha component.  Returns the register index reported by
 * brw_wm_sample__alpha().
 */
static int brw_wm_affine__alpha(struct brw_compile *p, int dw,
                                int channel, int msg, int result)
{
    int alpha;

    brw_wm_affine_st(p, dw, channel, msg);
    alpha = brw_wm_sample__alpha(p, dw, channel, msg, result);

    return alpha;
}
static inline struct brw_reg null_result(int dw) |
{ |
return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0, |
BRW_REGISTER_TYPE_UW, |
dw == 16 ? BRW_VERTICAL_STRIDE_16 : BRW_VERTICAL_STRIDE_8, |
dw == 16 ? BRW_WIDTH_16 : BRW_WIDTH_8, |
BRW_HORIZONTAL_STRIDE_1, |
BRW_SWIZZLE_XYZW, |
WRITEMASK_XYZW); |
} |
static void brw_fb_write(struct brw_compile *p, int dw) |
{ |
struct brw_instruction *insn; |
unsigned msg_control, msg_type, msg_len; |
struct brw_reg src0; |
bool header; |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; |
msg_len = 8; |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; |
msg_len = 4; |
} |
if (p->gen < 060) { |
brw_push_insn_state(p); |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_set_mask_control(p, BRW_MASK_DISABLE); |
brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); |
brw_pop_insn_state(p); |
msg_len += 2; |
} |
/* The execution mask is ignored for render target writes. */ |
insn = brw_next_insn(p, BRW_OPCODE_SEND); |
insn->header.predicate_control = 0; |
insn->header.compression_control = BRW_COMPRESSION_NONE; |
if (p->gen >= 060) { |
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
src0 = brw_message_reg(2); |
header = false; |
} else { |
insn->header.destreg__conditionalmod = 0; |
msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
src0 = __retype_uw(brw_vec8_grf(0, 0)); |
header = true; |
} |
brw_set_dest(p, insn, null_result(dw)); |
brw_set_src0(p, insn, src0); |
brw_set_dp_write_message(p, insn, 0, |
msg_control, msg_type, msg_len, |
header, true, 0, true, false); |
} |
static void brw_wm_write(struct brw_compile *p, int dw, int src) |
{ |
int n; |
if (dw == 8 && p->gen >= 060) { |
/* XXX pixel execution mask? */ |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MOV(p, brw_message_reg(2), brw_vec8_grf(src+0, 0)); |
brw_MOV(p, brw_message_reg(3), brw_vec8_grf(src+1, 0)); |
brw_MOV(p, brw_message_reg(4), brw_vec8_grf(src+2, 0)); |
brw_MOV(p, brw_message_reg(5), brw_vec8_grf(src+3, 0)); |
goto done; |
} |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
for (n = 0; n < 4; n++) { |
if (p->gen >= 060) { |
brw_MOV(p, |
brw_message_reg(2 + 2*n), |
brw_vec8_grf(src + 2*n, 0)); |
} else if (p->gen >= 045 && dw == 16) { |
brw_MOV(p, |
brw_message_reg(2 + n + BRW_MRF_COMPR4), |
brw_vec8_grf(src + 2*n, 0)); |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MOV(p, |
brw_message_reg(2 + n), |
brw_vec8_grf(src + 2*n, 0)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
brw_MOV(p, |
brw_message_reg(2 + n + 4), |
brw_vec8_grf(src + 2*n+1, 0)); |
} |
} |
} |
done: |
brw_fb_write(p, dw); |
} |
static void brw_wm_write__mask(struct brw_compile *p, int dw, |
int src, int mask) |
{ |
int n; |
if (dw == 8 && p->gen >= 060) { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2), |
brw_vec8_grf(src+0, 0), |
brw_vec8_grf(mask, 0)); |
brw_MUL(p, |
brw_message_reg(3), |
brw_vec8_grf(src+1, 0), |
brw_vec8_grf(mask, 0)); |
brw_MUL(p, |
brw_message_reg(4), |
brw_vec8_grf(src+2, 0), |
brw_vec8_grf(mask, 0)); |
brw_MUL(p, |
brw_message_reg(5), |
brw_vec8_grf(src+3, 0), |
brw_vec8_grf(mask, 0)); |
goto done; |
} |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
for (n = 0; n < 4; n++) { |
if (p->gen >= 060) { |
brw_MUL(p, |
brw_message_reg(2 + 2*n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask, 0)); |
} else if (p->gen >= 045 && dw == 16) { |
brw_MUL(p, |
brw_message_reg(2 + n + BRW_MRF_COMPR4), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask, 0)); |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2 + n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask, 0)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
brw_MUL(p, |
brw_message_reg(2 + n + 4), |
brw_vec8_grf(src + 2*n+1, 0), |
brw_vec8_grf(mask+1, 0)); |
} |
} |
} |
done: |
brw_fb_write(p, dw); |
} |
static void brw_wm_write__opacity(struct brw_compile *p, int dw, |
int src, int mask) |
{ |
int n; |
if (dw == 8 && p->gen >= 060) { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2), |
brw_vec8_grf(src+0, 0), |
brw_vec1_grf(mask, 3)); |
brw_MUL(p, |
brw_message_reg(3), |
brw_vec8_grf(src+1, 0), |
brw_vec1_grf(mask, 3)); |
brw_MUL(p, |
brw_message_reg(4), |
brw_vec8_grf(src+2, 0), |
brw_vec1_grf(mask, 3)); |
brw_MUL(p, |
brw_message_reg(5), |
brw_vec8_grf(src+3, 0), |
brw_vec1_grf(mask, 3)); |
goto done; |
} |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
for (n = 0; n < 4; n++) { |
if (p->gen >= 060) { |
brw_MUL(p, |
brw_message_reg(2 + 2*n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec1_grf(mask, 3)); |
} else if (p->gen >= 045 && dw == 16) { |
brw_MUL(p, |
brw_message_reg(2 + n + BRW_MRF_COMPR4), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec1_grf(mask, 3)); |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2 + n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec1_grf(mask, 3)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
brw_MUL(p, |
brw_message_reg(2 + n + 4), |
brw_vec8_grf(src + 2*n+1, 0), |
brw_vec1_grf(mask, 3)); |
} |
} |
} |
done: |
brw_fb_write(p, dw); |
} |
static void brw_wm_write__mask_ca(struct brw_compile *p, int dw, |
int src, int mask) |
{ |
int n; |
if (dw == 8 && p->gen >= 060) { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2), |
brw_vec8_grf(src + 0, 0), |
brw_vec8_grf(mask + 0, 0)); |
brw_MUL(p, |
brw_message_reg(3), |
brw_vec8_grf(src + 1, 0), |
brw_vec8_grf(mask + 1, 0)); |
brw_MUL(p, |
brw_message_reg(4), |
brw_vec8_grf(src + 2, 0), |
brw_vec8_grf(mask + 2, 0)); |
brw_MUL(p, |
brw_message_reg(5), |
brw_vec8_grf(src + 3, 0), |
brw_vec8_grf(mask + 3, 0)); |
goto done; |
} |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
for (n = 0; n < 4; n++) { |
if (p->gen >= 060) { |
brw_MUL(p, |
brw_message_reg(2 + 2*n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask + 2*n, 0)); |
} else if (p->gen >= 045 && dw == 16) { |
brw_MUL(p, |
brw_message_reg(2 + n + BRW_MRF_COMPR4), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask + 2*n, 0)); |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_MUL(p, |
brw_message_reg(2 + n), |
brw_vec8_grf(src + 2*n, 0), |
brw_vec8_grf(mask + 2*n, 0)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
brw_MUL(p, |
brw_message_reg(2 + n + 4), |
brw_vec8_grf(src + 2*n + 1, 0), |
brw_vec8_grf(mask + 2*n + 1, 0)); |
} |
} |
} |
done: |
brw_fb_write(p, dw); |
} |
bool |
brw_wm_kernel__affine(struct brw_compile *p, int dispatch) |
{ |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
brw_wm_write(p, dispatch, brw_wm_affine(p, dispatch, 0, 1, 12)); |
return true; |
} |
bool |
brw_wm_kernel__affine_mask(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_affine(p, dispatch, 0, 1, 12); |
mask = brw_wm_affine__alpha(p, dispatch, 1, 6, 20); |
brw_wm_write__mask(p, dispatch, src, mask); |
return true; |
} |
bool |
brw_wm_kernel__affine_mask_ca(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_affine(p, dispatch, 0, 1, 12); |
mask = brw_wm_affine(p, dispatch, 1, 6, 20); |
brw_wm_write__mask_ca(p, dispatch, src, mask); |
return true; |
} |
bool |
brw_wm_kernel__affine_mask_sa(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_affine__alpha(p, dispatch, 0, 1, 12); |
mask = brw_wm_affine(p, dispatch, 1, 6, 16); |
brw_wm_write__mask(p, dispatch, mask, src); |
return true; |
} |
/* Projective variants: each coordinate is multiplied by a computed 1/z term before sampling. */
static void brw_wm_projective_st(struct brw_compile *p, int dw, |
int channel, int msg) |
{ |
int uv; |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
uv = p->gen >= 060 ? 6 : 3; |
} else { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
uv = p->gen >= 060 ? 4 : 3; |
} |
uv += 2*channel; |
msg++; |
if (p->gen >= 060) { |
/* First compute 1/z */ |
brw_PLN(p, |
brw_message_reg(msg), |
brw_vec1_grf(uv+1, 0), |
brw_vec8_grf(2, 0)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); |
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
} else |
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); |
brw_PLN(p, |
brw_vec8_grf(28, 0), |
brw_vec1_grf(uv, 0), |
brw_vec8_grf(2, 0)); |
brw_MUL(p, |
brw_message_reg(msg), |
brw_vec8_grf(28, 0), |
brw_vec8_grf(30, 0)); |
msg += dw/8; |
brw_PLN(p, |
brw_vec8_grf(28, 0), |
brw_vec1_grf(uv, 0), |
brw_vec8_grf(4, 0)); |
brw_MUL(p, |
brw_message_reg(msg), |
brw_vec8_grf(28, 0), |
brw_vec8_grf(30, 0)); |
} else { |
struct brw_reg r = brw_vec1_grf(uv, 0); |
/* First compute 1/z */ |
brw_LINE(p, brw_null_reg(), brw_vec1_grf(uv+1, 0), brw_vec8_grf(X16, 0)); |
brw_MAC(p, brw_vec8_grf(30, 0), brw_vec1_grf(uv+1, 1), brw_vec8_grf(Y16, 0)); |
if (dw == 16) { |
brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); |
brw_math_invert(p, brw_vec8_grf(31, 0), brw_vec8_grf(31, 0)); |
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
} else |
brw_math_invert(p, brw_vec8_grf(30, 0), brw_vec8_grf(30, 0)); |
/* Now compute the output s,t values */ |
brw_LINE(p, brw_null_reg(), __suboffset(r, 0), brw_vec8_grf(X16, 0)); |
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 1), brw_vec8_grf(Y16, 0)); |
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); |
msg += dw/8; |
brw_LINE(p, brw_null_reg(), __suboffset(r, 4), brw_vec8_grf(X16, 0)); |
brw_MAC(p, brw_vec8_grf(28, 0), __suboffset(r, 5), brw_vec8_grf(Y16, 0)); |
brw_MUL(p, brw_message_reg(msg), brw_vec8_grf(28, 0), brw_vec8_grf(30, 0)); |
} |
} |
/*
 * Compute projective texture coordinates for `channel`, then sample all
 * four components into `result`.  Returns the register index reported by
 * brw_wm_sample().
 */
static int brw_wm_projective(struct brw_compile *p, int dw,
                             int channel, int msg, int result)
{
    int sampled;

    brw_wm_projective_st(p, dw, channel, msg);
    sampled = brw_wm_sample(p, dw, channel, msg, result);

    return sampled;
}
/*
 * Compute projective texture coordinates for `channel`, then sample only
 * the alpha component.  Returns the register index reported by
 * brw_wm_sample__alpha().
 */
static int brw_wm_projective__alpha(struct brw_compile *p, int dw,
                                    int channel, int msg, int result)
{
    int alpha;

    brw_wm_projective_st(p, dw, channel, msg);
    alpha = brw_wm_sample__alpha(p, dw, channel, msg, result);

    return alpha;
}
bool |
brw_wm_kernel__projective(struct brw_compile *p, int dispatch) |
{ |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
brw_wm_write(p, dispatch, brw_wm_projective(p, dispatch, 0, 1, 12)); |
return true; |
} |
bool |
brw_wm_kernel__projective_mask(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_projective(p, dispatch, 0, 1, 12); |
mask = brw_wm_projective__alpha(p, dispatch, 1, 6, 20); |
brw_wm_write__mask(p, dispatch, src, mask); |
return true; |
} |
bool |
brw_wm_kernel__projective_mask_ca(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_projective(p, dispatch, 0, 1, 12); |
mask = brw_wm_projective(p, dispatch, 1, 6, 20); |
brw_wm_write__mask_ca(p, dispatch, src, mask); |
return true; |
} |
bool |
brw_wm_kernel__projective_mask_sa(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) |
brw_wm_xy(p, dispatch); |
src = brw_wm_projective__alpha(p, dispatch, 0, 1, 12); |
mask = brw_wm_projective(p, dispatch, 1, 6, 16); |
brw_wm_write__mask(p, dispatch, mask, src); |
return true; |
} |
bool |
brw_wm_kernel__affine_opacity(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) { |
brw_wm_xy(p, dispatch); |
mask = 5; |
} else |
mask = dispatch == 16 ? 8 : 6; |
src = brw_wm_affine(p, dispatch, 0, 1, 12); |
brw_wm_write__opacity(p, dispatch, src, mask); |
return true; |
} |
bool |
brw_wm_kernel__projective_opacity(struct brw_compile *p, int dispatch) |
{ |
int src, mask; |
if (p->gen < 060) { |
brw_wm_xy(p, dispatch); |
mask = 5; |
} else |
mask = dispatch == 16 ? 8 : 6; |
src = brw_wm_projective(p, dispatch, 0, 1, 12); |
brw_wm_write__opacity(p, dispatch, src, mask); |
return true; |
} |