0,0 → 1,1208 |
/************************************************************************** |
* |
* Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. |
* All Rights Reserved. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the |
* "Software"), to deal in the Software without restriction, including |
* without limitation the rights to use, copy, modify, merge, publish, |
* distribute, sub license, and/or sell copies of the Software, and to |
* permit persons to whom the Software is furnished to do so, subject to |
* the following conditions: |
* |
* The above copyright notice and this permission notice (including the |
* next paragraph) shall be included in all copies or substantial portions |
* of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
* IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
* |
**************************************************************************/ |
|
/* |
* \author |
* Michal Krol, |
* Keith Whitwell |
*/ |
|
#include "pipe/p_compiler.h" |
#include "pipe/p_context.h" |
#include "pipe/p_screen.h" |
#include "pipe/p_shader_tokens.h" |
#include "pipe/p_state.h" |
#include "tgsi/tgsi_ureg.h" |
#include "st_mesa_to_tgsi.h" |
#include "st_context.h" |
#include "program/prog_instruction.h" |
#include "program/prog_parameter.h" |
#include "util/u_debug.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
|
|
/* Bitmask (one bit per gl_register_file value) covering every register
 * file that src_register() resolves through t->constants.  Tested against
 * program->IndirectRegisterFiles to decide whether immediates may be used.
 */
#define PROGRAM_ANY_CONST \
   ((1 << PROGRAM_CONSTANT)    | \
    (1 << PROGRAM_UNIFORM)     | \
    (1 << PROGRAM_STATE_VAR)   | \
    (1 << PROGRAM_NAMED_PARAM) | \
    (1 << PROGRAM_ENV_PARAM)   | \
    (1 << PROGRAM_LOCAL_PARAM))
|
|
/**
 * One pending branch fix-up: records where a label-carrying instruction
 * wants to jump so the TGSI code can be patched after translation.
 */
struct label {
   unsigned branch_target;   /**< Mesa instruction index of the destination */
   unsigned token;           /**< slot handed to ureg_label_insn() and later
                              *   passed to ureg_fixup_label() */
};
|
|
/** |
* Intermediate state used during shader translation. |
*/ |
struct st_translate { |
struct ureg_program *ureg; |
|
struct ureg_dst temps[MAX_PROGRAM_TEMPS]; |
struct ureg_src *constants; |
struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; |
struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; |
struct ureg_dst address[1]; |
struct ureg_src samplers[PIPE_MAX_SAMPLERS]; |
|
/* Extra info for handling point size clamping in vertex shader */ |
struct ureg_dst pointSizeResult; /**< Actual point size output register */ |
struct ureg_src pointSizeConst; /**< Point size range constant register */ |
GLint pointSizeOutIndex; /**< Temp point size output register */ |
GLboolean prevInstWrotePointSize; |
|
const GLuint *inputMapping; |
const GLuint *outputMapping; |
|
/* For every instruction that contains a label (eg CALL), keep |
* details so that we can go back afterwards and emit the correct |
* tgsi instruction number for each label. |
*/ |
struct label *labels; |
unsigned labels_size; |
unsigned labels_count; |
|
/* Keep a record of the tgsi instruction number that each mesa |
* instruction starts at, will be used to fix up labels after |
* translation. |
*/ |
unsigned *insn; |
unsigned insn_size; |
unsigned insn_count; |
|
unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */ |
|
boolean error; |
}; |
|
|
/** |
* Make note of a branch to a label in the TGSI code. |
* After we've emitted all instructions, we'll go over the list |
* of labels built here and patch the TGSI code with the actual |
* location of each label. |
*/ |
static unsigned *get_label( struct st_translate *t, |
unsigned branch_target ) |
{ |
unsigned i; |
|
if (t->labels_count + 1 >= t->labels_size) { |
unsigned old_size = t->labels_size; |
t->labels_size = 1 << (util_logbase2(t->labels_size) + 1); |
t->labels = REALLOC( t->labels, |
old_size * sizeof t->labels[0], |
t->labels_size * sizeof t->labels[0] ); |
if (t->labels == NULL) { |
static unsigned dummy; |
t->error = TRUE; |
return &dummy; |
} |
} |
|
i = t->labels_count++; |
t->labels[i].branch_target = branch_target; |
return &t->labels[i].token; |
} |
|
|
/** |
* Called prior to emitting the TGSI code for each Mesa instruction. |
* Allocate additional space for instructions if needed. |
* Update the insn[] array so the next Mesa instruction points to |
* the next TGSI instruction. |
*/ |
static void set_insn_start( struct st_translate *t, |
unsigned start ) |
{ |
if (t->insn_count + 1 >= t->insn_size) { |
unsigned old_size = t->insn_size; |
t->insn_size = 1 << (util_logbase2(t->insn_size) + 1); |
t->insn = REALLOC( t->insn, |
old_size * sizeof t->insn[0], |
t->insn_size * sizeof t->insn[0] ); |
if (t->insn == NULL) { |
t->error = TRUE; |
return; |
} |
} |
|
t->insn[t->insn_count++] = start; |
} |
|
|
/** |
* Map a Mesa dst register to a TGSI ureg_dst register. |
*/ |
static struct ureg_dst |
dst_register( struct st_translate *t, |
gl_register_file file, |
GLuint index ) |
{ |
switch( file ) { |
case PROGRAM_UNDEFINED: |
return ureg_dst_undef(); |
|
case PROGRAM_TEMPORARY: |
if (ureg_dst_is_undef(t->temps[index])) |
t->temps[index] = ureg_DECL_temporary( t->ureg ); |
|
return t->temps[index]; |
|
case PROGRAM_OUTPUT: |
if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ) |
t->prevInstWrotePointSize = GL_TRUE; |
|
if (t->procType == TGSI_PROCESSOR_VERTEX) |
assert(index < VERT_RESULT_MAX); |
else if (t->procType == TGSI_PROCESSOR_FRAGMENT) |
assert(index < FRAG_RESULT_MAX); |
else |
assert(index < GEOM_RESULT_MAX); |
|
assert(t->outputMapping[index] < Elements(t->outputs)); |
|
return t->outputs[t->outputMapping[index]]; |
|
case PROGRAM_ADDRESS: |
return t->address[index]; |
|
default: |
debug_assert( 0 ); |
return ureg_dst_undef(); |
} |
} |
|
|
/** |
* Map a Mesa src register to a TGSI ureg_src register. |
*/ |
static struct ureg_src |
src_register( struct st_translate *t, |
gl_register_file file, |
GLint index ) |
{ |
switch( file ) { |
case PROGRAM_UNDEFINED: |
return ureg_src_undef(); |
|
case PROGRAM_TEMPORARY: |
assert(index >= 0); |
if (ureg_dst_is_undef(t->temps[index])) |
t->temps[index] = ureg_DECL_temporary( t->ureg ); |
assert(index < Elements(t->temps)); |
return ureg_src(t->temps[index]); |
|
case PROGRAM_NAMED_PARAM: |
case PROGRAM_ENV_PARAM: |
case PROGRAM_LOCAL_PARAM: |
case PROGRAM_UNIFORM: |
assert(index >= 0); |
return t->constants[index]; |
case PROGRAM_STATE_VAR: |
case PROGRAM_CONSTANT: /* ie, immediate */ |
if (index < 0) |
return ureg_DECL_constant( t->ureg, 0 ); |
else |
return t->constants[index]; |
|
case PROGRAM_INPUT: |
assert(t->inputMapping[index] < Elements(t->inputs)); |
return t->inputs[t->inputMapping[index]]; |
|
case PROGRAM_OUTPUT: |
assert(t->outputMapping[index] < Elements(t->outputs)); |
return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ |
|
case PROGRAM_ADDRESS: |
return ureg_src(t->address[index]); |
|
default: |
debug_assert( 0 ); |
return ureg_src_undef(); |
} |
} |
|
|
/** |
* Map mesa texture target to TGSI texture target. |
*/ |
static unsigned |
translate_texture_target( GLuint textarget, |
GLboolean shadow ) |
{ |
if (shadow) { |
switch( textarget ) { |
case TEXTURE_1D_INDEX: return TGSI_TEXTURE_SHADOW1D; |
case TEXTURE_2D_INDEX: return TGSI_TEXTURE_SHADOW2D; |
case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_SHADOWRECT; |
default: break; |
} |
} |
|
switch( textarget ) { |
case TEXTURE_1D_INDEX: return TGSI_TEXTURE_1D; |
case TEXTURE_2D_INDEX: return TGSI_TEXTURE_2D; |
case TEXTURE_3D_INDEX: return TGSI_TEXTURE_3D; |
case TEXTURE_CUBE_INDEX: return TGSI_TEXTURE_CUBE; |
case TEXTURE_RECT_INDEX: return TGSI_TEXTURE_RECT; |
default: |
debug_assert( 0 ); |
return TGSI_TEXTURE_1D; |
} |
} |
|
|
/** |
* Create a TGSI ureg_dst register from a Mesa dest register. |
*/ |
static struct ureg_dst |
translate_dst( struct st_translate *t, |
const struct prog_dst_register *DstReg, |
boolean saturate ) |
{ |
struct ureg_dst dst = dst_register( t, |
DstReg->File, |
DstReg->Index ); |
|
dst = ureg_writemask( dst, |
DstReg->WriteMask ); |
|
if (saturate) |
dst = ureg_saturate( dst ); |
|
if (DstReg->RelAddr) |
dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); |
|
return dst; |
} |
|
|
/** |
* Create a TGSI ureg_src register from a Mesa src register. |
*/ |
static struct ureg_src |
translate_src( struct st_translate *t, |
const struct prog_src_register *SrcReg ) |
{ |
struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); |
|
if (t->procType == TGSI_PROCESSOR_GEOMETRY && SrcReg->HasIndex2) { |
src = src_register( t, SrcReg->File, SrcReg->Index2 ); |
if (SrcReg->RelAddr2) |
src = ureg_src_dimension_indirect( src, ureg_src(t->address[0]), |
SrcReg->Index); |
else |
src = ureg_src_dimension( src, SrcReg->Index); |
} |
|
src = ureg_swizzle( src, |
GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, |
GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, |
GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, |
GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); |
|
if (SrcReg->Negate == NEGATE_XYZW) |
src = ureg_negate(src); |
|
if (SrcReg->Abs) |
src = ureg_abs(src); |
|
if (SrcReg->RelAddr) { |
src = ureg_src_indirect( src, ureg_src(t->address[0])); |
if (SrcReg->File != PROGRAM_INPUT && |
SrcReg->File != PROGRAM_OUTPUT) { |
/* If SrcReg->Index was negative, it was set to zero in |
* src_register(). Reassign it now. But don't do this |
* for input/output regs since they get remapped while |
* const buffers don't. |
*/ |
src.Index = SrcReg->Index; |
} |
} |
|
return src; |
} |
|
|
static struct ureg_src swizzle_4v( struct ureg_src src, |
const unsigned *swz ) |
{ |
return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); |
} |
|
|
/** |
* Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: |
* |
* SWZ dst, src.x-y10 |
* |
* becomes: |
* |
* MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} |
*/ |
static void emit_swz( struct st_translate *t, |
struct ureg_dst dst, |
const struct prog_src_register *SrcReg ) |
{ |
struct ureg_program *ureg = t->ureg; |
struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); |
|
unsigned negate_mask = SrcReg->Negate; |
|
unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | |
(GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | |
(GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | |
(GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); |
|
unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | |
(GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | |
(GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | |
(GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); |
|
unsigned negative_one_mask = one_mask & negate_mask; |
unsigned positive_one_mask = one_mask & ~negate_mask; |
|
struct ureg_src imm; |
unsigned i; |
unsigned mul_swizzle[4] = {0,0,0,0}; |
unsigned add_swizzle[4] = {0,0,0,0}; |
unsigned src_swizzle[4] = {0,0,0,0}; |
boolean need_add = FALSE; |
boolean need_mul = FALSE; |
|
if (dst.WriteMask == 0) |
return; |
|
/* Is this just a MOV? |
*/ |
if (zero_mask == 0 && |
one_mask == 0 && |
(negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) |
{ |
ureg_MOV( ureg, dst, translate_src( t, SrcReg )); |
return; |
} |
|
#define IMM_ZERO 0 |
#define IMM_ONE 1 |
#define IMM_NEG_ONE 2 |
|
imm = ureg_imm3f( ureg, 0, 1, -1 ); |
|
for (i = 0; i < 4; i++) { |
unsigned bit = 1 << i; |
|
if (dst.WriteMask & bit) { |
if (positive_one_mask & bit) { |
mul_swizzle[i] = IMM_ZERO; |
add_swizzle[i] = IMM_ONE; |
need_add = TRUE; |
} |
else if (negative_one_mask & bit) { |
mul_swizzle[i] = IMM_ZERO; |
add_swizzle[i] = IMM_NEG_ONE; |
need_add = TRUE; |
} |
else if (zero_mask & bit) { |
mul_swizzle[i] = IMM_ZERO; |
add_swizzle[i] = IMM_ZERO; |
need_add = TRUE; |
} |
else { |
add_swizzle[i] = IMM_ZERO; |
src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); |
need_mul = TRUE; |
if (negate_mask & bit) { |
mul_swizzle[i] = IMM_NEG_ONE; |
} |
else { |
mul_swizzle[i] = IMM_ONE; |
} |
} |
} |
} |
|
if (need_mul && need_add) { |
ureg_MAD( ureg, |
dst, |
swizzle_4v( src, src_swizzle ), |
swizzle_4v( imm, mul_swizzle ), |
swizzle_4v( imm, add_swizzle ) ); |
} |
else if (need_mul) { |
ureg_MUL( ureg, |
dst, |
swizzle_4v( src, src_swizzle ), |
swizzle_4v( imm, mul_swizzle ) ); |
} |
else if (need_add) { |
ureg_MOV( ureg, |
dst, |
swizzle_4v( imm, add_swizzle ) ); |
} |
else { |
debug_assert(0); |
} |
|
#undef IMM_ZERO |
#undef IMM_ONE |
#undef IMM_NEG_ONE |
} |
|
|
/** |
* Negate the value of DDY to match GL semantics where (0,0) is the |
* lower-left corner of the window. |
* Note that the GL_ARB_fragment_coord_conventions extension will |
* effect this someday. |
*/ |
static void emit_ddy( struct st_translate *t, |
struct ureg_dst dst, |
const struct prog_src_register *SrcReg ) |
{ |
struct ureg_program *ureg = t->ureg; |
struct ureg_src src = translate_src( t, SrcReg ); |
src = ureg_negate( src ); |
ureg_DDY( ureg, dst, src ); |
} |
|
|
|
static unsigned |
translate_opcode( unsigned op ) |
{ |
switch( op ) { |
case OPCODE_ARL: |
return TGSI_OPCODE_ARL; |
case OPCODE_ABS: |
return TGSI_OPCODE_ABS; |
case OPCODE_ADD: |
return TGSI_OPCODE_ADD; |
case OPCODE_BGNLOOP: |
return TGSI_OPCODE_BGNLOOP; |
case OPCODE_BGNSUB: |
return TGSI_OPCODE_BGNSUB; |
case OPCODE_BRA: |
return TGSI_OPCODE_BRA; |
case OPCODE_BRK: |
return TGSI_OPCODE_BRK; |
case OPCODE_CAL: |
return TGSI_OPCODE_CAL; |
case OPCODE_CMP: |
return TGSI_OPCODE_CMP; |
case OPCODE_CONT: |
return TGSI_OPCODE_CONT; |
case OPCODE_COS: |
return TGSI_OPCODE_COS; |
case OPCODE_DDX: |
return TGSI_OPCODE_DDX; |
case OPCODE_DDY: |
return TGSI_OPCODE_DDY; |
case OPCODE_DP2: |
return TGSI_OPCODE_DP2; |
case OPCODE_DP2A: |
return TGSI_OPCODE_DP2A; |
case OPCODE_DP3: |
return TGSI_OPCODE_DP3; |
case OPCODE_DP4: |
return TGSI_OPCODE_DP4; |
case OPCODE_DPH: |
return TGSI_OPCODE_DPH; |
case OPCODE_DST: |
return TGSI_OPCODE_DST; |
case OPCODE_ELSE: |
return TGSI_OPCODE_ELSE; |
case OPCODE_EMIT_VERTEX: |
return TGSI_OPCODE_EMIT; |
case OPCODE_END_PRIMITIVE: |
return TGSI_OPCODE_ENDPRIM; |
case OPCODE_ENDIF: |
return TGSI_OPCODE_ENDIF; |
case OPCODE_ENDLOOP: |
return TGSI_OPCODE_ENDLOOP; |
case OPCODE_ENDSUB: |
return TGSI_OPCODE_ENDSUB; |
case OPCODE_EX2: |
return TGSI_OPCODE_EX2; |
case OPCODE_EXP: |
return TGSI_OPCODE_EXP; |
case OPCODE_FLR: |
return TGSI_OPCODE_FLR; |
case OPCODE_FRC: |
return TGSI_OPCODE_FRC; |
case OPCODE_IF: |
return TGSI_OPCODE_IF; |
case OPCODE_TRUNC: |
return TGSI_OPCODE_TRUNC; |
case OPCODE_KIL: |
return TGSI_OPCODE_KIL; |
case OPCODE_KIL_NV: |
return TGSI_OPCODE_KILP; |
case OPCODE_LG2: |
return TGSI_OPCODE_LG2; |
case OPCODE_LOG: |
return TGSI_OPCODE_LOG; |
case OPCODE_LIT: |
return TGSI_OPCODE_LIT; |
case OPCODE_LRP: |
return TGSI_OPCODE_LRP; |
case OPCODE_MAD: |
return TGSI_OPCODE_MAD; |
case OPCODE_MAX: |
return TGSI_OPCODE_MAX; |
case OPCODE_MIN: |
return TGSI_OPCODE_MIN; |
case OPCODE_MOV: |
return TGSI_OPCODE_MOV; |
case OPCODE_MUL: |
return TGSI_OPCODE_MUL; |
case OPCODE_NOP: |
return TGSI_OPCODE_NOP; |
case OPCODE_NRM3: |
return TGSI_OPCODE_NRM; |
case OPCODE_NRM4: |
return TGSI_OPCODE_NRM4; |
case OPCODE_POW: |
return TGSI_OPCODE_POW; |
case OPCODE_RCP: |
return TGSI_OPCODE_RCP; |
case OPCODE_RET: |
return TGSI_OPCODE_RET; |
case OPCODE_RSQ: |
return TGSI_OPCODE_RSQ; |
case OPCODE_SCS: |
return TGSI_OPCODE_SCS; |
case OPCODE_SEQ: |
return TGSI_OPCODE_SEQ; |
case OPCODE_SGE: |
return TGSI_OPCODE_SGE; |
case OPCODE_SGT: |
return TGSI_OPCODE_SGT; |
case OPCODE_SIN: |
return TGSI_OPCODE_SIN; |
case OPCODE_SLE: |
return TGSI_OPCODE_SLE; |
case OPCODE_SLT: |
return TGSI_OPCODE_SLT; |
case OPCODE_SNE: |
return TGSI_OPCODE_SNE; |
case OPCODE_SSG: |
return TGSI_OPCODE_SSG; |
case OPCODE_SUB: |
return TGSI_OPCODE_SUB; |
case OPCODE_TEX: |
return TGSI_OPCODE_TEX; |
case OPCODE_TXB: |
return TGSI_OPCODE_TXB; |
case OPCODE_TXD: |
return TGSI_OPCODE_TXD; |
case OPCODE_TXL: |
return TGSI_OPCODE_TXL; |
case OPCODE_TXP: |
return TGSI_OPCODE_TXP; |
case OPCODE_XPD: |
return TGSI_OPCODE_XPD; |
case OPCODE_END: |
return TGSI_OPCODE_END; |
default: |
debug_assert( 0 ); |
return TGSI_OPCODE_NOP; |
} |
} |
|
|
static void |
compile_instruction( |
struct st_translate *t, |
const struct prog_instruction *inst ) |
{ |
struct ureg_program *ureg = t->ureg; |
GLuint i; |
struct ureg_dst dst[1]; |
struct ureg_src src[4]; |
unsigned num_dst; |
unsigned num_src; |
|
num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); |
num_src = _mesa_num_inst_src_regs( inst->Opcode ); |
|
if (num_dst) |
dst[0] = translate_dst( t, |
&inst->DstReg, |
inst->SaturateMode ); |
|
for (i = 0; i < num_src; i++) |
src[i] = translate_src( t, &inst->SrcReg[i] ); |
|
switch( inst->Opcode ) { |
case OPCODE_SWZ: |
emit_swz( t, dst[0], &inst->SrcReg[0] ); |
return; |
|
case OPCODE_BGNLOOP: |
case OPCODE_CAL: |
case OPCODE_ELSE: |
case OPCODE_ENDLOOP: |
case OPCODE_IF: |
debug_assert(num_dst == 0); |
ureg_label_insn( ureg, |
translate_opcode( inst->Opcode ), |
src, num_src, |
get_label( t, inst->BranchTarget )); |
return; |
|
case OPCODE_TEX: |
case OPCODE_TXB: |
case OPCODE_TXD: |
case OPCODE_TXL: |
case OPCODE_TXP: |
src[num_src++] = t->samplers[inst->TexSrcUnit]; |
ureg_tex_insn( ureg, |
translate_opcode( inst->Opcode ), |
dst, num_dst, |
translate_texture_target( inst->TexSrcTarget, |
inst->TexShadow ), |
src, num_src ); |
return; |
|
case OPCODE_SCS: |
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); |
ureg_insn( ureg, |
translate_opcode( inst->Opcode ), |
dst, num_dst, |
src, num_src ); |
break; |
|
case OPCODE_XPD: |
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); |
ureg_insn( ureg, |
translate_opcode( inst->Opcode ), |
dst, num_dst, |
src, num_src ); |
break; |
|
case OPCODE_NOISE1: |
case OPCODE_NOISE2: |
case OPCODE_NOISE3: |
case OPCODE_NOISE4: |
/* At some point, a motivated person could add a better |
* implementation of noise. Currently not even the nvidia |
* binary drivers do anything more than this. In any case, the |
* place to do this is in the GL state tracker, not the poor |
* driver. |
*/ |
ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) ); |
break; |
|
case OPCODE_DDY: |
emit_ddy( t, dst[0], &inst->SrcReg[0] ); |
break; |
|
default: |
ureg_insn( ureg, |
translate_opcode( inst->Opcode ), |
dst, num_dst, |
src, num_src ); |
break; |
} |
} |
|
|
/** |
* Emit the TGSI instructions to adjust the WPOS pixel center convention |
* Basically, add (adjX, adjY) to the fragment position. |
*/ |
static void |
emit_adjusted_wpos( struct st_translate *t, |
const struct gl_program *program, |
GLfloat adjX, GLfloat adjY) |
{ |
struct ureg_program *ureg = t->ureg; |
struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); |
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; |
|
/* Note that we bias X and Y and pass Z and W through unchanged. |
* The shader might also use gl_FragCoord.w and .z. |
*/ |
ureg_ADD(ureg, wpos_temp, wpos_input, |
ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f)); |
|
t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); |
} |
|
|
/** |
* Emit the TGSI instructions for inverting the WPOS y coordinate. |
* This code is unavoidable because it also depends on whether |
* a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). |
*/ |
static void |
emit_wpos_inversion( struct st_translate *t, |
const struct gl_program *program, |
boolean invert) |
{ |
struct ureg_program *ureg = t->ureg; |
|
/* Fragment program uses fragment position input. |
* Need to replace instances of INPUT[WPOS] with temp T |
* where T = INPUT[WPOS] by y is inverted. |
*/ |
static const gl_state_index wposTransformState[STATE_LENGTH] |
= { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 }; |
|
/* XXX: note we are modifying the incoming shader here! Need to |
* do this before emitting the constant decls below, or this |
* will be missed: |
*/ |
unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, |
wposTransformState); |
|
struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); |
struct ureg_dst wpos_temp; |
struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]]; |
|
/* MOV wpos_temp, input[wpos] |
*/ |
if (wpos_input.File == TGSI_FILE_TEMPORARY) |
wpos_temp = ureg_dst(wpos_input); |
else { |
wpos_temp = ureg_DECL_temporary( ureg ); |
ureg_MOV( ureg, wpos_temp, wpos_input ); |
} |
|
if (invert) { |
/* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy |
*/ |
ureg_MAD( ureg, |
ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), |
wpos_input, |
ureg_scalar(wpostrans, 0), |
ureg_scalar(wpostrans, 1)); |
} else { |
/* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww |
*/ |
ureg_MAD( ureg, |
ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), |
wpos_input, |
ureg_scalar(wpostrans, 2), |
ureg_scalar(wpostrans, 3)); |
} |
|
/* Use wpos_temp as position input from here on: |
*/ |
t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp); |
} |
|
|
/** |
* Emit fragment position/ooordinate code. |
*/ |
static void |
emit_wpos(struct st_context *st, |
struct st_translate *t, |
const struct gl_program *program, |
struct ureg_program *ureg) |
{ |
const struct gl_fragment_program *fp = |
(const struct gl_fragment_program *) program; |
struct pipe_screen *pscreen = st->pipe->screen; |
boolean invert = FALSE; |
|
if (fp->OriginUpperLeft) { |
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { |
} |
else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { |
ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); |
invert = TRUE; |
} |
else |
assert(0); |
} |
else { |
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) |
ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT); |
else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) |
invert = TRUE; |
else |
assert(0); |
} |
|
if (fp->PixelCenterInteger) { |
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) |
ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); |
else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) |
emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f); |
else |
assert(0); |
} |
else { |
if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { |
} |
else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { |
ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); |
emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f); |
} |
else |
assert(0); |
} |
|
/* we invert after adjustment so that we avoid the MOV to temporary, |
* and reuse the adjustment ADD instead */ |
emit_wpos_inversion(t, program, invert); |
} |
|
|
/** |
* OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. |
* TGSI uses +1 for front, -1 for back. |
* This function converts the TGSI value to the GL value. Simply clamping/ |
* saturating the value to [0,1] does the job. |
*/ |
static void |
emit_face_var( struct st_translate *t, |
const struct gl_program *program ) |
{ |
struct ureg_program *ureg = t->ureg; |
struct ureg_dst face_temp = ureg_DECL_temporary( ureg ); |
struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]]; |
|
/* MOV_SAT face_temp, input[face] |
*/ |
face_temp = ureg_saturate( face_temp ); |
ureg_MOV( ureg, face_temp, face_input ); |
|
/* Use face_temp as face input from here on: |
*/ |
t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp); |
} |
|
|
static void |
emit_edgeflags( struct st_translate *t, |
const struct gl_program *program ) |
{ |
struct ureg_program *ureg = t->ureg; |
struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]]; |
struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]]; |
|
ureg_MOV( ureg, edge_dst, edge_src ); |
} |
|
|
/** |
* Translate Mesa program to TGSI format. |
* \param program the program to translate |
* \param numInputs number of input registers used |
* \param inputMapping maps Mesa fragment program inputs to TGSI generic |
* input indexes |
* \param inputSemanticName the TGSI_SEMANTIC flag for each input |
* \param inputSemanticIndex the semantic index (ex: which texcoord) for |
* each input |
* \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input |
* \param numOutputs number of output registers used |
* \param outputMapping maps Mesa fragment program outputs to TGSI |
* generic outputs |
* \param outputSemanticName the TGSI_SEMANTIC flag for each output |
* \param outputSemanticIndex the semantic index (ex: which texcoord) for |
* each output |
* |
* \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY |
*/ |
enum pipe_error |
st_translate_mesa_program( |
struct gl_context *ctx, |
uint procType, |
struct ureg_program *ureg, |
const struct gl_program *program, |
GLuint numInputs, |
const GLuint inputMapping[], |
const ubyte inputSemanticName[], |
const ubyte inputSemanticIndex[], |
const GLuint interpMode[], |
GLuint numOutputs, |
const GLuint outputMapping[], |
const ubyte outputSemanticName[], |
const ubyte outputSemanticIndex[], |
boolean passthrough_edgeflags ) |
{ |
struct st_translate translate, *t; |
unsigned i; |
enum pipe_error ret = PIPE_OK; |
|
assert(numInputs <= Elements(t->inputs)); |
assert(numOutputs <= Elements(t->outputs)); |
|
t = &translate; |
memset(t, 0, sizeof *t); |
|
t->procType = procType; |
t->inputMapping = inputMapping; |
t->outputMapping = outputMapping; |
t->ureg = ureg; |
t->pointSizeOutIndex = -1; |
t->prevInstWrotePointSize = GL_FALSE; |
|
/*_mesa_print_program(program);*/ |
|
/* |
* Declare input attributes. |
*/ |
if (procType == TGSI_PROCESSOR_FRAGMENT) { |
for (i = 0; i < numInputs; i++) { |
if (program->InputFlags[0] & PROG_PARAM_BIT_CYL_WRAP) { |
t->inputs[i] = ureg_DECL_fs_input_cyl(ureg, |
inputSemanticName[i], |
inputSemanticIndex[i], |
interpMode[i], |
TGSI_CYLINDRICAL_WRAP_X); |
} |
else { |
t->inputs[i] = ureg_DECL_fs_input(ureg, |
inputSemanticName[i], |
inputSemanticIndex[i], |
interpMode[i]); |
} |
} |
|
if (program->InputsRead & FRAG_BIT_WPOS) { |
/* Must do this after setting up t->inputs, and before |
* emitting constant references, below: |
*/ |
emit_wpos(st_context(ctx), t, program, ureg); |
} |
|
if (program->InputsRead & FRAG_BIT_FACE) { |
emit_face_var( t, program ); |
} |
|
/* |
* Declare output attributes. |
*/ |
for (i = 0; i < numOutputs; i++) { |
switch (outputSemanticName[i]) { |
case TGSI_SEMANTIC_POSITION: |
t->outputs[i] = ureg_DECL_output( ureg, |
TGSI_SEMANTIC_POSITION, /* Z / Depth */ |
outputSemanticIndex[i] ); |
|
t->outputs[i] = ureg_writemask( t->outputs[i], |
TGSI_WRITEMASK_Z ); |
break; |
case TGSI_SEMANTIC_STENCIL: |
t->outputs[i] = ureg_DECL_output( ureg, |
TGSI_SEMANTIC_STENCIL, /* Stencil */ |
outputSemanticIndex[i] ); |
t->outputs[i] = ureg_writemask( t->outputs[i], |
TGSI_WRITEMASK_Y ); |
break; |
case TGSI_SEMANTIC_COLOR: |
t->outputs[i] = ureg_DECL_output( ureg, |
TGSI_SEMANTIC_COLOR, |
outputSemanticIndex[i] ); |
break; |
default: |
debug_assert(0); |
return 0; |
} |
} |
} |
else if (procType == TGSI_PROCESSOR_GEOMETRY) { |
for (i = 0; i < numInputs; i++) { |
t->inputs[i] = ureg_DECL_gs_input(ureg, |
i, |
inputSemanticName[i], |
inputSemanticIndex[i]); |
} |
|
for (i = 0; i < numOutputs; i++) { |
t->outputs[i] = ureg_DECL_output( ureg, |
outputSemanticName[i], |
outputSemanticIndex[i] ); |
} |
} |
else { |
assert(procType == TGSI_PROCESSOR_VERTEX); |
|
for (i = 0; i < numInputs; i++) { |
t->inputs[i] = ureg_DECL_vs_input(ureg, i); |
} |
|
for (i = 0; i < numOutputs; i++) { |
t->outputs[i] = ureg_DECL_output( ureg, |
outputSemanticName[i], |
outputSemanticIndex[i] ); |
if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && program->Id) { |
/* Writing to the point size result register requires special |
* handling to implement clamping. |
*/ |
static const gl_state_index pointSizeClampState[STATE_LENGTH] |
= { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, 0, 0, 0 }; |
/* XXX: note we are modifying the incoming shader here! Need to |
* do this before emitting the constant decls below, or this |
* will be missed: |
*/ |
unsigned pointSizeClampConst = |
_mesa_add_state_reference(program->Parameters, |
pointSizeClampState); |
struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg ); |
t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst ); |
t->pointSizeResult = t->outputs[i]; |
t->pointSizeOutIndex = i; |
t->outputs[i] = psizregtemp; |
} |
} |
if (passthrough_edgeflags) |
emit_edgeflags( t, program ); |
} |
|
/* Declare address register. |
*/ |
if (program->NumAddressRegs > 0) { |
debug_assert( program->NumAddressRegs == 1 ); |
t->address[0] = ureg_DECL_address( ureg ); |
} |
|
if (program->IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { |
/* If temps are accessed with indirect addressing, declare temporaries |
* in sequential order. Else, we declare them on demand elsewhere. |
*/ |
for (i = 0; i < program->NumTemporaries; i++) { |
/* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ |
t->temps[i] = ureg_DECL_temporary( t->ureg ); |
} |
} |
|
/* Emit constants and immediates. Mesa uses a single index space |
* for these, so we put all the translated regs in t->constants. |
*/ |
if (program->Parameters) { |
t->constants = CALLOC( program->Parameters->NumParameters, |
sizeof t->constants[0] ); |
if (t->constants == NULL) { |
ret = PIPE_ERROR_OUT_OF_MEMORY; |
goto out; |
} |
|
for (i = 0; i < program->Parameters->NumParameters; i++) { |
switch (program->Parameters->Parameters[i].Type) { |
case PROGRAM_ENV_PARAM: |
case PROGRAM_LOCAL_PARAM: |
case PROGRAM_STATE_VAR: |
case PROGRAM_NAMED_PARAM: |
case PROGRAM_UNIFORM: |
t->constants[i] = ureg_DECL_constant( ureg, i ); |
break; |
|
/* Emit immediates only when there's no indirect addressing of |
* the const buffer. |
* FIXME: Be smarter and recognize param arrays: |
* indirect addressing is only valid within the referenced |
* array. |
*/ |
case PROGRAM_CONSTANT: |
if (program->IndirectRegisterFiles & PROGRAM_ANY_CONST) |
t->constants[i] = ureg_DECL_constant( ureg, i ); |
else |
t->constants[i] = |
ureg_DECL_immediate( ureg, |
program->Parameters->ParameterValues[i], |
4 ); |
break; |
default: |
break; |
} |
} |
} |
|
/* texture samplers */ |
for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { |
if (program->SamplersUsed & (1 << i)) { |
t->samplers[i] = ureg_DECL_sampler( ureg, i ); |
} |
} |
|
/* Emit each instruction in turn: |
*/ |
for (i = 0; i < program->NumInstructions; i++) { |
set_insn_start( t, ureg_get_instruction_number( ureg )); |
compile_instruction( t, &program->Instructions[i] ); |
|
if (t->prevInstWrotePointSize && program->Id) { |
/* The previous instruction wrote to the (fake) vertex point size |
* result register. Now we need to clamp that value to the min/max |
* point size range, putting the result into the real point size |
* register. |
* Note that we can't do this easily at the end of program due to |
* possible early return. |
*/ |
set_insn_start( t, ureg_get_instruction_number( ureg )); |
ureg_MAX( t->ureg, |
ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X), |
ureg_src(t->outputs[t->pointSizeOutIndex]), |
ureg_swizzle(t->pointSizeConst, 1,1,1,1)); |
ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X), |
ureg_src(t->outputs[t->pointSizeOutIndex]), |
ureg_swizzle(t->pointSizeConst, 2,2,2,2)); |
} |
t->prevInstWrotePointSize = GL_FALSE; |
} |
|
/* Fix up all emitted labels: |
*/ |
for (i = 0; i < t->labels_count; i++) { |
ureg_fixup_label( ureg, |
t->labels[i].token, |
t->insn[t->labels[i].branch_target] ); |
} |
|
out: |
FREE(t->insn); |
FREE(t->labels); |
FREE(t->constants); |
|
if (t->error) { |
debug_printf("%s: translate error flag set\n", __FUNCTION__); |
} |
|
return ret; |
} |
|
|
/**
 * Release a token array through gallium's FREE().
 * Tokens cannot be freed with plain free(), otherwise the builtin gallium
 * malloc debugging will get confused.
 *
 * \param tokens  token array to release
 */
void
st_free_tokens(const struct tgsi_token *tokens)
{
   /* FREE() takes a non-const pointer, hence the cast. */
   void *mem = (void *) tokens;

   FREE(mem);
}