/**************************************************************************
 *
 * Copyright 2011 The Chromium OS authors.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
|
#include "i915_reg.h" |
#include "i915_context.h" |
#include "i915_fpc.h" |
|
#include "pipe/p_shader_tokens.h" |
#include "util/u_math.h" |
#include "util/u_memory.h" |
#include "util/u_string.h" |
#include "tgsi/tgsi_parse.h" |
#include "tgsi/tgsi_dump.h" |
|
static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1) |
{ |
return (s1->Register.File == d1->Register.File && |
s1->Register.Indirect == d1->Register.Indirect && |
s1->Register.Dimension == d1->Register.Dimension && |
s1->Register.Index == d1->Register.Index); |
} |
|
static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2) |
{ |
return (d1->Register.File == d2->Register.File && |
d1->Register.Indirect == d2->Register.Indirect && |
d1->Register.Dimension == d2->Register.Dimension && |
d1->Register.Index == d2->Register.Index); |
} |
|
static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2) |
{ |
return (d1->Register.File == d2->Register.File && |
d1->Register.Indirect == d2->Register.Indirect && |
d1->Register.Dimension == d2->Register.Dimension && |
d1->Register.Index == d2->Register.Index && |
d1->Register.Absolute == d2->Register.Absolute && |
d1->Register.Negate == d2->Register.Negate); |
} |
|
static boolean has_destination(unsigned opcode) |
{ |
return (opcode != TGSI_OPCODE_NOP && |
opcode != TGSI_OPCODE_KILL_IF && |
opcode != TGSI_OPCODE_KILL && |
opcode != TGSI_OPCODE_END && |
opcode != TGSI_OPCODE_RET); |
} |
|
static boolean is_unswizzled(struct i915_full_src_register* r, |
unsigned write_mask) |
{ |
if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) |
return FALSE; |
if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) |
return FALSE; |
if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) |
return FALSE; |
if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) |
return FALSE; |
return TRUE; |
} |
|
static boolean op_commutes(unsigned opcode) |
{ |
switch(opcode) |
{ |
case TGSI_OPCODE_ADD: |
case TGSI_OPCODE_MUL: |
case TGSI_OPCODE_DP2: |
case TGSI_OPCODE_DP3: |
case TGSI_OPCODE_DP4: |
return TRUE; |
} |
return FALSE; |
} |
|
static unsigned op_neutral_element(unsigned opcode) |
{ |
switch(opcode) |
{ |
case TGSI_OPCODE_ADD: |
return TGSI_SWIZZLE_ZERO; |
case TGSI_OPCODE_MUL: |
case TGSI_OPCODE_DP2: |
case TGSI_OPCODE_DP3: |
case TGSI_OPCODE_DP4: |
return TGSI_SWIZZLE_ONE; |
} |
|
debug_printf("Unknown opcode %d\n",opcode); |
return TGSI_SWIZZLE_ZERO; |
} |
|
/* |
* Sets the swizzle to the neutral element for the operation for the bits |
* of writemask which are set, swizzle to identity otherwise. |
*/ |
static void set_neutral_element_swizzle(struct i915_full_src_register* r, |
unsigned write_mask, |
unsigned neutral) |
{ |
if ( write_mask & TGSI_WRITEMASK_X ) |
r->Register.SwizzleX = neutral; |
else |
r->Register.SwizzleX = TGSI_SWIZZLE_X; |
|
if ( write_mask & TGSI_WRITEMASK_Y ) |
r->Register.SwizzleY = neutral; |
else |
r->Register.SwizzleY = TGSI_SWIZZLE_Y; |
|
if ( write_mask & TGSI_WRITEMASK_Z ) |
r->Register.SwizzleZ = neutral; |
else |
r->Register.SwizzleZ = TGSI_SWIZZLE_Z; |
|
if ( write_mask & TGSI_WRITEMASK_W ) |
r->Register.SwizzleW = neutral; |
else |
r->Register.SwizzleW = TGSI_SWIZZLE_W; |
} |
|
static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i) |
{ |
o->File = i->File; |
o->Indirect = i->Indirect; |
o->Dimension = i->Dimension; |
o->Index = i->Index; |
o->SwizzleX = i->SwizzleX; |
o->SwizzleY = i->SwizzleY; |
o->SwizzleZ = i->SwizzleZ; |
o->SwizzleW = i->SwizzleW; |
o->Absolute = i->Absolute; |
o->Negate = i->Negate; |
} |
|
static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i) |
{ |
o->File = i->File; |
o->WriteMask = i->WriteMask; |
o->Indirect = i->Indirect; |
o->Dimension = i->Dimension; |
o->Index = i->Index; |
} |
|
static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i) |
{ |
memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); |
memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); |
|
copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); |
|
copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); |
copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); |
copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); |
} |
|
static void copy_token(union i915_full_token* o, union tgsi_full_token* i) |
{ |
if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) |
memcpy(o, i, sizeof(*o)); |
else |
copy_instruction(&o->FullInstruction, &i->FullInstruction); |
|
} |
|
/* |
* Optimize away things like: |
* MUL OUT[0].xyz, TEMP[1], TEMP[2] |
* MOV OUT[0].w, TEMP[2] |
* into: |
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] |
* This is useful for optimizing texenv. |
*/ |
static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next) |
{ |
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
op_commutes(current->FullInstruction.Instruction.Opcode) && |
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && |
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && |
same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[1]) && |
!same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && |
is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && |
is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && |
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) |
{ |
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); |
set_neutral_element_swizzle(¤t->FullInstruction.Src[0], |
next->FullInstruction.Dst[0].Register.WriteMask, |
op_neutral_element(current->FullInstruction.Instruction.Opcode)); |
|
current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | |
next->FullInstruction.Dst[0].Register.WriteMask; |
return; |
} |
|
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
op_commutes(current->FullInstruction.Instruction.Opcode) && |
current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate && |
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
same_dst_reg(&next->FullInstruction.Dst[0], ¤t->FullInstruction.Dst[0]) && |
same_src_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0]) && |
!same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) && |
is_unswizzled(¤t->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) && |
is_unswizzled(¤t->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) && |
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) ) |
{ |
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
|
set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); |
set_neutral_element_swizzle(¤t->FullInstruction.Src[1], |
next->FullInstruction.Dst[0].Register.WriteMask, |
op_neutral_element(current->FullInstruction.Instruction.Opcode)); |
|
current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask | |
next->FullInstruction.Dst[0].Register.WriteMask; |
return; |
} |
} |
|
/* |
* Optimize away things like: |
* MOV TEMP[0].xyz TEMP[0].xyzx |
* into: |
* NOP |
*/ |
static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current) |
{ |
union i915_full_token current; |
copy_token(¤t , tgsi_current); |
if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
has_destination(current.FullInstruction.Instruction.Opcode) && |
current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && |
current.FullInstruction.Src[0].Register.Absolute == 0 && |
current.FullInstruction.Src[0].Register.Negate == 0 && |
is_unswizzled(¤t.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) && |
same_src_dst_reg(¤t.FullInstruction.Src[0], ¤t.FullInstruction.Dst[0]) ) |
{ |
return TRUE; |
} |
return FALSE; |
} |
|
/* |
* Optimize away things like: |
* *** TEMP[0], TEMP[1], TEMP[2] |
* MOV OUT[0] TEMP[0] |
* into: |
* *** OUT[0], TEMP[1], TEMP[2] |
*/ |
static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next) |
{ |
if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && |
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && |
has_destination(current->FullInstruction.Instruction.Opcode) && |
next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE && |
next->FullInstruction.Src[0].Register.Absolute == 0 && |
next->FullInstruction.Src[0].Register.Negate == 0 && |
next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT && |
is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) && |
current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask && |
same_src_dst_reg(&next->FullInstruction.Src[0], ¤t->FullInstruction.Dst[0]) ) |
{ |
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; |
|
current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; |
return; |
} |
} |
|
struct i915_token_list* i915_optimize(const struct tgsi_token *tokens) |
{ |
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); |
struct tgsi_parse_context parse; |
int i = 0; |
|
out_tokens->NumTokens = 0; |
|
/* Count the tokens */ |
tgsi_parse_init( &parse, tokens ); |
while( !tgsi_parse_end_of_tokens( &parse ) ) { |
tgsi_parse_token( &parse ); |
out_tokens->NumTokens++; |
} |
tgsi_parse_free (&parse); |
|
/* Allocate our tokens */ |
out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); |
|
tgsi_parse_init( &parse, tokens ); |
while( !tgsi_parse_end_of_tokens( &parse ) ) { |
tgsi_parse_token( &parse ); |
|
if (i915_fpc_useless_mov(&parse.FullToken)) { |
out_tokens->NumTokens--; |
continue; |
} |
|
copy_token(&out_tokens->Tokens[i] , &parse.FullToken); |
|
if (i > 0) { |
i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); |
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); |
} |
i++; |
} |
tgsi_parse_free (&parse); |
|
return out_tokens; |
} |
|
void i915_optimize_free(struct i915_token_list* tokens) |
{ |
free(tokens->Tokens); |
free(tokens); |
} |
|
|