Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2011 The Chromium OS authors.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "i915_reg.h"
  29. #include "i915_context.h"
  30. #include "i915_fpc.h"
  31.  
  32. #include "pipe/p_shader_tokens.h"
  33. #include "util/u_math.h"
  34. #include "util/u_memory.h"
  35. #include "util/u_string.h"
  36. #include "tgsi/tgsi_parse.h"
  37. #include "tgsi/tgsi_dump.h"
  38.  
  39. static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
  40. {
  41.    return (s1->Register.File == d1->Register.File &&
  42.            s1->Register.Indirect == d1->Register.Indirect &&
  43.            s1->Register.Dimension == d1->Register.Dimension &&
  44.            s1->Register.Index == d1->Register.Index);
  45. }
  46.  
  47. static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
  48. {
  49.    return (d1->Register.File == d2->Register.File &&
  50.            d1->Register.Indirect == d2->Register.Indirect &&
  51.            d1->Register.Dimension == d2->Register.Dimension &&
  52.            d1->Register.Index == d2->Register.Index);
  53. }
  54.  
  55. static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
  56. {
  57.    return (d1->Register.File == d2->Register.File &&
  58.            d1->Register.Indirect == d2->Register.Indirect &&
  59.            d1->Register.Dimension == d2->Register.Dimension &&
  60.            d1->Register.Index == d2->Register.Index &&
  61.            d1->Register.Absolute == d2->Register.Absolute &&
  62.            d1->Register.Negate == d2->Register.Negate);
  63. }
  64.  
  65. static boolean has_destination(unsigned opcode)
  66. {
  67.    return (opcode != TGSI_OPCODE_NOP &&
  68.            opcode != TGSI_OPCODE_KILL_IF &&
  69.            opcode != TGSI_OPCODE_KILL &&
  70.            opcode != TGSI_OPCODE_END &&
  71.            opcode != TGSI_OPCODE_RET);
  72. }
  73.  
  74. static boolean is_unswizzled(struct i915_full_src_register* r,
  75.                              unsigned write_mask)
  76. {
  77.    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
  78.       return FALSE;
  79.    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
  80.       return FALSE;
  81.    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
  82.       return FALSE;
  83.    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
  84.       return FALSE;
  85.    return TRUE;
  86. }
  87.  
  88. static boolean op_commutes(unsigned opcode)
  89. {
  90.    switch(opcode)
  91.    {
  92.       case TGSI_OPCODE_ADD:
  93.       case TGSI_OPCODE_MUL:
  94.       case TGSI_OPCODE_DP2:
  95.       case TGSI_OPCODE_DP3:
  96.       case TGSI_OPCODE_DP4:
  97.          return TRUE;
  98.    }
  99.    return FALSE;
  100. }
  101.  
  102. static unsigned op_neutral_element(unsigned opcode)
  103. {
  104.    switch(opcode)
  105.    {
  106.       case TGSI_OPCODE_ADD:
  107.          return TGSI_SWIZZLE_ZERO;
  108.       case TGSI_OPCODE_MUL:
  109.       case TGSI_OPCODE_DP2:
  110.       case TGSI_OPCODE_DP3:
  111.       case TGSI_OPCODE_DP4:
  112.          return TGSI_SWIZZLE_ONE;
  113.    }
  114.  
  115.    debug_printf("Unknown opcode %d\n",opcode);
  116.    return TGSI_SWIZZLE_ZERO;
  117. }
  118.  
  119. /*
  120.  * Sets the swizzle to the neutral element for the operation for the bits
  121.  * of writemask which are set, swizzle to identity otherwise.
  122.  */
  123. static void set_neutral_element_swizzle(struct i915_full_src_register* r,
  124.                                         unsigned write_mask,
  125.                                         unsigned neutral)
  126. {
  127.    if ( write_mask & TGSI_WRITEMASK_X )
  128.       r->Register.SwizzleX = neutral;
  129.    else
  130.       r->Register.SwizzleX = TGSI_SWIZZLE_X;
  131.  
  132.    if ( write_mask & TGSI_WRITEMASK_Y )
  133.       r->Register.SwizzleY = neutral;
  134.    else
  135.       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
  136.  
  137.    if ( write_mask & TGSI_WRITEMASK_Z )
  138.       r->Register.SwizzleZ = neutral;
  139.    else
  140.       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
  141.  
  142.    if ( write_mask & TGSI_WRITEMASK_W )
  143.       r->Register.SwizzleW = neutral;
  144.    else
  145.       r->Register.SwizzleW = TGSI_SWIZZLE_W;
  146. }
  147.  
  148. static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
  149. {
  150.    o->File      = i->File;
  151.    o->Indirect  = i->Indirect;
  152.    o->Dimension = i->Dimension;
  153.    o->Index     = i->Index;
  154.    o->SwizzleX  = i->SwizzleX;
  155.    o->SwizzleY  = i->SwizzleY;
  156.    o->SwizzleZ  = i->SwizzleZ;
  157.    o->SwizzleW  = i->SwizzleW;
  158.    o->Absolute  = i->Absolute;
  159.    o->Negate    = i->Negate;
  160. }
  161.  
  162. static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
  163. {
  164.    o->File      = i->File;
  165.    o->WriteMask = i->WriteMask;
  166.    o->Indirect  = i->Indirect;
  167.    o->Dimension = i->Dimension;
  168.    o->Index     = i->Index;
  169. }
  170.  
  171. static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
  172. {
  173.    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
  174.    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
  175.  
  176.    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
  177.  
  178.    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
  179.    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
  180.    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
  181. }
  182.  
  183. static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
  184. {
  185.    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
  186.       memcpy(o, i, sizeof(*o));
  187.    else
  188.       copy_instruction(&o->FullInstruction, &i->FullInstruction);
  189.  
  190. }
  191.  
  192. /*
  193.  * Optimize away things like:
  194.  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
  195.  *    MOV OUT[0].w, TEMP[2]
  196.  * into:
  197.  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
  198.  * This is useful for optimizing texenv.
  199.  */
  200. static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
  201. {
  202.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  203.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  204.         op_commutes(current->FullInstruction.Instruction.Opcode) &&
  205.         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
  206.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  207.         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
  208.         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
  209.         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
  210.         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
  211.         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
  212.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
  213.    {
  214.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  215.  
  216.       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
  217.       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
  218.                                   next->FullInstruction.Dst[0].Register.WriteMask,
  219.                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
  220.  
  221.       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
  222.                                                            next->FullInstruction.Dst[0].Register.WriteMask;
  223.       return;
  224.    }
  225.  
  226.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  227.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  228.         op_commutes(current->FullInstruction.Instruction.Opcode) &&
  229.         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
  230.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  231.         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
  232.         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
  233.         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
  234.         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
  235.         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
  236.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
  237.    {
  238.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  239.  
  240.       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
  241.       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
  242.                                   next->FullInstruction.Dst[0].Register.WriteMask,
  243.                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
  244.  
  245.       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
  246.                                                            next->FullInstruction.Dst[0].Register.WriteMask;
  247.       return;
  248.    }
  249. }
  250.  
  251. /*
  252.  * Optimize away things like:
  253.  *    MOV TEMP[0].xyz TEMP[0].xyzx
  254.  * into:
  255.  *    NOP
  256.  */
  257. static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
  258. {
  259.    union i915_full_token current;
  260.    copy_token(&current , tgsi_current);
  261.    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  262.         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  263.         has_destination(current.FullInstruction.Instruction.Opcode) &&
  264.         current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
  265.         current.FullInstruction.Src[0].Register.Absolute == 0 &&
  266.         current.FullInstruction.Src[0].Register.Negate == 0 &&
  267.         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
  268.         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
  269.    {
  270.       return TRUE;
  271.    }
  272.    return FALSE;
  273. }
  274.  
  275. /*
  276.  * Optimize away things like:
  277.  *    *** TEMP[0], TEMP[1], TEMP[2]
  278.  *    MOV OUT[0] TEMP[0]
  279.  * into:
  280.  *    *** OUT[0], TEMP[1], TEMP[2]
  281.  */
  282. static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
  283. {
  284.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  285.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  286.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  287.         has_destination(current->FullInstruction.Instruction.Opcode) &&
  288.         next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
  289.         next->FullInstruction.Src[0].Register.Absolute == 0 &&
  290.         next->FullInstruction.Src[0].Register.Negate == 0 &&
  291.         next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
  292.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
  293.         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
  294.         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
  295.    {
  296.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  297.  
  298.       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
  299.       return;
  300.    }
  301. }
  302.  
  303. struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
  304. {
  305.    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
  306.    struct tgsi_parse_context parse;
  307.    int i = 0;
  308.  
  309.    out_tokens->NumTokens = 0;
  310.  
  311.    /* Count the tokens */
  312.    tgsi_parse_init( &parse, tokens );
  313.    while( !tgsi_parse_end_of_tokens( &parse ) ) {
  314.       tgsi_parse_token( &parse );
  315.       out_tokens->NumTokens++;
  316.    }
  317.    tgsi_parse_free (&parse);
  318.  
  319.    /* Allocate our tokens */
  320.    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
  321.  
  322.    tgsi_parse_init( &parse, tokens );
  323.    while( !tgsi_parse_end_of_tokens( &parse ) ) {
  324.       tgsi_parse_token( &parse );
  325.  
  326.       if (i915_fpc_useless_mov(&parse.FullToken)) {
  327.          out_tokens->NumTokens--;
  328.          continue;
  329.       }
  330.  
  331.       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
  332.  
  333.       if (i > 0) {
  334.          i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
  335.          i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
  336.       }
  337.       i++;
  338.    }
  339.    tgsi_parse_free (&parse);
  340.  
  341.    return out_tokens;
  342. }
  343.  
  344. void i915_optimize_free(struct i915_token_list* tokens)
  345. {
  346.    free(tokens->Tokens);
  347.    free(tokens);
  348. }
  349.  
  350.  
  351.