Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2011 The Chromium OS authors.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "i915_reg.h"
  29. #include "i915_context.h"
  30. #include "i915_fpc.h"
  31.  
  32. #include "pipe/p_shader_tokens.h"
  33. #include "util/u_math.h"
  34. #include "util/u_memory.h"
  35. #include "util/u_string.h"
  36. #include "tgsi/tgsi_parse.h"
  37. #include "tgsi/tgsi_dump.h"
  38. #include "tgsi/tgsi_exec.h"
  39.  
  40. struct i915_optimize_context
  41. {
  42.    int first_write[TGSI_EXEC_NUM_TEMPS];
  43.    int last_read[TGSI_EXEC_NUM_TEMPS];
  44. };
  45.  
  46. static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
  47. {
  48.    return (s1->Register.File == d1->Register.File &&
  49.            s1->Register.Indirect == d1->Register.Indirect &&
  50.            s1->Register.Dimension == d1->Register.Dimension &&
  51.            s1->Register.Index == d1->Register.Index);
  52. }
  53.  
  54. static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
  55. {
  56.    return (d1->Register.File == d2->Register.File &&
  57.            d1->Register.Indirect == d2->Register.Indirect &&
  58.            d1->Register.Dimension == d2->Register.Dimension &&
  59.            d1->Register.Index == d2->Register.Index);
  60. }
  61.  
  62. static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
  63. {
  64.    return (d1->Register.File == d2->Register.File &&
  65.            d1->Register.Indirect == d2->Register.Indirect &&
  66.            d1->Register.Dimension == d2->Register.Dimension &&
  67.            d1->Register.Index == d2->Register.Index &&
  68.            d1->Register.Absolute == d2->Register.Absolute &&
  69.            d1->Register.Negate == d2->Register.Negate);
  70. }
  71.  
  72. const static struct {
  73.    boolean is_texture;
  74.    boolean commutes;
  75.    unsigned neutral_element;
  76.    unsigned num_dst;
  77.    unsigned num_src;
  78. } op_table [TGSI_OPCODE_LAST] = {
  79.    [ TGSI_OPCODE_ABS     ] = { false,  false,                  0,  1,  1 },
  80.    [ TGSI_OPCODE_ADD     ] = { false,   true,  TGSI_SWIZZLE_ZERO,  1,  2 },
  81.    [ TGSI_OPCODE_CEIL    ] = { false,  false,                  0,  1,  1 },
  82.    [ TGSI_OPCODE_CMP     ] = { false,  false,                  0,  1,  2 },
  83.    [ TGSI_OPCODE_COS     ] = { false,  false,                  0,  1,  1 },
  84.    [ TGSI_OPCODE_DDX     ] = { false,  false,                  0,  1,  0 },
  85.    [ TGSI_OPCODE_DDY     ] = { false,  false,                  0,  1,  0 },
  86.    [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
  87.    [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
  88.    [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
  89.    [ TGSI_OPCODE_DPH     ] = { false,  false,                  0,  1,  2 },
  90.    [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
  91.    [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
  92.    [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
  93.    [ TGSI_OPCODE_FLR     ] = { false,  false,                  0,  1,  1 },
  94.    [ TGSI_OPCODE_FRC     ] = { false,  false,                  0,  1,  1 },
  95.    [ TGSI_OPCODE_KILL_IF ] = { false,  false,                  0,  0,  1 },
  96.    [ TGSI_OPCODE_KILL    ] = { false,  false,                  0,  0,  0 },
  97.    [ TGSI_OPCODE_LG2     ] = { false,  false,                  0,  1,  1 },
  98.    [ TGSI_OPCODE_LIT     ] = { false,  false,                  0,  1,  1 },
  99.    [ TGSI_OPCODE_LRP     ] = { false,  false,                  0,  1,  3 },
  100.    [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
  101.    [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
  102.    [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
  103.    [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
  104.    [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
  105.    [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
  106.    [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
  107.    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
  108.    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
  109.    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
  110.    [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
  111.    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
  112.    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
  113.    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
  114.    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
  115.    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
  116.    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
  117.    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
  118.    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
  119.    [ TGSI_OPCODE_SUB     ] = { false,  false,                  0,  1,  2 },
  120.    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
  121.    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
  122.    [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
  123.    [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
  124.    [ TGSI_OPCODE_XPD     ] = { false,  false,                  0,  1,  2 },
  125. };
  126.  
  127. static boolean op_has_dst(unsigned opcode)
  128. {
  129.    return (op_table[opcode].num_dst > 0);
  130. }
  131.  
  132. static int op_num_dst(unsigned opcode)
  133. {
  134.    return op_table[opcode].num_dst;
  135. }
  136.  
  137. static int op_num_src(unsigned opcode)
  138. {
  139.    return op_table[opcode].num_src;
  140. }
  141.  
  142. static boolean op_commutes(unsigned opcode)
  143. {
  144.    return op_table[opcode].commutes;
  145. }
  146.  
  147. static unsigned mask_for_unswizzled(int num_components)
  148. {
  149.    unsigned mask = 0;
  150.    switch(num_components)
  151.    {
  152.       case 4:
  153.          mask |= TGSI_WRITEMASK_W;
  154.       case 3:
  155.          mask |= TGSI_WRITEMASK_Z;
  156.       case 2:
  157.          mask |= TGSI_WRITEMASK_Y;
  158.       case 1:
  159.          mask |= TGSI_WRITEMASK_X;
  160.    }
  161.    return mask;
  162. }
  163.  
  164. static boolean is_unswizzled(struct i915_full_src_register *r,
  165.                              unsigned write_mask)
  166. {
  167.    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
  168.       return FALSE;
  169.    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
  170.       return FALSE;
  171.    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
  172.       return FALSE;
  173.    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
  174.       return FALSE;
  175.    return TRUE;
  176. }
  177.  
  178. static boolean op_is_texture(unsigned opcode)
  179. {
  180.    return op_table[opcode].is_texture;
  181. }
  182.  
  183. static unsigned op_neutral_element(unsigned opcode)
  184. {
  185.    unsigned ne = op_table[opcode].neutral_element;
  186.    if (!ne) {
  187.       debug_printf("No neutral element for opcode %d\n",opcode);
  188.       ne = TGSI_SWIZZLE_ZERO;
  189.    }
  190.    return ne;
  191. }
  192.  
  193. /*
  194.  * Sets the swizzle to the neutral element for the operation for the bits
  195.  * of writemask which are set, swizzle to identity otherwise.
  196.  */
  197. static void set_neutral_element_swizzle(struct i915_full_src_register *r,
  198.                                         unsigned write_mask,
  199.                                         unsigned neutral)
  200. {
  201.    if ( write_mask & TGSI_WRITEMASK_X )
  202.       r->Register.SwizzleX = neutral;
  203.    else
  204.       r->Register.SwizzleX = TGSI_SWIZZLE_X;
  205.  
  206.    if ( write_mask & TGSI_WRITEMASK_Y )
  207.       r->Register.SwizzleY = neutral;
  208.    else
  209.       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
  210.  
  211.    if ( write_mask & TGSI_WRITEMASK_Z )
  212.       r->Register.SwizzleZ = neutral;
  213.    else
  214.       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
  215.  
  216.    if ( write_mask & TGSI_WRITEMASK_W )
  217.       r->Register.SwizzleW = neutral;
  218.    else
  219.       r->Register.SwizzleW = TGSI_SWIZZLE_W;
  220. }
  221.  
  222. static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
  223. {
  224.    o->File      = i->File;
  225.    o->Indirect  = i->Indirect;
  226.    o->Dimension = i->Dimension;
  227.    o->Index     = i->Index;
  228.    o->SwizzleX  = i->SwizzleX;
  229.    o->SwizzleY  = i->SwizzleY;
  230.    o->SwizzleZ  = i->SwizzleZ;
  231.    o->SwizzleW  = i->SwizzleW;
  232.    o->Absolute  = i->Absolute;
  233.    o->Negate    = i->Negate;
  234. }
  235.  
  236. static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
  237. {
  238.    o->File      = i->File;
  239.    o->WriteMask = i->WriteMask;
  240.    o->Indirect  = i->Indirect;
  241.    o->Dimension = i->Dimension;
  242.    o->Index     = i->Index;
  243. }
  244.  
  245. static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
  246. {
  247.    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
  248.    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
  249.  
  250.    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
  251.  
  252.    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
  253.    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
  254.    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
  255. }
  256.  
  257. static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
  258. {
  259.    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
  260.       memcpy(o, i, sizeof(*o));
  261.    else
  262.       copy_instruction(&o->FullInstruction, &i->FullInstruction);
  263.  
  264. }
  265.  
  266. static void liveness_mark_written(struct i915_optimize_context *ctx,
  267.                                   struct i915_full_dst_register *dst_reg,
  268.                                   int pos)
  269. {
  270.    int dst_reg_index;
  271.    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
  272.       dst_reg_index = dst_reg->Register.Index;
  273.       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
  274.       /* dead -> live transition */
  275.       if (ctx->first_write[dst_reg_index] != -1)
  276.          ctx->first_write[dst_reg_index] = pos;
  277.    }
  278. }
  279.  
  280. static void liveness_mark_read(struct i915_optimize_context *ctx,
  281.                                struct i915_full_src_register *src_reg,
  282.                                int pos)
  283. {
  284.    int src_reg_index;
  285.    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
  286.       src_reg_index = src_reg->Register.Index;
  287.       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
  288.       /* live -> dead transition */
  289.       if (ctx->last_read[src_reg_index] != -1)
  290.          ctx->last_read[src_reg_index] = pos;
  291.    }
  292. }
  293.  
  294. static void liveness_analysis(struct i915_optimize_context *ctx,
  295.                               struct i915_token_list *tokens)
  296. {
  297.    struct i915_full_dst_register *dst_reg;
  298.    struct i915_full_src_register *src_reg;
  299.    union i915_full_token *current;
  300.    unsigned opcode;
  301.    int num_dst, num_src;
  302.    int i = 0;
  303.  
  304.    for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
  305.    {
  306.       ctx->first_write[i] = -1;
  307.       ctx->last_read[i] = -1;
  308.    }
  309.  
  310.    for(i = 0; i < tokens->NumTokens; i++)
  311.    {
  312.       current = &tokens->Tokens[i];
  313.  
  314.       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
  315.          continue;
  316.  
  317.       opcode = current->FullInstruction.Instruction.Opcode;
  318.       num_dst = op_num_dst(opcode);
  319.  
  320.       switch(num_dst)
  321.       {
  322.          case 1:
  323.             dst_reg = &current->FullInstruction.Dst[0];
  324.             liveness_mark_written(ctx, dst_reg, i);
  325.          case 0:
  326.             break;
  327.          default:
  328.             debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
  329.             break;
  330.       }
  331.    }
  332.  
  333.    for(i = tokens->NumTokens - 1; i >= 0; i--)
  334.    {
  335.       current = &tokens->Tokens[i];
  336.  
  337.       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
  338.          continue;
  339.  
  340.       opcode = current->FullInstruction.Instruction.Opcode;
  341.       num_src = op_num_src(opcode);
  342.  
  343.       switch(num_src)
  344.       {
  345.          case 3:
  346.             src_reg = &current->FullInstruction.Src[2];
  347.             liveness_mark_read(ctx, src_reg, i);
  348.          case 2:
  349.             src_reg = &current->FullInstruction.Src[1];
  350.             liveness_mark_read(ctx, src_reg, i);
  351.          case 1:
  352.             src_reg = &current->FullInstruction.Src[0];
  353.             liveness_mark_read(ctx, src_reg, i);
  354.          case 0:
  355.             break;
  356.          default:
  357.             debug_printf("Op %d has %d src regs\n", opcode, num_src);
  358.             break;
  359.       }
  360.    }
  361. }
  362.  
  363. static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
  364. {
  365.    int dst_reg_index = dst_reg->Register.Index;
  366.    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
  367.    return (from >= ctx->last_read[dst_reg_index]);
  368. }
  369.  
  370. /* Returns a mask with the components used for a texture access instruction */
  371. static unsigned i915_tex_mask(union i915_full_token *instr)
  372. {
  373.    unsigned mask;
  374.  
  375.    /* Get the number of coords */
  376.    mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
  377.  
  378.    /* Add the W component if projective */
  379.    if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
  380.       mask |= TGSI_WRITEMASK_W;
  381.  
  382.    return mask;
  383. }
  384.  
  385. static boolean target_is_texture2d(uint tex)
  386. {
  387.    switch (tex) {
  388.    case TGSI_TEXTURE_2D:
  389.    case TGSI_TEXTURE_RECT:
  390.       return true;
  391.    default:
  392.       return false;
  393.    }
  394. }
  395.  
  396.  
  397. /*
  398.  * Optimize away useless indirect texture reads:
  399.  *    MOV TEMP[0].xy, IN[0].xyyy
  400.  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
  401.  * into:
  402.  *    TEX TEMP[1], IN[0], SAMP[0], 2D
  403.  *
  404.  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
  405.  */
  406. static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
  407.                                              struct i915_token_list *tokens,
  408.                                              int index)
  409. {
  410.    union i915_full_token *current = &tokens->Tokens[index - 1];
  411.    union i915_full_token *next = &tokens->Tokens[index];
  412.  
  413.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  414.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  415.         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  416.         op_is_texture(next->FullInstruction.Instruction.Opcode) &&
  417.         target_is_texture2d(next->FullInstruction.Texture.Texture) &&
  418.         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
  419.         is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
  420.         unused_from(ctx, &current->FullInstruction.Dst[0], index))
  421.    {
  422.       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
  423.       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  424.    }
  425. }
  426.  
  427. /*
  428.  * Optimize away things like:
  429.  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
  430.  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
  431.  * into:
  432.  *    NOP
  433.  *    MOV OUT[0].xyw, TEMP[1].xyww
  434.  */
  435. static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
  436. {
  437.    struct i915_full_src_register *src_reg1, *src_reg2;
  438.    struct i915_full_dst_register *dst_reg1, *dst_reg2;
  439.    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
  440.  
  441.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  442.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  443.         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  444.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  445.         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
  446.         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
  447.         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
  448.         !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
  449.    {
  450.       src_reg1 = &current->FullInstruction.Src[0];
  451.       dst_reg1 = &current->FullInstruction.Dst[0];
  452.       src_reg2 = &next->FullInstruction.Src[0];
  453.       dst_reg2 = &next->FullInstruction.Dst[0];
  454.  
  455.       /* Start with swizzles from the first mov */
  456.       swizzle_x = src_reg1->Register.SwizzleX;
  457.       swizzle_y = src_reg1->Register.SwizzleY;
  458.       swizzle_z = src_reg1->Register.SwizzleZ;
  459.       swizzle_w = src_reg1->Register.SwizzleW;
  460.  
  461.       /* Pile the second mov on top */
  462.       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
  463.          swizzle_x = src_reg2->Register.SwizzleX;
  464.       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
  465.          swizzle_y = src_reg2->Register.SwizzleY;
  466.       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
  467.          swizzle_z = src_reg2->Register.SwizzleZ;
  468.       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
  469.          swizzle_w = src_reg2->Register.SwizzleW;
  470.  
  471.       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
  472.       src_reg2->Register.SwizzleX = swizzle_x;
  473.       src_reg2->Register.SwizzleY = swizzle_y;
  474.       src_reg2->Register.SwizzleZ = swizzle_z;
  475.       src_reg2->Register.SwizzleW = swizzle_w;
  476.  
  477.       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  478.  
  479.       return;
  480.    }
  481. }
  482.  
  483. /*
  484.  * Optimize away things like:
  485.  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
  486.  *    MOV OUT[0].w, TEMP[2]
  487.  * into:
  488.  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
  489.  * This is useful for optimizing texenv.
  490.  */
  491. static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
  492. {
  493.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  494.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  495.         op_commutes(current->FullInstruction.Instruction.Opcode) &&
  496.         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
  497.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  498.         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
  499.         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
  500.         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
  501.         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
  502.         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
  503.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
  504.    {
  505.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  506.  
  507.       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
  508.       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
  509.                                   next->FullInstruction.Dst[0].Register.WriteMask,
  510.                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
  511.  
  512.       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
  513.                                                            next->FullInstruction.Dst[0].Register.WriteMask;
  514.       return;
  515.    }
  516.  
  517.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  518.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  519.         op_commutes(current->FullInstruction.Instruction.Opcode) &&
  520.         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
  521.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  522.         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
  523.         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
  524.         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
  525.         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
  526.         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
  527.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
  528.    {
  529.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  530.  
  531.       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
  532.       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
  533.                                   next->FullInstruction.Dst[0].Register.WriteMask,
  534.                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
  535.  
  536.       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
  537.                                                            next->FullInstruction.Dst[0].Register.WriteMask;
  538.       return;
  539.    }
  540. }
  541.  
  542. /*
  543.  * Optimize away things like:
  544.  *    MOV TEMP[0].xyz TEMP[0].xyzx
  545.  * into:
  546.  *    NOP
  547.  */
  548. static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
  549. {
  550.    union i915_full_token current;
  551.    copy_token(&current , tgsi_current);
  552.    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  553.         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  554.         op_has_dst(current.FullInstruction.Instruction.Opcode) &&
  555.         current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
  556.         current.FullInstruction.Src[0].Register.Absolute == 0 &&
  557.         current.FullInstruction.Src[0].Register.Negate == 0 &&
  558.         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
  559.         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
  560.    {
  561.       return TRUE;
  562.    }
  563.    return FALSE;
  564. }
  565.  
  566. /*
  567.  * Optimize away things like:
  568.  *    *** TEMP[0], TEMP[1], TEMP[2]
  569.  *    MOV OUT[0] TEMP[0]
  570.  * into:
  571.  *    *** OUT[0], TEMP[1], TEMP[2]
  572.  */
  573. static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
  574.                                                      struct i915_token_list *tokens,
  575.                                                      int index)
  576. {
  577.    union i915_full_token *current = &tokens->Tokens[index - 1];
  578.    union i915_full_token *next = &tokens->Tokens[index];
  579.  
  580.    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
  581.    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  582.         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
  583.         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
  584.         op_has_dst(current->FullInstruction.Instruction.Opcode) &&
  585.         next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
  586.         next->FullInstruction.Src[0].Register.Absolute == 0 &&
  587.         next->FullInstruction.Src[0].Register.Negate == 0 &&
  588.         unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
  589.         current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
  590.         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
  591.         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
  592.         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
  593.    {
  594.       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
  595.  
  596.       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
  597.       return;
  598.    }
  599. }
  600.  
  601. struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
  602. {
  603.    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
  604.    struct tgsi_parse_context parse;
  605.    struct i915_optimize_context *ctx;
  606.    int i = 0;
  607.  
  608.    ctx = malloc(sizeof(*ctx));
  609.  
  610.    out_tokens->NumTokens = 0;
  611.  
  612.    /* Count the tokens */
  613.    tgsi_parse_init( &parse, tokens );
  614.    while( !tgsi_parse_end_of_tokens( &parse ) ) {
  615.       tgsi_parse_token( &parse );
  616.       out_tokens->NumTokens++;
  617.    }
  618.    tgsi_parse_free (&parse);
  619.  
  620.    /* Allocate our tokens */
  621.    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
  622.  
  623.    tgsi_parse_init( &parse, tokens );
  624.    while( !tgsi_parse_end_of_tokens( &parse ) ) {
  625.       tgsi_parse_token( &parse );
  626.  
  627.       if (i915_fpc_useless_mov(&parse.FullToken)) {
  628.          out_tokens->NumTokens--;
  629.          continue;
  630.       }
  631.  
  632.       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
  633.  
  634.       i++;
  635.    }
  636.    tgsi_parse_free (&parse);
  637.  
  638.    liveness_analysis(ctx, out_tokens);
  639.  
  640.    i = 1;
  641.    while( i < out_tokens->NumTokens) {
  642.       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
  643.       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
  644.       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
  645.       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
  646.       i++;
  647.    }
  648.  
  649.    free(ctx);
  650.  
  651.    return out_tokens;
  652. }
  653.  
  654. void i915_optimize_free(struct i915_token_list *tokens)
  655. {
  656.    free(tokens->Tokens);
  657.    free(tokens);
  658. }
  659.  
  660.  
  661.