Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  20.  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21.  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24.  
  25.  
  26. #include "main/glheader.h"
  27. #include "main/context.h"
  28. #include "main/macros.h"
  29. #include "program.h"
  30. #include "prog_instruction.h"
  31. #include "prog_optimize.h"
  32. #include "prog_print.h"
  33.  
  34.  
  35. #define MAX_LOOP_NESTING 50
  36. /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
  37.  * register allocate many temporary values into that small number of
  38.  * temps.  So allow large temporary indices coming into the register
  39.  * allocator.
  40.  */
  41. #define REG_ALLOCATE_MAX_PROGRAM_TEMPS  ((1 << INST_INDEX_BITS) - 1)
  42.  
  43. static GLboolean dbg = GL_FALSE;
  44.  
  45. #define NO_MASK 0xf
  46.  
  47. /**
  48.  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
  49.  * are read from the given src in this instruction, We also provide
  50.  * one optional masks which may mask other components in the dst
  51.  * register
  52.  */
  53. static GLuint
  54. get_src_arg_mask(const struct prog_instruction *inst,
  55.                  GLuint arg, GLuint dst_mask)
  56. {
  57.    GLuint read_mask, channel_mask;
  58.    GLuint comp;
  59.  
  60.    assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
  61.  
  62.    /* Form the dst register, find the written channels */
  63.    if (inst->CondUpdate) {
  64.       channel_mask = WRITEMASK_XYZW;
  65.    }
  66.    else {
  67.       switch (inst->Opcode) {
  68.       case OPCODE_MOV:
  69.       case OPCODE_MIN:
  70.       case OPCODE_MAX:
  71.       case OPCODE_ABS:
  72.       case OPCODE_ADD:
  73.       case OPCODE_MAD:
  74.       case OPCODE_MUL:
  75.       case OPCODE_SUB:
  76.       case OPCODE_CMP:
  77.       case OPCODE_FLR:
  78.       case OPCODE_FRC:
  79.       case OPCODE_LRP:
  80.       case OPCODE_SEQ:
  81.       case OPCODE_SGE:
  82.       case OPCODE_SGT:
  83.       case OPCODE_SLE:
  84.       case OPCODE_SLT:
  85.       case OPCODE_SNE:
  86.       case OPCODE_SSG:
  87.          channel_mask = inst->DstReg.WriteMask & dst_mask;
  88.          break;
  89.       case OPCODE_RCP:
  90.       case OPCODE_SIN:
  91.       case OPCODE_COS:
  92.       case OPCODE_RSQ:
  93.       case OPCODE_POW:
  94.       case OPCODE_EX2:
  95.       case OPCODE_LOG:
  96.          channel_mask = WRITEMASK_X;
  97.          break;
  98.       case OPCODE_DP2:
  99.          channel_mask = WRITEMASK_XY;
  100.          break;
  101.       case OPCODE_DP3:
  102.       case OPCODE_XPD:
  103.          channel_mask = WRITEMASK_XYZ;
  104.          break;
  105.       default:
  106.          channel_mask = WRITEMASK_XYZW;
  107.          break;
  108.       }
  109.    }
  110.  
  111.    /* Now, given the src swizzle and the written channels, find which
  112.     * components are actually read
  113.     */
  114.    read_mask = 0x0;
  115.    for (comp = 0; comp < 4; ++comp) {
  116.       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
  117.       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
  118.          read_mask |= 1 << coord;
  119.    }
  120.  
  121.    return read_mask;
  122. }
  123.  
  124.  
  125. /**
  126.  * For a MOV instruction, compute a write mask when src register also has
  127.  * a mask
  128.  */
  129. static GLuint
  130. get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
  131. {
  132.    const GLuint mask = mov->DstReg.WriteMask;
  133.    GLuint comp;
  134.    GLuint updated_mask = 0x0;
  135.  
  136.    assert(mov->Opcode == OPCODE_MOV);
  137.  
  138.    for (comp = 0; comp < 4; ++comp) {
  139.       GLuint src_comp;
  140.       if ((mask & (1 << comp)) == 0)
  141.          continue;
  142.       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
  143.       if ((src_mask & (1 << src_comp)) == 0)
  144.          continue;
  145.       updated_mask |= 1 << comp;
  146.    }
  147.  
  148.    return updated_mask;
  149. }
  150.  
  151.  
  152. /**
  153.  * Ensure that the swizzle is regular.  That is, all of the swizzle
  154.  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
  155.  */
  156. static GLboolean
  157. is_swizzle_regular(GLuint swz)
  158. {
  159.    return GET_SWZ(swz,0) <= SWIZZLE_W &&
  160.           GET_SWZ(swz,1) <= SWIZZLE_W &&
  161.           GET_SWZ(swz,2) <= SWIZZLE_W &&
  162.           GET_SWZ(swz,3) <= SWIZZLE_W;
  163. }
  164.  
  165.  
  166. /**
  167.  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
  168.  * \return number of instructions removed
  169.  */
  170. static GLuint
  171. remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
  172. {
  173.    GLint i, removeEnd = 0, removeCount = 0;
  174.    GLuint totalRemoved = 0;
  175.  
  176.    /* go backward */
  177.    for (i = prog->NumInstructions - 1; i >= 0; i--) {
  178.       if (removeFlags[i]) {
  179.          totalRemoved++;
  180.          if (removeCount == 0) {
  181.             /* begin a run of instructions to remove */
  182.             removeEnd = i;
  183.             removeCount = 1;
  184.          }
  185.          else {
  186.             /* extend the run of instructions to remove */
  187.             removeCount++;
  188.          }
  189.       }
  190.       else {
  191.          /* don't remove this instruction, but check if the preceeding
  192.           * instructions are to be removed.
  193.           */
  194.          if (removeCount > 0) {
  195.             GLint removeStart = removeEnd - removeCount + 1;
  196.             _mesa_delete_instructions(prog, removeStart, removeCount);
  197.             removeStart = removeCount = 0; /* reset removal info */
  198.          }
  199.       }
  200.    }
  201.    /* Finish removing if the first instruction was to be removed. */
  202.    if (removeCount > 0) {
  203.       GLint removeStart = removeEnd - removeCount + 1;
  204.       _mesa_delete_instructions(prog, removeStart, removeCount);
  205.    }
  206.    return totalRemoved;
  207. }
  208.  
  209.  
  210. /**
  211.  * Remap register indexes according to map.
  212.  * \param prog  the program to search/replace
  213.  * \param file  the type of register file to search/replace
  214.  * \param map  maps old register indexes to new indexes
  215.  */
  216. static void
  217. replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
  218. {
  219.    GLuint i;
  220.  
  221.    for (i = 0; i < prog->NumInstructions; i++) {
  222.       struct prog_instruction *inst = prog->Instructions + i;
  223.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  224.       GLuint j;
  225.       for (j = 0; j < numSrc; j++) {
  226.          if (inst->SrcReg[j].File == file) {
  227.             GLuint index = inst->SrcReg[j].Index;
  228.             assert(map[index] >= 0);
  229.             inst->SrcReg[j].Index = map[index];
  230.          }
  231.       }
  232.       if (inst->DstReg.File == file) {
  233.          const GLuint index = inst->DstReg.Index;
  234.          assert(map[index] >= 0);
  235.          inst->DstReg.Index = map[index];
  236.       }
  237.    }
  238. }
  239.  
  240.  
  241. /**
  242.  * Remove dead instructions from the given program.
  243.  * This is very primitive for now.  Basically look for temp registers
  244.  * that are written to but never read.  Remove any instructions that
  245.  * write to such registers.  Be careful with condition code setters.
  246.  */
  247. static GLboolean
  248. _mesa_remove_dead_code_global(struct gl_program *prog)
  249. {
  250.    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
  251.    GLboolean *removeInst; /* per-instruction removal flag */
  252.    GLuint i, rem = 0, comp;
  253.  
  254.    memset(tempRead, 0, sizeof(tempRead));
  255.  
  256.    if (dbg) {
  257.       printf("Optimize: Begin dead code removal\n");
  258.       /*_mesa_print_program(prog);*/
  259.    }
  260.  
  261.    removeInst =
  262.       calloc(prog->NumInstructions, sizeof(GLboolean));
  263.  
  264.    /* Determine which temps are read and written */
  265.    for (i = 0; i < prog->NumInstructions; i++) {
  266.       const struct prog_instruction *inst = prog->Instructions + i;
  267.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  268.       GLuint j;
  269.  
  270.       /* check src regs */
  271.       for (j = 0; j < numSrc; j++) {
  272.          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  273.             const GLuint index = inst->SrcReg[j].Index;
  274.             GLuint read_mask;
  275.             assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  276.             read_mask = get_src_arg_mask(inst, j, NO_MASK);
  277.  
  278.             if (inst->SrcReg[j].RelAddr) {
  279.                if (dbg)
  280.                   printf("abort remove dead code (indirect temp)\n");
  281.                goto done;
  282.             }
  283.  
  284.             for (comp = 0; comp < 4; comp++) {
  285.                const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
  286.                if (swz <= SWIZZLE_W) {
  287.                   if ((read_mask & (1 << swz)) == 0)
  288.                      continue;
  289.                   tempRead[index][swz] = GL_TRUE;
  290.                }
  291.             }
  292.          }
  293.       }
  294.  
  295.       /* check dst reg */
  296.       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  297.          const GLuint index = inst->DstReg.Index;
  298.          assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  299.  
  300.          if (inst->DstReg.RelAddr) {
  301.             if (dbg)
  302.                printf("abort remove dead code (indirect temp)\n");
  303.             goto done;
  304.          }
  305.  
  306.          if (inst->CondUpdate) {
  307.             /* If we're writing to this register and setting condition
  308.              * codes we cannot remove the instruction.  Prevent removal
  309.              * by setting the 'read' flag.
  310.              */
  311.             tempRead[index][0] = GL_TRUE;
  312.             tempRead[index][1] = GL_TRUE;
  313.             tempRead[index][2] = GL_TRUE;
  314.             tempRead[index][3] = GL_TRUE;
  315.          }
  316.       }
  317.    }
  318.  
  319.    /* find instructions that write to dead registers, flag for removal */
  320.    for (i = 0; i < prog->NumInstructions; i++) {
  321.       struct prog_instruction *inst = prog->Instructions + i;
  322.       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
  323.  
  324.       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
  325.          GLint chan, index = inst->DstReg.Index;
  326.  
  327.          for (chan = 0; chan < 4; chan++) {
  328.             if (!tempRead[index][chan] &&
  329.                 inst->DstReg.WriteMask & (1 << chan)) {
  330.                if (dbg) {
  331.                   printf("Remove writemask on %u.%c\n", i,
  332.                                chan == 3 ? 'w' : 'x' + chan);
  333.                }
  334.                inst->DstReg.WriteMask &= ~(1 << chan);
  335.                rem++;
  336.             }
  337.          }
  338.  
  339.          if (inst->DstReg.WriteMask == 0) {
  340.             /* If we cleared all writes, the instruction can be removed. */
  341.             if (dbg)
  342.                printf("Remove instruction %u: \n", i);
  343.             removeInst[i] = GL_TRUE;
  344.          }
  345.       }
  346.    }
  347.  
  348.    /* now remove the instructions which aren't needed */
  349.    rem = remove_instructions(prog, removeInst);
  350.  
  351.    if (dbg) {
  352.       printf("Optimize: End dead code removal.\n");
  353.       printf("  %u channel writes removed\n", rem);
  354.       printf("  %u instructions removed\n", rem);
  355.       /*_mesa_print_program(prog);*/
  356.    }
  357.  
  358. done:
  359.    free(removeInst);
  360.    return rem != 0;
  361. }
  362.  
  363.  
  364. enum inst_use
  365. {
  366.    READ,
  367.    WRITE,
  368.    FLOW,
  369.    END
  370. };
  371.  
  372.  
  373. /**
  374.  * Scan forward in program from 'start' for the next occurances of TEMP[index].
  375.  * We look if an instruction reads the component given by the masks and if they
  376.  * are overwritten.
  377.  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
  378.  * that we can't look further.
  379.  */
  380. static enum inst_use
  381. find_next_use(const struct gl_program *prog,
  382.               GLuint start,
  383.               GLuint index,
  384.               GLuint mask)
  385. {
  386.    GLuint i;
  387.  
  388.    for (i = start; i < prog->NumInstructions; i++) {
  389.       const struct prog_instruction *inst = prog->Instructions + i;
  390.       switch (inst->Opcode) {
  391.       case OPCODE_BGNLOOP:
  392.       case OPCODE_BGNSUB:
  393.       case OPCODE_CAL:
  394.       case OPCODE_CONT:
  395.       case OPCODE_IF:
  396.       case OPCODE_ELSE:
  397.       case OPCODE_ENDIF:
  398.       case OPCODE_ENDLOOP:
  399.       case OPCODE_ENDSUB:
  400.       case OPCODE_RET:
  401.          return FLOW;
  402.       case OPCODE_END:
  403.          return END;
  404.       default:
  405.          {
  406.             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  407.             GLuint j;
  408.             for (j = 0; j < numSrc; j++) {
  409.                if (inst->SrcReg[j].RelAddr ||
  410.                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
  411.                    inst->SrcReg[j].Index == (GLint)index &&
  412.                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
  413.                   return READ;
  414.             }
  415.             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
  416.                 inst->DstReg.File == PROGRAM_TEMPORARY &&
  417.                 inst->DstReg.Index == index) {
  418.                mask &= ~inst->DstReg.WriteMask;
  419.                if (mask == 0)
  420.                   return WRITE;
  421.             }
  422.          }
  423.       }
  424.    }
  425.    return END;
  426. }
  427.  
  428.  
  429. /**
  430.  * Is the given instruction opcode a flow-control opcode?
  431.  * XXX maybe move this into prog_instruction.[ch]
  432.  */
  433. static GLboolean
  434. _mesa_is_flow_control_opcode(enum prog_opcode opcode)
  435. {
  436.    switch (opcode) {
  437.    case OPCODE_BGNLOOP:
  438.    case OPCODE_BGNSUB:
  439.    case OPCODE_CAL:
  440.    case OPCODE_CONT:
  441.    case OPCODE_IF:
  442.    case OPCODE_ELSE:
  443.    case OPCODE_END:
  444.    case OPCODE_ENDIF:
  445.    case OPCODE_ENDLOOP:
  446.    case OPCODE_ENDSUB:
  447.    case OPCODE_RET:
  448.       return GL_TRUE;
  449.    default:
  450.       return GL_FALSE;
  451.    }
  452. }
  453.  
  454.  
  455. /**
  456.  * Test if the given instruction is a simple MOV (no conditional updating,
  457.  * not relative addressing, no negation/abs, etc).
  458.  */
  459. static GLboolean
  460. can_downward_mov_be_modifed(const struct prog_instruction *mov)
  461. {
  462.    return
  463.       mov->Opcode == OPCODE_MOV &&
  464.       mov->CondUpdate == GL_FALSE &&
  465.       mov->SrcReg[0].RelAddr == 0 &&
  466.       mov->SrcReg[0].Negate == 0 &&
  467.       mov->SrcReg[0].Abs == 0 &&
  468.       mov->SrcReg[0].HasIndex2 == 0 &&
  469.       mov->SrcReg[0].RelAddr2 == 0 &&
  470.       mov->DstReg.RelAddr == 0 &&
  471.       mov->DstReg.CondMask == COND_TR;
  472. }
  473.  
  474.  
  475. static GLboolean
  476. can_upward_mov_be_modifed(const struct prog_instruction *mov)
  477. {
  478.    return
  479.       can_downward_mov_be_modifed(mov) &&
  480.       mov->DstReg.File == PROGRAM_TEMPORARY &&
  481.       mov->SaturateMode == SATURATE_OFF;
  482. }
  483.  
  484.  
  485. /**
  486.  * Try to remove use of extraneous MOV instructions, to free them up for dead
  487.  * code removal.
  488.  */
  489. static void
  490. _mesa_remove_extra_move_use(struct gl_program *prog)
  491. {
  492.    GLuint i, j;
  493.  
  494.    if (dbg) {
  495.       printf("Optimize: Begin remove extra move use\n");
  496.       _mesa_print_program(prog);
  497.    }
  498.  
  499.    /*
  500.     * Look for sequences such as this:
  501.     *    MOV tmpX, arg0;
  502.     *    ...
  503.     *    FOO tmpY, tmpX, arg1;
  504.     * and convert into:
  505.     *    MOV tmpX, arg0;
  506.     *    ...
  507.     *    FOO tmpY, arg0, arg1;
  508.     */
  509.  
  510.    for (i = 0; i + 1 < prog->NumInstructions; i++) {
  511.       const struct prog_instruction *mov = prog->Instructions + i;
  512.       GLuint dst_mask, src_mask;
  513.       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
  514.          continue;
  515.  
  516.       /* Scanning the code, we maintain the components which are still active in
  517.        * these two masks
  518.        */
  519.       dst_mask = mov->DstReg.WriteMask;
  520.       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
  521.  
  522.       /* Walk through remaining instructions until the or src reg gets
  523.        * rewritten or we get into some flow-control, eliminating the use of
  524.        * this MOV.
  525.        */
  526.       for (j = i + 1; j < prog->NumInstructions; j++) {
  527.          struct prog_instruction *inst2 = prog->Instructions + j;
  528.          GLuint arg;
  529.  
  530.          if (_mesa_is_flow_control_opcode(inst2->Opcode))
  531.              break;
  532.  
  533.          /* First rewrite this instruction's args if appropriate. */
  534.          for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
  535.             GLuint comp, read_mask;
  536.  
  537.             if (inst2->SrcReg[arg].File != mov->DstReg.File ||
  538.                 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
  539.                 inst2->SrcReg[arg].RelAddr ||
  540.                 inst2->SrcReg[arg].Abs)
  541.                continue;
  542.             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
  543.  
  544.             /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
  545.              * components read still come from the mov instructions
  546.              */
  547.             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
  548.                (read_mask & dst_mask) == read_mask) {
  549.                for (comp = 0; comp < 4; comp++) {
  550.                   const GLuint inst2_swz =
  551.                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
  552.                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
  553.                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
  554.                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
  555.                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
  556.                                                   inst2_swz) & 0x1) << comp);
  557.                }
  558.                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
  559.                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
  560.             }
  561.          }
  562.  
  563.          /* The source of MOV is written. This potentially deactivates some
  564.           * components from the src and dst of the MOV instruction
  565.           */
  566.          if (inst2->DstReg.File == mov->DstReg.File &&
  567.              (inst2->DstReg.RelAddr ||
  568.               inst2->DstReg.Index == mov->DstReg.Index)) {
  569.             dst_mask &= ~inst2->DstReg.WriteMask;
  570.             src_mask = get_src_arg_mask(mov, 0, dst_mask);
  571.          }
  572.  
  573.          /* Idem when the destination of mov is written */
  574.          if (inst2->DstReg.File == mov->SrcReg[0].File &&
  575.              (inst2->DstReg.RelAddr ||
  576.               inst2->DstReg.Index == mov->SrcReg[0].Index)) {
  577.             src_mask &= ~inst2->DstReg.WriteMask;
  578.             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
  579.          }
  580.          if (dst_mask == 0)
  581.             break;
  582.       }
  583.    }
  584.  
  585.    if (dbg) {
  586.       printf("Optimize: End remove extra move use.\n");
  587.       /*_mesa_print_program(prog);*/
  588.    }
  589. }
  590.  
  591.  
  592. /**
  593.  * Complements dead_code_global. Try to remove code in block of code by
  594.  * carefully monitoring the swizzles. Both functions should be merged into one
  595.  * with a proper control flow graph
  596.  */
  597. static GLboolean
  598. _mesa_remove_dead_code_local(struct gl_program *prog)
  599. {
  600.    GLboolean *removeInst;
  601.    GLuint i, arg, rem = 0;
  602.  
  603.    removeInst =
  604.       calloc(prog->NumInstructions, sizeof(GLboolean));
  605.  
  606.    for (i = 0; i < prog->NumInstructions; i++) {
  607.       const struct prog_instruction *inst = prog->Instructions + i;
  608.       const GLuint index = inst->DstReg.Index;
  609.       const GLuint mask = inst->DstReg.WriteMask;
  610.       enum inst_use use;
  611.  
  612.       /* We must deactivate the pass as soon as some indirection is used */
  613.       if (inst->DstReg.RelAddr)
  614.          goto done;
  615.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
  616.          if (inst->SrcReg[arg].RelAddr)
  617.             goto done;
  618.  
  619.       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
  620.           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
  621.           inst->DstReg.File != PROGRAM_TEMPORARY ||
  622.           inst->DstReg.RelAddr)
  623.          continue;
  624.  
  625.       use = find_next_use(prog, i+1, index, mask);
  626.       if (use == WRITE || use == END)
  627.          removeInst[i] = GL_TRUE;
  628.    }
  629.  
  630.    rem = remove_instructions(prog, removeInst);
  631.  
  632. done:
  633.    free(removeInst);
  634.    return rem != 0;
  635. }
  636.  
  637.  
  638. /**
  639.  * Try to inject the destination of mov as the destination of inst and recompute
  640.  * the swizzles operators for the sources of inst if required. Return GL_TRUE
  641.  * of the substitution was possible, GL_FALSE otherwise
  642.  */
  643. static GLboolean
  644. _mesa_merge_mov_into_inst(struct prog_instruction *inst,
  645.                           const struct prog_instruction *mov)
  646. {
  647.    /* Indirection table which associates destination and source components for
  648.     * the mov instruction
  649.     */
  650.    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
  651.  
  652.    /* Some components are not written by inst. We cannot remove the mov */
  653.    if (mask != (inst->DstReg.WriteMask & mask))
  654.       return GL_FALSE;
  655.  
  656.    inst->SaturateMode |= mov->SaturateMode;
  657.  
  658.    /* Depending on the instruction, we may need to recompute the swizzles.
  659.     * Also, some other instructions (like TEX) are not linear. We will only
  660.     * consider completely active sources and destinations
  661.     */
  662.    switch (inst->Opcode) {
  663.  
  664.    /* Carstesian instructions: we compute the swizzle */
  665.    case OPCODE_MOV:
  666.    case OPCODE_MIN:
  667.    case OPCODE_MAX:
  668.    case OPCODE_ABS:
  669.    case OPCODE_ADD:
  670.    case OPCODE_MAD:
  671.    case OPCODE_MUL:
  672.    case OPCODE_SUB:
  673.    {
  674.       GLuint dst_to_src_comp[4] = {0,0,0,0};
  675.       GLuint dst_comp, arg;
  676.       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  677.          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
  678.             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
  679.             assert(src_comp < 4);
  680.             dst_to_src_comp[dst_comp] = src_comp;
  681.          }
  682.       }
  683.  
  684.       /* Patch each source of the instruction */
  685.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
  686.          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
  687.          inst->SrcReg[arg].Swizzle = 0;
  688.  
  689.          /* Reset each active component of the swizzle */
  690.          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  691.             GLuint src_comp, arg_comp;
  692.             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
  693.                continue;
  694.             src_comp = dst_to_src_comp[dst_comp];
  695.             assert(src_comp < 4);
  696.             arg_comp = GET_SWZ(arg_swz, src_comp);
  697.             assert(arg_comp < 4);
  698.             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
  699.          }
  700.       }
  701.       inst->DstReg = mov->DstReg;
  702.       return GL_TRUE;
  703.    }
  704.  
  705.    /* Dot products and scalar instructions: we only change the destination */
  706.    case OPCODE_RCP:
  707.    case OPCODE_SIN:
  708.    case OPCODE_COS:
  709.    case OPCODE_RSQ:
  710.    case OPCODE_POW:
  711.    case OPCODE_EX2:
  712.    case OPCODE_LOG:
  713.    case OPCODE_DP2:
  714.    case OPCODE_DP3:
  715.    case OPCODE_DP4:
  716.       inst->DstReg = mov->DstReg;
  717.       return GL_TRUE;
  718.  
  719.    /* All other instructions require fully active components with no swizzle */
  720.    default:
  721.       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
  722.           inst->DstReg.WriteMask != WRITEMASK_XYZW)
  723.          return GL_FALSE;
  724.       inst->DstReg = mov->DstReg;
  725.       return GL_TRUE;
  726.    }
  727. }
  728.  
  729.  
  730. /**
  731.  * Try to remove extraneous MOV instructions from the given program.
  732.  */
  733. static GLboolean
  734. _mesa_remove_extra_moves(struct gl_program *prog)
  735. {
  736.    GLboolean *removeInst; /* per-instruction removal flag */
  737.    GLuint i, rem = 0, nesting = 0;
  738.  
  739.    if (dbg) {
  740.       printf("Optimize: Begin remove extra moves\n");
  741.       _mesa_print_program(prog);
  742.    }
  743.  
  744.    removeInst =
  745.       calloc(prog->NumInstructions, sizeof(GLboolean));
  746.  
  747.    /*
  748.     * Look for sequences such as this:
  749.     *    FOO tmpX, arg0, arg1;
  750.     *    MOV tmpY, tmpX;
  751.     * and convert into:
  752.     *    FOO tmpY, arg0, arg1;
  753.     */
  754.  
  755.    for (i = 0; i < prog->NumInstructions; i++) {
  756.       const struct prog_instruction *mov = prog->Instructions + i;
  757.  
  758.       switch (mov->Opcode) {
  759.       case OPCODE_BGNLOOP:
  760.       case OPCODE_BGNSUB:
  761.       case OPCODE_IF:
  762.          nesting++;
  763.          break;
  764.       case OPCODE_ENDLOOP:
  765.       case OPCODE_ENDSUB:
  766.       case OPCODE_ENDIF:
  767.          nesting--;
  768.          break;
  769.       case OPCODE_MOV:
  770.          if (i > 0 &&
  771.              can_downward_mov_be_modifed(mov) &&
  772.              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
  773.              nesting == 0)
  774.          {
  775.  
  776.             /* see if this MOV can be removed */
  777.             const GLuint id = mov->SrcReg[0].Index;
  778.             struct prog_instruction *prevInst;
  779.             GLuint prevI;
  780.  
  781.             /* get pointer to previous instruction */
  782.             prevI = i - 1;
  783.             while (prevI > 0 && removeInst[prevI])
  784.                prevI--;
  785.             prevInst = prog->Instructions + prevI;
  786.  
  787.             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
  788.                 prevInst->DstReg.Index == id &&
  789.                 prevInst->DstReg.RelAddr == 0 &&
  790.                 prevInst->DstReg.CondMask == COND_TR) {
  791.  
  792.                const GLuint dst_mask = prevInst->DstReg.WriteMask;
  793.                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
  794.  
  795.                if (next_use == WRITE || next_use == END) {
  796.                   /* OK, we can safely remove this MOV instruction.
  797.                    * Transform:
  798.                    *   prevI: FOO tempIndex, x, y;
  799.                    *       i: MOV z, tempIndex;
  800.                    * Into:
  801.                    *   prevI: FOO z, x, y;
  802.                    */
  803.                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
  804.                      removeInst[i] = GL_TRUE;
  805.                      if (dbg) {
  806.                         printf("Remove MOV at %u\n", i);
  807.                         printf("new prev inst %u: ", prevI);
  808.                         _mesa_print_instruction(prevInst);
  809.                      }
  810.                   }
  811.                }
  812.             }
  813.          }
  814.          break;
  815.       default:
  816.          ; /* nothing */
  817.       }
  818.    }
  819.  
  820.    /* now remove the instructions which aren't needed */
  821.    rem = remove_instructions(prog, removeInst);
  822.  
  823.    free(removeInst);
  824.  
  825.    if (dbg) {
  826.       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
  827.       /*_mesa_print_program(prog);*/
  828.    }
  829.  
  830.    return rem != 0;
  831. }
  832.  
  833.  
  834. /** A live register interval */
  835. struct interval
  836. {
  837.    GLuint Reg;         /** The temporary register index */
  838.    GLuint Start, End;  /** Start/end instruction numbers */
  839. };
  840.  
  841.  
  842. /** A list of register intervals */
  843. struct interval_list
  844. {
  845.    GLuint Num;
  846.    struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  847. };
  848.  
  849.  
  850. static void
  851. append_interval(struct interval_list *list, const struct interval *inv)
  852. {
  853.    list->Intervals[list->Num++] = *inv;
  854. }
  855.  
  856.  
  857. /** Insert interval inv into list, sorted by interval end */
  858. static void
  859. insert_interval_by_end(struct interval_list *list, const struct interval *inv)
  860. {
  861.    /* XXX we could do a binary search insertion here since list is sorted */
  862.    GLint i = list->Num - 1;
  863.    while (i >= 0 && list->Intervals[i].End > inv->End) {
  864.       list->Intervals[i + 1] = list->Intervals[i];
  865.       i--;
  866.    }
  867.    list->Intervals[i + 1] = *inv;
  868.    list->Num++;
  869.  
  870. #ifdef DEBUG
  871.    {
  872.       GLuint i;
  873.       for (i = 0; i + 1 < list->Num; i++) {
  874.          assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
  875.       }
  876.    }
  877. #endif
  878. }
  879.  
  880.  
  881. /** Remove the given interval from the interval list */
  882. static void
  883. remove_interval(struct interval_list *list, const struct interval *inv)
  884. {
  885.    /* XXX we could binary search since list is sorted */
  886.    GLuint k;
  887.    for (k = 0; k < list->Num; k++) {
  888.       if (list->Intervals[k].Reg == inv->Reg) {
  889.          /* found, remove it */
  890.          assert(list->Intervals[k].Start == inv->Start);
  891.          assert(list->Intervals[k].End == inv->End);
  892.          while (k < list->Num - 1) {
  893.             list->Intervals[k] = list->Intervals[k + 1];
  894.             k++;
  895.          }
  896.          list->Num--;
  897.          return;
  898.       }
  899.    }
  900. }
  901.  
  902.  
  903. /** called by qsort() */
  904. static int
  905. compare_start(const void *a, const void *b)
  906. {
  907.    const struct interval *ia = (const struct interval *) a;
  908.    const struct interval *ib = (const struct interval *) b;
  909.    if (ia->Start < ib->Start)
  910.       return -1;
  911.    else if (ia->Start > ib->Start)
  912.       return +1;
  913.    else
  914.       return 0;
  915. }
  916.  
  917.  
  918. /** sort the interval list according to interval starts */
  919. static void
  920. sort_interval_list_by_start(struct interval_list *list)
  921. {
  922.    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
  923. #ifdef DEBUG
  924.    {
  925.       GLuint i;
  926.       for (i = 0; i + 1 < list->Num; i++) {
  927.          assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
  928.       }
  929.    }
  930. #endif
  931. }
  932.  
  933. struct loop_info
  934. {
  935.    GLuint Start, End;  /**< Start, end instructions of loop */
  936. };
  937.  
  938. /**
  939.  * Update the intermediate interval info for register 'index' and
  940.  * instruction 'ic'.
  941.  */
  942. static void
  943. update_interval(GLint intBegin[], GLint intEnd[],
  944.                 struct loop_info *loopStack, GLuint loopStackDepth,
  945.                 GLuint index, GLuint ic)
  946. {
  947.    unsigned i;
  948.    GLuint begin = ic;
  949.    GLuint end = ic;
  950.  
  951.    /* If the register is used in a loop, extend its lifetime through the end
  952.     * of the outermost loop that doesn't contain its definition.
  953.     */
  954.    for (i = 0; i < loopStackDepth; i++) {
  955.       if (intBegin[index] < loopStack[i].Start) {
  956.          end = loopStack[i].End;
  957.          break;
  958.       }
  959.    }
  960.  
  961.    /* Variables that are live at the end of a loop will also be live at the
  962.     * beginning, so an instruction inside of a loop should have its live
  963.     * interval begin at the start of the outermost loop.
  964.     */
  965.    if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
  966.       begin = loopStack[0].Start;
  967.    }
  968.  
  969.    assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  970.    if (intBegin[index] == -1) {
  971.       assert(intEnd[index] == -1);
  972.       intBegin[index] = begin;
  973.       intEnd[index] = end;
  974.    }
  975.    else {
  976.       intEnd[index] = end;
  977.    }
  978. }
  979.  
  980.  
  981. /**
  982.  * Find first/last instruction that references each temporary register.
  983.  */
  984. GLboolean
  985. _mesa_find_temp_intervals(const struct prog_instruction *instructions,
  986.                           GLuint numInstructions,
  987.                           GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
  988.                           GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  989. {
  990.    struct loop_info loopStack[MAX_LOOP_NESTING];
  991.    GLuint loopStackDepth = 0;
  992.    GLuint i;
  993.  
  994.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  995.       intBegin[i] = intEnd[i] = -1;
  996.    }
  997.  
  998.    /* Scan instructions looking for temporary registers */
  999.    for (i = 0; i < numInstructions; i++) {
  1000.       const struct prog_instruction *inst = instructions + i;
  1001.       if (inst->Opcode == OPCODE_BGNLOOP) {
  1002.          loopStack[loopStackDepth].Start = i;
  1003.          loopStack[loopStackDepth].End = inst->BranchTarget;
  1004.          loopStackDepth++;
  1005.       }
  1006.       else if (inst->Opcode == OPCODE_ENDLOOP) {
  1007.          loopStackDepth--;
  1008.       }
  1009.       else if (inst->Opcode == OPCODE_CAL) {
  1010.          return GL_FALSE;
  1011.       }
  1012.       else {
  1013.          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
  1014.          GLuint j;
  1015.          for (j = 0; j < numSrc; j++) {
  1016.             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  1017.                const GLuint index = inst->SrcReg[j].Index;
  1018.                if (inst->SrcReg[j].RelAddr)
  1019.                   return GL_FALSE;
  1020.                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1021.                                index, i);
  1022.             }
  1023.          }
  1024.          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  1025.             const GLuint index = inst->DstReg.Index;
  1026.             if (inst->DstReg.RelAddr)
  1027.                return GL_FALSE;
  1028.             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1029.                             index, i);
  1030.          }
  1031.       }
  1032.    }
  1033.  
  1034.    return GL_TRUE;
  1035. }
  1036.  
  1037.  
  1038. /**
  1039.  * Find the live intervals for each temporary register in the program.
  1040.  * For register R, the interval [A,B] indicates that R is referenced
  1041.  * from instruction A through instruction B.
  1042.  * Special consideration is needed for loops and subroutines.
  1043.  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
  1044.  */
  1045. static GLboolean
  1046. find_live_intervals(struct gl_program *prog,
  1047.                     struct interval_list *liveIntervals)
  1048. {
  1049.    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1050.    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1051.    GLuint i;
  1052.  
  1053.    /*
  1054.     * Note: we'll return GL_FALSE below if we find relative indexing
  1055.     * into the TEMP register file.  We can't handle that yet.
  1056.     * We also give up on subroutines for now.
  1057.     */
  1058.  
  1059.    if (dbg) {
  1060.       printf("Optimize: Begin find intervals\n");
  1061.    }
  1062.  
  1063.    /* build intermediate arrays */
  1064.    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
  1065.                                   intBegin, intEnd))
  1066.       return GL_FALSE;
  1067.  
  1068.    /* Build live intervals list from intermediate arrays */
  1069.    liveIntervals->Num = 0;
  1070.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
  1071.       if (intBegin[i] >= 0) {
  1072.          struct interval inv;
  1073.          inv.Reg = i;
  1074.          inv.Start = intBegin[i];
  1075.          inv.End = intEnd[i];
  1076.          append_interval(liveIntervals, &inv);
  1077.       }
  1078.    }
  1079.  
  1080.    /* Sort the list according to interval starts */
  1081.    sort_interval_list_by_start(liveIntervals);
  1082.  
  1083.    if (dbg) {
  1084.       /* print interval info */
  1085.       for (i = 0; i < liveIntervals->Num; i++) {
  1086.          const struct interval *inv = liveIntervals->Intervals + i;
  1087.          printf("Reg[%d] live [%d, %d]:",
  1088.                       inv->Reg, inv->Start, inv->End);
  1089.          if (1) {
  1090.             GLuint j;
  1091.             for (j = 0; j < inv->Start; j++)
  1092.                printf(" ");
  1093.             for (j = inv->Start; j <= inv->End; j++)
  1094.                printf("x");
  1095.          }
  1096.          printf("\n");
  1097.       }
  1098.    }
  1099.  
  1100.    return GL_TRUE;
  1101. }
  1102.  
  1103.  
  1104. /** Scan the array of used register flags to find free entry */
  1105. static GLint
  1106. alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  1107. {
  1108.    GLuint k;
  1109.    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
  1110.       if (!usedRegs[k]) {
  1111.          usedRegs[k] = GL_TRUE;
  1112.          return k;
  1113.       }
  1114.    }
  1115.    return -1;
  1116. }
  1117.  
  1118.  
  1119. /**
  1120.  * This function implements "Linear Scan Register Allocation" to reduce
  1121.  * the number of temporary registers used by the program.
  1122.  *
  1123.  * We compute the "live interval" for all temporary registers then
  1124.  * examine the overlap of the intervals to allocate new registers.
  1125.  * Basically, if two intervals do not overlap, they can use the same register.
  1126.  */
  1127. static void
  1128. _mesa_reallocate_registers(struct gl_program *prog)
  1129. {
  1130.    struct interval_list liveIntervals;
  1131.    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1132.    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1133.    GLuint i;
  1134.    GLint maxTemp = -1;
  1135.  
  1136.    if (dbg) {
  1137.       printf("Optimize: Begin live-interval register reallocation\n");
  1138.       _mesa_print_program(prog);
  1139.    }
  1140.  
  1141.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  1142.       registerMap[i] = -1;
  1143.       usedRegs[i] = GL_FALSE;
  1144.    }
  1145.  
  1146.    if (!find_live_intervals(prog, &liveIntervals)) {
  1147.       if (dbg)
  1148.          printf("Aborting register reallocation\n");
  1149.       return;
  1150.    }
  1151.  
  1152.    {
  1153.       struct interval_list activeIntervals;
  1154.       activeIntervals.Num = 0;
  1155.  
  1156.       /* loop over live intervals, allocating a new register for each */
  1157.       for (i = 0; i < liveIntervals.Num; i++) {
  1158.          const struct interval *live = liveIntervals.Intervals + i;
  1159.  
  1160.          if (dbg)
  1161.             printf("Consider register %u\n", live->Reg);
  1162.  
  1163.          /* Expire old intervals.  Intervals which have ended with respect
  1164.           * to the live interval can have their remapped registers freed.
  1165.           */
  1166.          {
  1167.             GLint j;
  1168.             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
  1169.                const struct interval *inv = activeIntervals.Intervals + j;
  1170.                if (inv->End >= live->Start) {
  1171.                   /* Stop now.  Since the activeInterval list is sorted
  1172.                    * we know we don't have to go further.
  1173.                    */
  1174.                   break;
  1175.                }
  1176.                else {
  1177.                   /* Interval 'inv' has expired */
  1178.                   const GLint regNew = registerMap[inv->Reg];
  1179.                   assert(regNew >= 0);
  1180.  
  1181.                   if (dbg)
  1182.                      printf("  expire interval for reg %u\n", inv->Reg);
  1183.  
  1184.                   /* remove interval j from active list */
  1185.                   remove_interval(&activeIntervals, inv);
  1186.                   j--;  /* counter-act j++ in for-loop above */
  1187.  
  1188.                   /* return register regNew to the free pool */
  1189.                   if (dbg)
  1190.                      printf("  free reg %d\n", regNew);
  1191.                   assert(usedRegs[regNew] == GL_TRUE);
  1192.                   usedRegs[regNew] = GL_FALSE;
  1193.                }
  1194.             }
  1195.          }
  1196.  
  1197.          /* find a free register for this live interval */
  1198.          {
  1199.             const GLint k = alloc_register(usedRegs);
  1200.             if (k < 0) {
  1201.                /* out of registers, give up */
  1202.                return;
  1203.             }
  1204.             registerMap[live->Reg] = k;
  1205.             maxTemp = MAX2(maxTemp, k);
  1206.             if (dbg)
  1207.                printf("  remap register %u -> %d\n", live->Reg, k);
  1208.          }
  1209.  
  1210.          /* Insert this live interval into the active list which is sorted
  1211.           * by increasing end points.
  1212.           */
  1213.          insert_interval_by_end(&activeIntervals, live);
  1214.       }
  1215.    }
  1216.  
  1217.    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
  1218.       /* OK, we've reduced the number of registers needed.
  1219.        * Scan the program and replace all the old temporary register
  1220.        * indexes with the new indexes.
  1221.        */
  1222.       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
  1223.  
  1224.       prog->NumTemporaries = maxTemp + 1;
  1225.    }
  1226.  
  1227.    if (dbg) {
  1228.       printf("Optimize: End live-interval register reallocation\n");
  1229.       printf("Num temp regs before: %u  after: %u\n",
  1230.                    liveIntervals.Num, maxTemp + 1);
  1231.       _mesa_print_program(prog);
  1232.    }
  1233. }
  1234.  
  1235.  
  1236. #if 0
  1237. static void
  1238. print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
  1239.    fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
  1240.    _mesa_print_program(program);
  1241.    _mesa_print_program_parameters(ctx, program);
  1242.    fprintf(stderr, "\n\n");
  1243. }
  1244. #endif
  1245.  
  1246. /**
  1247.  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
  1248.  * instruction is the first instruction to write to register T0.  The are
  1249.  * several lowering passes done in GLSL IR (e.g. branches and
  1250.  * relative addressing) that create a large number of conditional assignments
  1251.  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
  1252.  *
  1253.  * Here is why this conversion is safe:
  1254.  * CMP T0, T1 T2 T0 can be expanded to:
  1255.  * if (T1 < 0.0)
  1256.  *      MOV T0, T2;
  1257.  * else
  1258.  *      MOV T0, T0;
  1259.  *
  1260.  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
  1261.  * as the original program.  If (T1 < 0.0) evaluates to false, executing
  1262.  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
  1263.  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
  1264.  * because any instruction that was going to read from T0 after this was going
  1265.  * to read a garbage value anyway.
  1266.  */
  1267. static void
  1268. _mesa_simplify_cmp(struct gl_program * program)
  1269. {
  1270.    GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1271.    GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
  1272.    GLuint i;
  1273.  
  1274.    if (dbg) {
  1275.       printf("Optimize: Begin reads without writes\n");
  1276.       _mesa_print_program(program);
  1277.    }
  1278.  
  1279.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
  1280.       tempWrites[i] = 0;
  1281.    }
  1282.  
  1283.    for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
  1284.       outputWrites[i] = 0;
  1285.    }
  1286.  
  1287.    for (i = 0; i < program->NumInstructions; i++) {
  1288.       struct prog_instruction *inst = program->Instructions + i;
  1289.       GLuint prevWriteMask;
  1290.  
  1291.       /* Give up if we encounter relative addressing or flow control. */
  1292.       if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
  1293.          return;
  1294.       }
  1295.  
  1296.       if (inst->DstReg.File == PROGRAM_OUTPUT) {
  1297.          assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
  1298.          prevWriteMask = outputWrites[inst->DstReg.Index];
  1299.          outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
  1300.       } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  1301.          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  1302.          prevWriteMask = tempWrites[inst->DstReg.Index];
  1303.          tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
  1304.       } else {
  1305.          /* No other register type can be a destination register. */
  1306.          continue;
  1307.       }
  1308.  
  1309.       /* For a CMP to be considered a conditional write, the destination
  1310.        * register and source register two must be the same. */
  1311.       if (inst->Opcode == OPCODE_CMP
  1312.           && !(inst->DstReg.WriteMask & prevWriteMask)
  1313.           && inst->SrcReg[2].File == inst->DstReg.File
  1314.           && inst->SrcReg[2].Index == inst->DstReg.Index
  1315.           && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
  1316.  
  1317.          inst->Opcode = OPCODE_MOV;
  1318.          inst->SrcReg[0] = inst->SrcReg[1];
  1319.  
  1320.          /* Unused operands are expected to have the file set to
  1321.           * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
  1322.           * all of the sources.
  1323.           */
  1324.          inst->SrcReg[1].File = PROGRAM_UNDEFINED;
  1325.          inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
  1326.          inst->SrcReg[2].File = PROGRAM_UNDEFINED;
  1327.          inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
  1328.       }
  1329.    }
  1330.    if (dbg) {
  1331.       printf("Optimize: End reads without writes\n");
  1332.       _mesa_print_program(program);
  1333.    }
  1334. }
  1335.  
  1336. /**
  1337.  * Apply optimizations to the given program to eliminate unnecessary
  1338.  * instructions, temp regs, etc.
  1339.  */
  1340. void
  1341. _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
  1342. {
  1343.    GLboolean any_change;
  1344.  
  1345.    _mesa_simplify_cmp(program);
  1346.    /* Stop when no modifications were output */
  1347.    do {
  1348.       any_change = GL_FALSE;
  1349.       _mesa_remove_extra_move_use(program);
  1350.       if (_mesa_remove_dead_code_global(program))
  1351.          any_change = GL_TRUE;
  1352.       if (_mesa_remove_extra_moves(program))
  1353.          any_change = GL_TRUE;
  1354.       if (_mesa_remove_dead_code_local(program))
  1355.          any_change = GL_TRUE;
  1356.  
  1357.       any_change = _mesa_constant_fold(program) || any_change;
  1358.       _mesa_reallocate_registers(program);
  1359.    } while (any_change);
  1360. }
  1361.  
  1362.