Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  * Version:  7.5
  4.  *
  5.  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the "Software"),
  9.  * to deal in the Software without restriction, including without limitation
  10.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11.  * and/or sell copies of the Software, and to permit persons to whom the
  12.  * Software is furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice shall be included
  15.  * in all copies or substantial portions of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21.  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22.  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  */
  24.  
  25.  
  26.  
  27. #include "main/glheader.h"
  28. #include "main/context.h"
  29. #include "main/macros.h"
  30. #include "program.h"
  31. #include "prog_instruction.h"
  32. #include "prog_optimize.h"
  33. #include "prog_print.h"
  34.  
  35.  
  36. #define MAX_LOOP_NESTING 50
  37. /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
  38.  * register allocate many temporary values into that small number of
  39.  * temps.  So allow large temporary indices coming into the register
  40.  * allocator.
  41.  */
  42. #define REG_ALLOCATE_MAX_PROGRAM_TEMPS  ((1 << INST_INDEX_BITS) - 1)
  43.  
  44. static GLboolean dbg = GL_FALSE;
  45.  
  46. #define NO_MASK 0xf
  47.  
  48. /**
  49.  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
  50.  * are read from the given src in this instruction, We also provide
  51.  * one optional masks which may mask other components in the dst
  52.  * register
  53.  */
  54. static GLuint
  55. get_src_arg_mask(const struct prog_instruction *inst,
  56.                  GLuint arg, GLuint dst_mask)
  57. {
  58.    GLuint read_mask, channel_mask;
  59.    GLuint comp;
  60.  
  61.    ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
  62.  
  63.    /* Form the dst register, find the written channels */
  64.    if (inst->CondUpdate) {
  65.       channel_mask = WRITEMASK_XYZW;
  66.    }
  67.    else {
  68.       switch (inst->Opcode) {
  69.       case OPCODE_MOV:
  70.       case OPCODE_MIN:
  71.       case OPCODE_MAX:
  72.       case OPCODE_ABS:
  73.       case OPCODE_ADD:
  74.       case OPCODE_MAD:
  75.       case OPCODE_MUL:
  76.       case OPCODE_SUB:
  77.          channel_mask = inst->DstReg.WriteMask & dst_mask;
  78.          break;
  79.       case OPCODE_RCP:
  80.       case OPCODE_SIN:
  81.       case OPCODE_COS:
  82.       case OPCODE_RSQ:
  83.       case OPCODE_POW:
  84.       case OPCODE_EX2:
  85.       case OPCODE_LOG:
  86.          channel_mask = WRITEMASK_X;
  87.          break;
  88.       case OPCODE_DP2:
  89.          channel_mask = WRITEMASK_XY;
  90.          break;
  91.       case OPCODE_DP3:
  92.       case OPCODE_XPD:
  93.          channel_mask = WRITEMASK_XYZ;
  94.          break;
  95.       default:
  96.          channel_mask = WRITEMASK_XYZW;
  97.          break;
  98.       }
  99.    }
  100.  
  101.    /* Now, given the src swizzle and the written channels, find which
  102.     * components are actually read
  103.     */
  104.    read_mask = 0x0;
  105.    for (comp = 0; comp < 4; ++comp) {
  106.       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
  107.       ASSERT(coord < 4);
  108.       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
  109.          read_mask |= 1 << coord;
  110.    }
  111.  
  112.    return read_mask;
  113. }
  114.  
  115.  
  116. /**
  117.  * For a MOV instruction, compute a write mask when src register also has
  118.  * a mask
  119.  */
  120. static GLuint
  121. get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
  122. {
  123.    const GLuint mask = mov->DstReg.WriteMask;
  124.    GLuint comp;
  125.    GLuint updated_mask = 0x0;
  126.  
  127.    ASSERT(mov->Opcode == OPCODE_MOV);
  128.  
  129.    for (comp = 0; comp < 4; ++comp) {
  130.       GLuint src_comp;
  131.       if ((mask & (1 << comp)) == 0)
  132.          continue;
  133.       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
  134.       if ((src_mask & (1 << src_comp)) == 0)
  135.          continue;
  136.       updated_mask |= 1 << comp;
  137.    }
  138.  
  139.    return updated_mask;
  140. }
  141.  
  142.  
  143. /**
  144.  * Ensure that the swizzle is regular.  That is, all of the swizzle
  145.  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
  146.  */
  147. static GLboolean
  148. is_swizzle_regular(GLuint swz)
  149. {
  150.    return GET_SWZ(swz,0) <= SWIZZLE_W &&
  151.           GET_SWZ(swz,1) <= SWIZZLE_W &&
  152.           GET_SWZ(swz,2) <= SWIZZLE_W &&
  153.           GET_SWZ(swz,3) <= SWIZZLE_W;
  154. }
  155.  
  156.  
  157. /**
  158.  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
  159.  * \return number of instructions removed
  160.  */
  161. static GLuint
  162. remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
  163. {
  164.    GLint i, removeEnd = 0, removeCount = 0;
  165.    GLuint totalRemoved = 0;
  166.  
  167.    /* go backward */
  168.    for (i = prog->NumInstructions - 1; i >= 0; i--) {
  169.       if (removeFlags[i]) {
  170.          totalRemoved++;
  171.          if (removeCount == 0) {
  172.             /* begin a run of instructions to remove */
  173.             removeEnd = i;
  174.             removeCount = 1;
  175.          }
  176.          else {
  177.             /* extend the run of instructions to remove */
  178.             removeCount++;
  179.          }
  180.       }
  181.       else {
  182.          /* don't remove this instruction, but check if the preceeding
  183.           * instructions are to be removed.
  184.           */
  185.          if (removeCount > 0) {
  186.             GLint removeStart = removeEnd - removeCount + 1;
  187.             _mesa_delete_instructions(prog, removeStart, removeCount);
  188.             removeStart = removeCount = 0; /* reset removal info */
  189.          }
  190.       }
  191.    }
  192.    /* Finish removing if the first instruction was to be removed. */
  193.    if (removeCount > 0) {
  194.       GLint removeStart = removeEnd - removeCount + 1;
  195.       _mesa_delete_instructions(prog, removeStart, removeCount);
  196.    }
  197.    return totalRemoved;
  198. }
  199.  
  200.  
  201. /**
  202.  * Remap register indexes according to map.
  203.  * \param prog  the program to search/replace
  204.  * \param file  the type of register file to search/replace
  205.  * \param map  maps old register indexes to new indexes
  206.  */
  207. static void
  208. replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
  209. {
  210.    GLuint i;
  211.  
  212.    for (i = 0; i < prog->NumInstructions; i++) {
  213.       struct prog_instruction *inst = prog->Instructions + i;
  214.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  215.       GLuint j;
  216.       for (j = 0; j < numSrc; j++) {
  217.          if (inst->SrcReg[j].File == file) {
  218.             GLuint index = inst->SrcReg[j].Index;
  219.             ASSERT(map[index] >= 0);
  220.             inst->SrcReg[j].Index = map[index];
  221.          }
  222.       }
  223.       if (inst->DstReg.File == file) {
  224.          const GLuint index = inst->DstReg.Index;
  225.          ASSERT(map[index] >= 0);
  226.          inst->DstReg.Index = map[index];
  227.       }
  228.    }
  229. }
  230.  
  231.  
  232. /**
  233.  * Remove dead instructions from the given program.
  234.  * This is very primitive for now.  Basically look for temp registers
  235.  * that are written to but never read.  Remove any instructions that
  236.  * write to such registers.  Be careful with condition code setters.
  237.  */
  238. static GLboolean
  239. _mesa_remove_dead_code_global(struct gl_program *prog)
  240. {
  241.    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
  242.    GLboolean *removeInst; /* per-instruction removal flag */
  243.    GLuint i, rem = 0, comp;
  244.  
  245.    memset(tempRead, 0, sizeof(tempRead));
  246.  
  247.    if (dbg) {
  248.       printf("Optimize: Begin dead code removal\n");
  249.       /*_mesa_print_program(prog);*/
  250.    }
  251.  
  252.    removeInst = (GLboolean *)
  253.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  254.  
  255.    /* Determine which temps are read and written */
  256.    for (i = 0; i < prog->NumInstructions; i++) {
  257.       const struct prog_instruction *inst = prog->Instructions + i;
  258.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  259.       GLuint j;
  260.  
  261.       /* check src regs */
  262.       for (j = 0; j < numSrc; j++) {
  263.          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  264.             const GLuint index = inst->SrcReg[j].Index;
  265.             GLuint read_mask;
  266.             ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  267.             read_mask = get_src_arg_mask(inst, j, NO_MASK);
  268.  
  269.             if (inst->SrcReg[j].RelAddr) {
  270.                if (dbg)
  271.                   printf("abort remove dead code (indirect temp)\n");
  272.                goto done;
  273.             }
  274.  
  275.             for (comp = 0; comp < 4; comp++) {
  276.                const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
  277.                ASSERT(swz < 4);
  278.                if ((read_mask & (1 << swz)) == 0)
  279.                   continue;
  280.                if (swz <= SWIZZLE_W)
  281.                   tempRead[index][swz] = GL_TRUE;
  282.             }
  283.          }
  284.       }
  285.  
  286.       /* check dst reg */
  287.       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  288.          const GLuint index = inst->DstReg.Index;
  289.          ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  290.  
  291.          if (inst->DstReg.RelAddr) {
  292.             if (dbg)
  293.                printf("abort remove dead code (indirect temp)\n");
  294.             goto done;
  295.          }
  296.  
  297.          if (inst->CondUpdate) {
  298.             /* If we're writing to this register and setting condition
  299.              * codes we cannot remove the instruction.  Prevent removal
  300.              * by setting the 'read' flag.
  301.              */
  302.             tempRead[index][0] = GL_TRUE;
  303.             tempRead[index][1] = GL_TRUE;
  304.             tempRead[index][2] = GL_TRUE;
  305.             tempRead[index][3] = GL_TRUE;
  306.          }
  307.       }
  308.    }
  309.  
  310.    /* find instructions that write to dead registers, flag for removal */
  311.    for (i = 0; i < prog->NumInstructions; i++) {
  312.       struct prog_instruction *inst = prog->Instructions + i;
  313.       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
  314.  
  315.       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
  316.          GLint chan, index = inst->DstReg.Index;
  317.  
  318.          for (chan = 0; chan < 4; chan++) {
  319.             if (!tempRead[index][chan] &&
  320.                 inst->DstReg.WriteMask & (1 << chan)) {
  321.                if (dbg) {
  322.                   printf("Remove writemask on %u.%c\n", i,
  323.                                chan == 3 ? 'w' : 'x' + chan);
  324.                }
  325.                inst->DstReg.WriteMask &= ~(1 << chan);
  326.                rem++;
  327.             }
  328.          }
  329.  
  330.          if (inst->DstReg.WriteMask == 0) {
  331.             /* If we cleared all writes, the instruction can be removed. */
  332.             if (dbg)
  333.                printf("Remove instruction %u: \n", i);
  334.             removeInst[i] = GL_TRUE;
  335.          }
  336.       }
  337.    }
  338.  
  339.    /* now remove the instructions which aren't needed */
  340.    rem = remove_instructions(prog, removeInst);
  341.  
  342.    if (dbg) {
  343.       printf("Optimize: End dead code removal.\n");
  344.       printf("  %u channel writes removed\n", rem);
  345.       printf("  %u instructions removed\n", rem);
  346.       /*_mesa_print_program(prog);*/
  347.    }
  348.  
  349. done:
  350.    free(removeInst);
  351.    return rem != 0;
  352. }
  353.  
  354.  
  355. enum inst_use
  356. {
  357.    READ,
  358.    WRITE,
  359.    FLOW,
  360.    END
  361. };
  362.  
  363.  
  364. /**
  365.  * Scan forward in program from 'start' for the next occurances of TEMP[index].
  366.  * We look if an instruction reads the component given by the masks and if they
  367.  * are overwritten.
  368.  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
  369.  * that we can't look further.
  370.  */
  371. static enum inst_use
  372. find_next_use(const struct gl_program *prog,
  373.               GLuint start,
  374.               GLuint index,
  375.               GLuint mask)
  376. {
  377.    GLuint i;
  378.  
  379.    for (i = start; i < prog->NumInstructions; i++) {
  380.       const struct prog_instruction *inst = prog->Instructions + i;
  381.       switch (inst->Opcode) {
  382.       case OPCODE_BGNLOOP:
  383.       case OPCODE_BGNSUB:
  384.       case OPCODE_BRA:
  385.       case OPCODE_CAL:
  386.       case OPCODE_CONT:
  387.       case OPCODE_IF:
  388.       case OPCODE_ELSE:
  389.       case OPCODE_ENDIF:
  390.       case OPCODE_ENDLOOP:
  391.       case OPCODE_ENDSUB:
  392.       case OPCODE_RET:
  393.          return FLOW;
  394.       case OPCODE_END:
  395.          return END;
  396.       default:
  397.          {
  398.             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  399.             GLuint j;
  400.             for (j = 0; j < numSrc; j++) {
  401.                if (inst->SrcReg[j].RelAddr ||
  402.                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
  403.                    inst->SrcReg[j].Index == index &&
  404.                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
  405.                   return READ;
  406.             }
  407.             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
  408.                 inst->DstReg.File == PROGRAM_TEMPORARY &&
  409.                 inst->DstReg.Index == index) {
  410.                mask &= ~inst->DstReg.WriteMask;
  411.                if (mask == 0)
  412.                   return WRITE;
  413.             }
  414.          }
  415.       }
  416.    }
  417.    return END;
  418. }
  419.  
  420.  
  421. /**
  422.  * Is the given instruction opcode a flow-control opcode?
  423.  * XXX maybe move this into prog_instruction.[ch]
  424.  */
  425. static GLboolean
  426. _mesa_is_flow_control_opcode(enum prog_opcode opcode)
  427. {
  428.    switch (opcode) {
  429.    case OPCODE_BGNLOOP:
  430.    case OPCODE_BGNSUB:
  431.    case OPCODE_BRA:
  432.    case OPCODE_CAL:
  433.    case OPCODE_CONT:
  434.    case OPCODE_IF:
  435.    case OPCODE_ELSE:
  436.    case OPCODE_END:
  437.    case OPCODE_ENDIF:
  438.    case OPCODE_ENDLOOP:
  439.    case OPCODE_ENDSUB:
  440.    case OPCODE_RET:
  441.       return GL_TRUE;
  442.    default:
  443.       return GL_FALSE;
  444.    }
  445. }
  446.  
  447.  
  448. /**
  449.  * Test if the given instruction is a simple MOV (no conditional updating,
  450.  * not relative addressing, no negation/abs, etc).
  451.  */
  452. static GLboolean
  453. can_downward_mov_be_modifed(const struct prog_instruction *mov)
  454. {
  455.    return
  456.       mov->Opcode == OPCODE_MOV &&
  457.       mov->CondUpdate == GL_FALSE &&
  458.       mov->SrcReg[0].RelAddr == 0 &&
  459.       mov->SrcReg[0].Negate == 0 &&
  460.       mov->SrcReg[0].Abs == 0 &&
  461.       mov->SrcReg[0].HasIndex2 == 0 &&
  462.       mov->SrcReg[0].RelAddr2 == 0 &&
  463.       mov->DstReg.RelAddr == 0 &&
  464.       mov->DstReg.CondMask == COND_TR &&
  465.       mov->SaturateMode == SATURATE_OFF;
  466. }
  467.  
  468.  
  469. static GLboolean
  470. can_upward_mov_be_modifed(const struct prog_instruction *mov)
  471. {
  472.    return
  473.       can_downward_mov_be_modifed(mov) &&
  474.       mov->DstReg.File == PROGRAM_TEMPORARY;
  475. }
  476.  
  477.  
  478. /**
  479.  * Try to remove use of extraneous MOV instructions, to free them up for dead
  480.  * code removal.
  481.  */
  482. static void
  483. _mesa_remove_extra_move_use(struct gl_program *prog)
  484. {
  485.    GLuint i, j;
  486.  
  487.    if (dbg) {
  488.       printf("Optimize: Begin remove extra move use\n");
  489.       _mesa_print_program(prog);
  490.    }
  491.  
  492.    /*
  493.     * Look for sequences such as this:
  494.     *    MOV tmpX, arg0;
  495.     *    ...
  496.     *    FOO tmpY, tmpX, arg1;
  497.     * and convert into:
  498.     *    MOV tmpX, arg0;
  499.     *    ...
  500.     *    FOO tmpY, arg0, arg1;
  501.     */
  502.  
  503.    for (i = 0; i + 1 < prog->NumInstructions; i++) {
  504.       const struct prog_instruction *mov = prog->Instructions + i;
  505.       GLuint dst_mask, src_mask;
  506.       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
  507.          continue;
  508.  
  509.       /* Scanning the code, we maintain the components which are still active in
  510.        * these two masks
  511.        */
  512.       dst_mask = mov->DstReg.WriteMask;
  513.       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
  514.  
  515.       /* Walk through remaining instructions until the or src reg gets
  516.        * rewritten or we get into some flow-control, eliminating the use of
  517.        * this MOV.
  518.        */
  519.       for (j = i + 1; j < prog->NumInstructions; j++) {
  520.          struct prog_instruction *inst2 = prog->Instructions + j;
  521.          GLuint arg;
  522.  
  523.          if (_mesa_is_flow_control_opcode(inst2->Opcode))
  524.              break;
  525.  
  526.          /* First rewrite this instruction's args if appropriate. */
  527.          for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
  528.             GLuint comp, read_mask;
  529.  
  530.             if (inst2->SrcReg[arg].File != mov->DstReg.File ||
  531.                 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
  532.                 inst2->SrcReg[arg].RelAddr ||
  533.                 inst2->SrcReg[arg].Abs)
  534.                continue;
  535.             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
  536.  
  537.             /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
  538.              * components read still come from the mov instructions
  539.              */
  540.             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
  541.                (read_mask & dst_mask) == read_mask) {
  542.                for (comp = 0; comp < 4; comp++) {
  543.                   const GLuint inst2_swz =
  544.                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
  545.                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
  546.                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
  547.                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
  548.                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
  549.                                                   inst2_swz) & 0x1) << comp);
  550.                }
  551.                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
  552.                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
  553.             }
  554.          }
  555.  
  556.          /* The source of MOV is written. This potentially deactivates some
  557.           * components from the src and dst of the MOV instruction
  558.           */
  559.          if (inst2->DstReg.File == mov->DstReg.File &&
  560.              (inst2->DstReg.RelAddr ||
  561.               inst2->DstReg.Index == mov->DstReg.Index)) {
  562.             dst_mask &= ~inst2->DstReg.WriteMask;
  563.             src_mask = get_src_arg_mask(mov, 0, dst_mask);
  564.          }
  565.  
  566.          /* Idem when the destination of mov is written */
  567.          if (inst2->DstReg.File == mov->SrcReg[0].File &&
  568.              (inst2->DstReg.RelAddr ||
  569.               inst2->DstReg.Index == mov->SrcReg[0].Index)) {
  570.             src_mask &= ~inst2->DstReg.WriteMask;
  571.             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
  572.          }
  573.          if (dst_mask == 0)
  574.             break;
  575.       }
  576.    }
  577.  
  578.    if (dbg) {
  579.       printf("Optimize: End remove extra move use.\n");
  580.       /*_mesa_print_program(prog);*/
  581.    }
  582. }
  583.  
  584.  
  585. /**
  586.  * Complements dead_code_global. Try to remove code in block of code by
  587.  * carefully monitoring the swizzles. Both functions should be merged into one
  588.  * with a proper control flow graph
  589.  */
  590. static GLboolean
  591. _mesa_remove_dead_code_local(struct gl_program *prog)
  592. {
  593.    GLboolean *removeInst;
  594.    GLuint i, arg, rem = 0;
  595.  
  596.    removeInst = (GLboolean *)
  597.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  598.  
  599.    for (i = 0; i < prog->NumInstructions; i++) {
  600.       const struct prog_instruction *inst = prog->Instructions + i;
  601.       const GLuint index = inst->DstReg.Index;
  602.       const GLuint mask = inst->DstReg.WriteMask;
  603.       enum inst_use use;
  604.  
  605.       /* We must deactivate the pass as soon as some indirection is used */
  606.       if (inst->DstReg.RelAddr)
  607.          goto done;
  608.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
  609.          if (inst->SrcReg[arg].RelAddr)
  610.             goto done;
  611.  
  612.       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
  613.           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
  614.           inst->DstReg.File != PROGRAM_TEMPORARY ||
  615.           inst->DstReg.RelAddr)
  616.          continue;
  617.  
  618.       use = find_next_use(prog, i+1, index, mask);
  619.       if (use == WRITE || use == END)
  620.          removeInst[i] = GL_TRUE;
  621.    }
  622.  
  623.    rem = remove_instructions(prog, removeInst);
  624.  
  625. done:
  626.    free(removeInst);
  627.    return rem != 0;
  628. }
  629.  
  630.  
  631. /**
  632.  * Try to inject the destination of mov as the destination of inst and recompute
  633.  * the swizzles operators for the sources of inst if required. Return GL_TRUE
  634.  * of the substitution was possible, GL_FALSE otherwise
  635.  */
  636. static GLboolean
  637. _mesa_merge_mov_into_inst(struct prog_instruction *inst,
  638.                           const struct prog_instruction *mov)
  639. {
  640.    /* Indirection table which associates destination and source components for
  641.     * the mov instruction
  642.     */
  643.    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
  644.  
  645.    /* Some components are not written by inst. We cannot remove the mov */
  646.    if (mask != (inst->DstReg.WriteMask & mask))
  647.       return GL_FALSE;
  648.  
  649.    /* Depending on the instruction, we may need to recompute the swizzles.
  650.     * Also, some other instructions (like TEX) are not linear. We will only
  651.     * consider completely active sources and destinations
  652.     */
  653.    switch (inst->Opcode) {
  654.  
  655.    /* Carstesian instructions: we compute the swizzle */
  656.    case OPCODE_MOV:
  657.    case OPCODE_MIN:
  658.    case OPCODE_MAX:
  659.    case OPCODE_ABS:
  660.    case OPCODE_ADD:
  661.    case OPCODE_MAD:
  662.    case OPCODE_MUL:
  663.    case OPCODE_SUB:
  664.    {
  665.       GLuint dst_to_src_comp[4] = {0,0,0,0};
  666.       GLuint dst_comp, arg;
  667.       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  668.          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
  669.             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
  670.             ASSERT(src_comp < 4);
  671.             dst_to_src_comp[dst_comp] = src_comp;
  672.          }
  673.       }
  674.  
  675.       /* Patch each source of the instruction */
  676.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
  677.          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
  678.          inst->SrcReg[arg].Swizzle = 0;
  679.  
  680.          /* Reset each active component of the swizzle */
  681.          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  682.             GLuint src_comp, arg_comp;
  683.             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
  684.                continue;
  685.             src_comp = dst_to_src_comp[dst_comp];
  686.             ASSERT(src_comp < 4);
  687.             arg_comp = GET_SWZ(arg_swz, src_comp);
  688.             ASSERT(arg_comp < 4);
  689.             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
  690.          }
  691.       }
  692.       inst->DstReg = mov->DstReg;
  693.       return GL_TRUE;
  694.    }
  695.  
  696.    /* Dot products and scalar instructions: we only change the destination */
  697.    case OPCODE_RCP:
  698.    case OPCODE_SIN:
  699.    case OPCODE_COS:
  700.    case OPCODE_RSQ:
  701.    case OPCODE_POW:
  702.    case OPCODE_EX2:
  703.    case OPCODE_LOG:
  704.    case OPCODE_DP2:
  705.    case OPCODE_DP3:
  706.    case OPCODE_DP4:
  707.       inst->DstReg = mov->DstReg;
  708.       return GL_TRUE;
  709.  
  710.    /* All other instructions require fully active components with no swizzle */
  711.    default:
  712.       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
  713.           inst->DstReg.WriteMask != WRITEMASK_XYZW)
  714.          return GL_FALSE;
  715.       inst->DstReg = mov->DstReg;
  716.       return GL_TRUE;
  717.    }
  718. }
  719.  
  720.  
  721. /**
  722.  * Try to remove extraneous MOV instructions from the given program.
  723.  */
  724. static GLboolean
  725. _mesa_remove_extra_moves(struct gl_program *prog)
  726. {
  727.    GLboolean *removeInst; /* per-instruction removal flag */
  728.    GLuint i, rem = 0, nesting = 0;
  729.  
  730.    if (dbg) {
  731.       printf("Optimize: Begin remove extra moves\n");
  732.       _mesa_print_program(prog);
  733.    }
  734.  
  735.    removeInst = (GLboolean *)
  736.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  737.  
  738.    /*
  739.     * Look for sequences such as this:
  740.     *    FOO tmpX, arg0, arg1;
  741.     *    MOV tmpY, tmpX;
  742.     * and convert into:
  743.     *    FOO tmpY, arg0, arg1;
  744.     */
  745.  
  746.    for (i = 0; i < prog->NumInstructions; i++) {
  747.       const struct prog_instruction *mov = prog->Instructions + i;
  748.  
  749.       switch (mov->Opcode) {
  750.       case OPCODE_BGNLOOP:
  751.       case OPCODE_BGNSUB:
  752.       case OPCODE_IF:
  753.          nesting++;
  754.          break;
  755.       case OPCODE_ENDLOOP:
  756.       case OPCODE_ENDSUB:
  757.       case OPCODE_ENDIF:
  758.          nesting--;
  759.          break;
  760.       case OPCODE_MOV:
  761.          if (i > 0 &&
  762.              can_downward_mov_be_modifed(mov) &&
  763.              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
  764.              nesting == 0)
  765.          {
  766.  
  767.             /* see if this MOV can be removed */
  768.             const GLuint id = mov->SrcReg[0].Index;
  769.             struct prog_instruction *prevInst;
  770.             GLuint prevI;
  771.  
  772.             /* get pointer to previous instruction */
  773.             prevI = i - 1;
  774.             while (prevI > 0 && removeInst[prevI])
  775.                prevI--;
  776.             prevInst = prog->Instructions + prevI;
  777.  
  778.             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
  779.                 prevInst->DstReg.Index == id &&
  780.                 prevInst->DstReg.RelAddr == 0 &&
  781.                 prevInst->DstReg.CondSrc == 0 &&
  782.                 prevInst->DstReg.CondMask == COND_TR) {
  783.  
  784.                const GLuint dst_mask = prevInst->DstReg.WriteMask;
  785.                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
  786.  
  787.                if (next_use == WRITE || next_use == END) {
  788.                   /* OK, we can safely remove this MOV instruction.
  789.                    * Transform:
  790.                    *   prevI: FOO tempIndex, x, y;
  791.                    *       i: MOV z, tempIndex;
  792.                    * Into:
  793.                    *   prevI: FOO z, x, y;
  794.                    */
  795.                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
  796.                      removeInst[i] = GL_TRUE;
  797.                      if (dbg) {
  798.                         printf("Remove MOV at %u\n", i);
  799.                         printf("new prev inst %u: ", prevI);
  800.                         _mesa_print_instruction(prevInst);
  801.                      }
  802.                   }
  803.                }
  804.             }
  805.          }
  806.          break;
  807.       default:
  808.          ; /* nothing */
  809.       }
  810.    }
  811.  
  812.    /* now remove the instructions which aren't needed */
  813.    rem = remove_instructions(prog, removeInst);
  814.  
  815.    free(removeInst);
  816.  
  817.    if (dbg) {
  818.       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
  819.       /*_mesa_print_program(prog);*/
  820.    }
  821.  
  822.    return rem != 0;
  823. }
  824.  
  825.  
  826. /** A live register interval */
  827. struct interval
  828. {
  829.    GLuint Reg;         /** The temporary register index */
  830.    GLuint Start, End;  /** Start/end instruction numbers */
  831. };
  832.  
  833.  
  834. /** A list of register intervals */
  835. struct interval_list
  836. {
  837.    GLuint Num;
  838.    struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  839. };
  840.  
  841.  
  842. static void
  843. append_interval(struct interval_list *list, const struct interval *inv)
  844. {
  845.    list->Intervals[list->Num++] = *inv;
  846. }
  847.  
  848.  
  849. /** Insert interval inv into list, sorted by interval end */
  850. static void
  851. insert_interval_by_end(struct interval_list *list, const struct interval *inv)
  852. {
  853.    /* XXX we could do a binary search insertion here since list is sorted */
  854.    GLint i = list->Num - 1;
  855.    while (i >= 0 && list->Intervals[i].End > inv->End) {
  856.       list->Intervals[i + 1] = list->Intervals[i];
  857.       i--;
  858.    }
  859.    list->Intervals[i + 1] = *inv;
  860.    list->Num++;
  861.  
  862. #ifdef DEBUG
  863.    {
  864.       GLuint i;
  865.       for (i = 0; i + 1 < list->Num; i++) {
  866.          ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
  867.       }
  868.    }
  869. #endif
  870. }
  871.  
  872.  
  873. /** Remove the given interval from the interval list */
  874. static void
  875. remove_interval(struct interval_list *list, const struct interval *inv)
  876. {
  877.    /* XXX we could binary search since list is sorted */
  878.    GLuint k;
  879.    for (k = 0; k < list->Num; k++) {
  880.       if (list->Intervals[k].Reg == inv->Reg) {
  881.          /* found, remove it */
  882.          ASSERT(list->Intervals[k].Start == inv->Start);
  883.          ASSERT(list->Intervals[k].End == inv->End);
  884.          while (k < list->Num - 1) {
  885.             list->Intervals[k] = list->Intervals[k + 1];
  886.             k++;
  887.          }
  888.          list->Num--;
  889.          return;
  890.       }
  891.    }
  892. }
  893.  
  894.  
  895. /** called by qsort() */
  896. static int
  897. compare_start(const void *a, const void *b)
  898. {
  899.    const struct interval *ia = (const struct interval *) a;
  900.    const struct interval *ib = (const struct interval *) b;
  901.    if (ia->Start < ib->Start)
  902.       return -1;
  903.    else if (ia->Start > ib->Start)
  904.       return +1;
  905.    else
  906.       return 0;
  907. }
  908.  
  909.  
  910. /** sort the interval list according to interval starts */
  911. static void
  912. sort_interval_list_by_start(struct interval_list *list)
  913. {
  914.    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
  915. #ifdef DEBUG
  916.    {
  917.       GLuint i;
  918.       for (i = 0; i + 1 < list->Num; i++) {
  919.          ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
  920.       }
  921.    }
  922. #endif
  923. }
  924.  
  925. struct loop_info
  926. {
  927.    GLuint Start, End;  /**< Start, end instructions of loop */
  928. };
  929.  
  930. /**
  931.  * Update the intermediate interval info for register 'index' and
  932.  * instruction 'ic'.
  933.  */
  934. static void
  935. update_interval(GLint intBegin[], GLint intEnd[],
  936.                 struct loop_info *loopStack, GLuint loopStackDepth,
  937.                 GLuint index, GLuint ic)
  938. {
  939.    int i;
  940.  
  941.    /* If the register is used in a loop, extend its lifetime through the end
  942.     * of the outermost loop that doesn't contain its definition.
  943.     */
  944.    for (i = 0; i < loopStackDepth; i++) {
  945.       if (intBegin[index] < loopStack[i].Start) {
  946.          ic = loopStack[i].End;
  947.          break;
  948.       }
  949.    }
  950.  
  951.    ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  952.    if (intBegin[index] == -1) {
  953.       ASSERT(intEnd[index] == -1);
  954.       intBegin[index] = intEnd[index] = ic;
  955.    }
  956.    else {
  957.       intEnd[index] = ic;
  958.    }
  959. }
  960.  
  961.  
  962. /**
  963.  * Find first/last instruction that references each temporary register.
  964.  */
  965. GLboolean
  966. _mesa_find_temp_intervals(const struct prog_instruction *instructions,
  967.                           GLuint numInstructions,
  968.                           GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
  969.                           GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  970. {
  971.    struct loop_info loopStack[MAX_LOOP_NESTING];
  972.    GLuint loopStackDepth = 0;
  973.    GLuint i;
  974.  
  975.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  976.       intBegin[i] = intEnd[i] = -1;
  977.    }
  978.  
  979.    /* Scan instructions looking for temporary registers */
  980.    for (i = 0; i < numInstructions; i++) {
  981.       const struct prog_instruction *inst = instructions + i;
  982.       if (inst->Opcode == OPCODE_BGNLOOP) {
  983.          loopStack[loopStackDepth].Start = i;
  984.          loopStack[loopStackDepth].End = inst->BranchTarget;
  985.          loopStackDepth++;
  986.       }
  987.       else if (inst->Opcode == OPCODE_ENDLOOP) {
  988.          loopStackDepth--;
  989.       }
  990.       else if (inst->Opcode == OPCODE_CAL) {
  991.          return GL_FALSE;
  992.       }
  993.       else {
  994.          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
  995.          GLuint j;
  996.          for (j = 0; j < numSrc; j++) {
  997.             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  998.                const GLuint index = inst->SrcReg[j].Index;
  999.                if (inst->SrcReg[j].RelAddr)
  1000.                   return GL_FALSE;
  1001.                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1002.                                index, i);
  1003.             }
  1004.          }
  1005.          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  1006.             const GLuint index = inst->DstReg.Index;
  1007.             if (inst->DstReg.RelAddr)
  1008.                return GL_FALSE;
  1009.             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1010.                             index, i);
  1011.          }
  1012.       }
  1013.    }
  1014.  
  1015.    return GL_TRUE;
  1016. }
  1017.  
  1018.  
  1019. /**
  1020.  * Find the live intervals for each temporary register in the program.
  1021.  * For register R, the interval [A,B] indicates that R is referenced
  1022.  * from instruction A through instruction B.
  1023.  * Special consideration is needed for loops and subroutines.
  1024.  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
  1025.  */
  1026. static GLboolean
  1027. find_live_intervals(struct gl_program *prog,
  1028.                     struct interval_list *liveIntervals)
  1029. {
  1030.    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1031.    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1032.    GLuint i;
  1033.  
  1034.    /*
  1035.     * Note: we'll return GL_FALSE below if we find relative indexing
  1036.     * into the TEMP register file.  We can't handle that yet.
  1037.     * We also give up on subroutines for now.
  1038.     */
  1039.  
  1040.    if (dbg) {
  1041.       printf("Optimize: Begin find intervals\n");
  1042.    }
  1043.  
  1044.    /* build intermediate arrays */
  1045.    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
  1046.                                   intBegin, intEnd))
  1047.       return GL_FALSE;
  1048.  
  1049.    /* Build live intervals list from intermediate arrays */
  1050.    liveIntervals->Num = 0;
  1051.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
  1052.       if (intBegin[i] >= 0) {
  1053.          struct interval inv;
  1054.          inv.Reg = i;
  1055.          inv.Start = intBegin[i];
  1056.          inv.End = intEnd[i];
  1057.          append_interval(liveIntervals, &inv);
  1058.       }
  1059.    }
  1060.  
  1061.    /* Sort the list according to interval starts */
  1062.    sort_interval_list_by_start(liveIntervals);
  1063.  
  1064.    if (dbg) {
  1065.       /* print interval info */
  1066.       for (i = 0; i < liveIntervals->Num; i++) {
  1067.          const struct interval *inv = liveIntervals->Intervals + i;
  1068.          printf("Reg[%d] live [%d, %d]:",
  1069.                       inv->Reg, inv->Start, inv->End);
  1070.          if (1) {
  1071.             GLuint j;
  1072.             for (j = 0; j < inv->Start; j++)
  1073.                printf(" ");
  1074.             for (j = inv->Start; j <= inv->End; j++)
  1075.                printf("x");
  1076.          }
  1077.          printf("\n");
  1078.       }
  1079.    }
  1080.  
  1081.    return GL_TRUE;
  1082. }
  1083.  
  1084.  
  1085. /** Scan the array of used register flags to find free entry */
  1086. static GLint
  1087. alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  1088. {
  1089.    GLuint k;
  1090.    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
  1091.       if (!usedRegs[k]) {
  1092.          usedRegs[k] = GL_TRUE;
  1093.          return k;
  1094.       }
  1095.    }
  1096.    return -1;
  1097. }
  1098.  
  1099.  
  1100. /**
  1101.  * This function implements "Linear Scan Register Allocation" to reduce
  1102.  * the number of temporary registers used by the program.
  1103.  *
  1104.  * We compute the "live interval" for all temporary registers then
  1105.  * examine the overlap of the intervals to allocate new registers.
  1106.  * Basically, if two intervals do not overlap, they can use the same register.
  1107.  */
  1108. static void
  1109. _mesa_reallocate_registers(struct gl_program *prog)
  1110. {
  1111.    struct interval_list liveIntervals;
  1112.    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1113.    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1114.    GLuint i;
  1115.    GLint maxTemp = -1;
  1116.  
  1117.    if (dbg) {
  1118.       printf("Optimize: Begin live-interval register reallocation\n");
  1119.       _mesa_print_program(prog);
  1120.    }
  1121.  
  1122.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  1123.       registerMap[i] = -1;
  1124.       usedRegs[i] = GL_FALSE;
  1125.    }
  1126.  
  1127.    if (!find_live_intervals(prog, &liveIntervals)) {
  1128.       if (dbg)
  1129.          printf("Aborting register reallocation\n");
  1130.       return;
  1131.    }
  1132.  
  1133.    {
  1134.       struct interval_list activeIntervals;
  1135.       activeIntervals.Num = 0;
  1136.  
  1137.       /* loop over live intervals, allocating a new register for each */
  1138.       for (i = 0; i < liveIntervals.Num; i++) {
  1139.          const struct interval *live = liveIntervals.Intervals + i;
  1140.  
  1141.          if (dbg)
  1142.             printf("Consider register %u\n", live->Reg);
  1143.  
  1144.          /* Expire old intervals.  Intervals which have ended with respect
  1145.           * to the live interval can have their remapped registers freed.
  1146.           */
  1147.          {
  1148.             GLint j;
  1149.             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
  1150.                const struct interval *inv = activeIntervals.Intervals + j;
  1151.                if (inv->End >= live->Start) {
  1152.                   /* Stop now.  Since the activeInterval list is sorted
  1153.                    * we know we don't have to go further.
  1154.                    */
  1155.                   break;
  1156.                }
  1157.                else {
  1158.                   /* Interval 'inv' has expired */
  1159.                   const GLint regNew = registerMap[inv->Reg];
  1160.                   ASSERT(regNew >= 0);
  1161.  
  1162.                   if (dbg)
  1163.                      printf("  expire interval for reg %u\n", inv->Reg);
  1164.  
  1165.                   /* remove interval j from active list */
  1166.                   remove_interval(&activeIntervals, inv);
  1167.                   j--;  /* counter-act j++ in for-loop above */
  1168.  
  1169.                   /* return register regNew to the free pool */
  1170.                   if (dbg)
  1171.                      printf("  free reg %d\n", regNew);
  1172.                   ASSERT(usedRegs[regNew] == GL_TRUE);
  1173.                   usedRegs[regNew] = GL_FALSE;
  1174.                }
  1175.             }
  1176.          }
  1177.  
  1178.          /* find a free register for this live interval */
  1179.          {
  1180.             const GLint k = alloc_register(usedRegs);
  1181.             if (k < 0) {
  1182.                /* out of registers, give up */
  1183.                return;
  1184.             }
  1185.             registerMap[live->Reg] = k;
  1186.             maxTemp = MAX2(maxTemp, k);
  1187.             if (dbg)
  1188.                printf("  remap register %u -> %d\n", live->Reg, k);
  1189.          }
  1190.  
  1191.          /* Insert this live interval into the active list which is sorted
  1192.           * by increasing end points.
  1193.           */
  1194.          insert_interval_by_end(&activeIntervals, live);
  1195.       }
  1196.    }
  1197.  
  1198.    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
  1199.       /* OK, we've reduced the number of registers needed.
  1200.        * Scan the program and replace all the old temporary register
  1201.        * indexes with the new indexes.
  1202.        */
  1203.       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
  1204.  
  1205.       prog->NumTemporaries = maxTemp + 1;
  1206.    }
  1207.  
  1208.    if (dbg) {
  1209.       printf("Optimize: End live-interval register reallocation\n");
  1210.       printf("Num temp regs before: %u  after: %u\n",
  1211.                    liveIntervals.Num, maxTemp + 1);
  1212.       _mesa_print_program(prog);
  1213.    }
  1214. }
  1215.  
  1216.  
  1217. #if 0
  1218. static void
  1219. print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
  1220.    fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
  1221.    _mesa_print_program(program);
  1222.    _mesa_print_program_parameters(ctx, program);
  1223.    fprintf(stderr, "\n\n");
  1224. }
  1225. #endif
  1226.  
  1227.  
  1228. /**
  1229.  * Apply optimizations to the given program to eliminate unnecessary
  1230.  * instructions, temp regs, etc.
  1231.  */
  1232. void
  1233. _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
  1234. {
  1235.    GLboolean any_change;
  1236.  
  1237.    /* Stop when no modifications were output */
  1238.    do {
  1239.       any_change = GL_FALSE;
  1240.       _mesa_remove_extra_move_use(program);
  1241.       if (_mesa_remove_dead_code_global(program))
  1242.          any_change = GL_TRUE;
  1243.       if (_mesa_remove_extra_moves(program))
  1244.          any_change = GL_TRUE;
  1245.       if (_mesa_remove_dead_code_local(program))
  1246.          any_change = GL_TRUE;
  1247.       _mesa_reallocate_registers(program);
  1248.    } while (any_change);
  1249. }
  1250.  
  1251.