Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  20.  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21.  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22.  */
  23.  
  24.  
  25.  
  26. #include "main/glheader.h"
  27. #include "main/context.h"
  28. #include "main/macros.h"
  29. #include "program.h"
  30. #include "prog_instruction.h"
  31. #include "prog_optimize.h"
  32. #include "prog_print.h"
  33.  
  34.  
  35. #define MAX_LOOP_NESTING 50
  36. /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
  37.  * register allocate many temporary values into that small number of
  38.  * temps.  So allow large temporary indices coming into the register
  39.  * allocator.
  40.  */
  41. #define REG_ALLOCATE_MAX_PROGRAM_TEMPS  ((1 << INST_INDEX_BITS) - 1)
  42.  
  43. static GLboolean dbg = GL_FALSE;
  44.  
  45. #define NO_MASK 0xf
  46.  
  47. /**
  48.  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
  49.  * are read from the given src in this instruction, We also provide
  50.  * one optional masks which may mask other components in the dst
  51.  * register
  52.  */
  53. static GLuint
  54. get_src_arg_mask(const struct prog_instruction *inst,
  55.                  GLuint arg, GLuint dst_mask)
  56. {
  57.    GLuint read_mask, channel_mask;
  58.    GLuint comp;
  59.  
  60.    ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
  61.  
  62.    /* Form the dst register, find the written channels */
  63.    if (inst->CondUpdate) {
  64.       channel_mask = WRITEMASK_XYZW;
  65.    }
  66.    else {
  67.       switch (inst->Opcode) {
  68.       case OPCODE_MOV:
  69.       case OPCODE_MIN:
  70.       case OPCODE_MAX:
  71.       case OPCODE_ABS:
  72.       case OPCODE_ADD:
  73.       case OPCODE_MAD:
  74.       case OPCODE_MUL:
  75.       case OPCODE_SUB:
  76.       case OPCODE_CMP:
  77.       case OPCODE_FLR:
  78.       case OPCODE_FRC:
  79.       case OPCODE_LRP:
  80.       case OPCODE_SEQ:
  81.       case OPCODE_SGE:
  82.       case OPCODE_SGT:
  83.       case OPCODE_SLE:
  84.       case OPCODE_SLT:
  85.       case OPCODE_SNE:
  86.       case OPCODE_SSG:
  87.          channel_mask = inst->DstReg.WriteMask & dst_mask;
  88.          break;
  89.       case OPCODE_RCP:
  90.       case OPCODE_SIN:
  91.       case OPCODE_COS:
  92.       case OPCODE_RSQ:
  93.       case OPCODE_POW:
  94.       case OPCODE_EX2:
  95.       case OPCODE_LOG:
  96.          channel_mask = WRITEMASK_X;
  97.          break;
  98.       case OPCODE_DP2:
  99.          channel_mask = WRITEMASK_XY;
  100.          break;
  101.       case OPCODE_DP3:
  102.       case OPCODE_XPD:
  103.          channel_mask = WRITEMASK_XYZ;
  104.          break;
  105.       default:
  106.          channel_mask = WRITEMASK_XYZW;
  107.          break;
  108.       }
  109.    }
  110.  
  111.    /* Now, given the src swizzle and the written channels, find which
  112.     * components are actually read
  113.     */
  114.    read_mask = 0x0;
  115.    for (comp = 0; comp < 4; ++comp) {
  116.       const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
  117.       ASSERT(coord < 4);
  118.       if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
  119.          read_mask |= 1 << coord;
  120.    }
  121.  
  122.    return read_mask;
  123. }
  124.  
  125.  
  126. /**
  127.  * For a MOV instruction, compute a write mask when src register also has
  128.  * a mask
  129.  */
  130. static GLuint
  131. get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
  132. {
  133.    const GLuint mask = mov->DstReg.WriteMask;
  134.    GLuint comp;
  135.    GLuint updated_mask = 0x0;
  136.  
  137.    ASSERT(mov->Opcode == OPCODE_MOV);
  138.  
  139.    for (comp = 0; comp < 4; ++comp) {
  140.       GLuint src_comp;
  141.       if ((mask & (1 << comp)) == 0)
  142.          continue;
  143.       src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
  144.       if ((src_mask & (1 << src_comp)) == 0)
  145.          continue;
  146.       updated_mask |= 1 << comp;
  147.    }
  148.  
  149.    return updated_mask;
  150. }
  151.  
  152.  
  153. /**
  154.  * Ensure that the swizzle is regular.  That is, all of the swizzle
  155.  * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
  156.  */
  157. static GLboolean
  158. is_swizzle_regular(GLuint swz)
  159. {
  160.    return GET_SWZ(swz,0) <= SWIZZLE_W &&
  161.           GET_SWZ(swz,1) <= SWIZZLE_W &&
  162.           GET_SWZ(swz,2) <= SWIZZLE_W &&
  163.           GET_SWZ(swz,3) <= SWIZZLE_W;
  164. }
  165.  
  166.  
  167. /**
  168.  * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
  169.  * \return number of instructions removed
  170.  */
  171. static GLuint
  172. remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
  173. {
  174.    GLint i, removeEnd = 0, removeCount = 0;
  175.    GLuint totalRemoved = 0;
  176.  
  177.    /* go backward */
  178.    for (i = prog->NumInstructions - 1; i >= 0; i--) {
  179.       if (removeFlags[i]) {
  180.          totalRemoved++;
  181.          if (removeCount == 0) {
  182.             /* begin a run of instructions to remove */
  183.             removeEnd = i;
  184.             removeCount = 1;
  185.          }
  186.          else {
  187.             /* extend the run of instructions to remove */
  188.             removeCount++;
  189.          }
  190.       }
  191.       else {
  192.          /* don't remove this instruction, but check if the preceeding
  193.           * instructions are to be removed.
  194.           */
  195.          if (removeCount > 0) {
  196.             GLint removeStart = removeEnd - removeCount + 1;
  197.             _mesa_delete_instructions(prog, removeStart, removeCount);
  198.             removeStart = removeCount = 0; /* reset removal info */
  199.          }
  200.       }
  201.    }
  202.    /* Finish removing if the first instruction was to be removed. */
  203.    if (removeCount > 0) {
  204.       GLint removeStart = removeEnd - removeCount + 1;
  205.       _mesa_delete_instructions(prog, removeStart, removeCount);
  206.    }
  207.    return totalRemoved;
  208. }
  209.  
  210.  
  211. /**
  212.  * Remap register indexes according to map.
  213.  * \param prog  the program to search/replace
  214.  * \param file  the type of register file to search/replace
  215.  * \param map  maps old register indexes to new indexes
  216.  */
  217. static void
  218. replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
  219. {
  220.    GLuint i;
  221.  
  222.    for (i = 0; i < prog->NumInstructions; i++) {
  223.       struct prog_instruction *inst = prog->Instructions + i;
  224.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  225.       GLuint j;
  226.       for (j = 0; j < numSrc; j++) {
  227.          if (inst->SrcReg[j].File == file) {
  228.             GLuint index = inst->SrcReg[j].Index;
  229.             ASSERT(map[index] >= 0);
  230.             inst->SrcReg[j].Index = map[index];
  231.          }
  232.       }
  233.       if (inst->DstReg.File == file) {
  234.          const GLuint index = inst->DstReg.Index;
  235.          ASSERT(map[index] >= 0);
  236.          inst->DstReg.Index = map[index];
  237.       }
  238.    }
  239. }
  240.  
  241.  
  242. /**
  243.  * Remove dead instructions from the given program.
  244.  * This is very primitive for now.  Basically look for temp registers
  245.  * that are written to but never read.  Remove any instructions that
  246.  * write to such registers.  Be careful with condition code setters.
  247.  */
  248. static GLboolean
  249. _mesa_remove_dead_code_global(struct gl_program *prog)
  250. {
  251.    GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
  252.    GLboolean *removeInst; /* per-instruction removal flag */
  253.    GLuint i, rem = 0, comp;
  254.  
  255.    memset(tempRead, 0, sizeof(tempRead));
  256.  
  257.    if (dbg) {
  258.       printf("Optimize: Begin dead code removal\n");
  259.       /*_mesa_print_program(prog);*/
  260.    }
  261.  
  262.    removeInst =
  263.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  264.  
  265.    /* Determine which temps are read and written */
  266.    for (i = 0; i < prog->NumInstructions; i++) {
  267.       const struct prog_instruction *inst = prog->Instructions + i;
  268.       const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  269.       GLuint j;
  270.  
  271.       /* check src regs */
  272.       for (j = 0; j < numSrc; j++) {
  273.          if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  274.             const GLuint index = inst->SrcReg[j].Index;
  275.             GLuint read_mask;
  276.             ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  277.             read_mask = get_src_arg_mask(inst, j, NO_MASK);
  278.  
  279.             if (inst->SrcReg[j].RelAddr) {
  280.                if (dbg)
  281.                   printf("abort remove dead code (indirect temp)\n");
  282.                goto done;
  283.             }
  284.  
  285.             for (comp = 0; comp < 4; comp++) {
  286.                const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
  287.                ASSERT(swz < 4);
  288.                if ((read_mask & (1 << swz)) == 0)
  289.                   continue;
  290.                if (swz <= SWIZZLE_W)
  291.                   tempRead[index][swz] = GL_TRUE;
  292.             }
  293.          }
  294.       }
  295.  
  296.       /* check dst reg */
  297.       if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  298.          const GLuint index = inst->DstReg.Index;
  299.          ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  300.  
  301.          if (inst->DstReg.RelAddr) {
  302.             if (dbg)
  303.                printf("abort remove dead code (indirect temp)\n");
  304.             goto done;
  305.          }
  306.  
  307.          if (inst->CondUpdate) {
  308.             /* If we're writing to this register and setting condition
  309.              * codes we cannot remove the instruction.  Prevent removal
  310.              * by setting the 'read' flag.
  311.              */
  312.             tempRead[index][0] = GL_TRUE;
  313.             tempRead[index][1] = GL_TRUE;
  314.             tempRead[index][2] = GL_TRUE;
  315.             tempRead[index][3] = GL_TRUE;
  316.          }
  317.       }
  318.    }
  319.  
  320.    /* find instructions that write to dead registers, flag for removal */
  321.    for (i = 0; i < prog->NumInstructions; i++) {
  322.       struct prog_instruction *inst = prog->Instructions + i;
  323.       const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
  324.  
  325.       if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
  326.          GLint chan, index = inst->DstReg.Index;
  327.  
  328.          for (chan = 0; chan < 4; chan++) {
  329.             if (!tempRead[index][chan] &&
  330.                 inst->DstReg.WriteMask & (1 << chan)) {
  331.                if (dbg) {
  332.                   printf("Remove writemask on %u.%c\n", i,
  333.                                chan == 3 ? 'w' : 'x' + chan);
  334.                }
  335.                inst->DstReg.WriteMask &= ~(1 << chan);
  336.                rem++;
  337.             }
  338.          }
  339.  
  340.          if (inst->DstReg.WriteMask == 0) {
  341.             /* If we cleared all writes, the instruction can be removed. */
  342.             if (dbg)
  343.                printf("Remove instruction %u: \n", i);
  344.             removeInst[i] = GL_TRUE;
  345.          }
  346.       }
  347.    }
  348.  
  349.    /* now remove the instructions which aren't needed */
  350.    rem = remove_instructions(prog, removeInst);
  351.  
  352.    if (dbg) {
  353.       printf("Optimize: End dead code removal.\n");
  354.       printf("  %u channel writes removed\n", rem);
  355.       printf("  %u instructions removed\n", rem);
  356.       /*_mesa_print_program(prog);*/
  357.    }
  358.  
  359. done:
  360.    free(removeInst);
  361.    return rem != 0;
  362. }
  363.  
  364.  
  365. enum inst_use
  366. {
  367.    READ,
  368.    WRITE,
  369.    FLOW,
  370.    END
  371. };
  372.  
  373.  
  374. /**
  375.  * Scan forward in program from 'start' for the next occurances of TEMP[index].
  376.  * We look if an instruction reads the component given by the masks and if they
  377.  * are overwritten.
  378.  * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
  379.  * that we can't look further.
  380.  */
  381. static enum inst_use
  382. find_next_use(const struct gl_program *prog,
  383.               GLuint start,
  384.               GLuint index,
  385.               GLuint mask)
  386. {
  387.    GLuint i;
  388.  
  389.    for (i = start; i < prog->NumInstructions; i++) {
  390.       const struct prog_instruction *inst = prog->Instructions + i;
  391.       switch (inst->Opcode) {
  392.       case OPCODE_BGNLOOP:
  393.       case OPCODE_BGNSUB:
  394.       case OPCODE_CAL:
  395.       case OPCODE_CONT:
  396.       case OPCODE_IF:
  397.       case OPCODE_ELSE:
  398.       case OPCODE_ENDIF:
  399.       case OPCODE_ENDLOOP:
  400.       case OPCODE_ENDSUB:
  401.       case OPCODE_RET:
  402.          return FLOW;
  403.       case OPCODE_END:
  404.          return END;
  405.       default:
  406.          {
  407.             const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
  408.             GLuint j;
  409.             for (j = 0; j < numSrc; j++) {
  410.                if (inst->SrcReg[j].RelAddr ||
  411.                    (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
  412.                    inst->SrcReg[j].Index == index &&
  413.                    (get_src_arg_mask(inst,j,NO_MASK) & mask)))
  414.                   return READ;
  415.             }
  416.             if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
  417.                 inst->DstReg.File == PROGRAM_TEMPORARY &&
  418.                 inst->DstReg.Index == index) {
  419.                mask &= ~inst->DstReg.WriteMask;
  420.                if (mask == 0)
  421.                   return WRITE;
  422.             }
  423.          }
  424.       }
  425.    }
  426.    return END;
  427. }
  428.  
  429.  
  430. /**
  431.  * Is the given instruction opcode a flow-control opcode?
  432.  * XXX maybe move this into prog_instruction.[ch]
  433.  */
  434. static GLboolean
  435. _mesa_is_flow_control_opcode(enum prog_opcode opcode)
  436. {
  437.    switch (opcode) {
  438.    case OPCODE_BGNLOOP:
  439.    case OPCODE_BGNSUB:
  440.    case OPCODE_CAL:
  441.    case OPCODE_CONT:
  442.    case OPCODE_IF:
  443.    case OPCODE_ELSE:
  444.    case OPCODE_END:
  445.    case OPCODE_ENDIF:
  446.    case OPCODE_ENDLOOP:
  447.    case OPCODE_ENDSUB:
  448.    case OPCODE_RET:
  449.       return GL_TRUE;
  450.    default:
  451.       return GL_FALSE;
  452.    }
  453. }
  454.  
  455.  
  456. /**
  457.  * Test if the given instruction is a simple MOV (no conditional updating,
  458.  * not relative addressing, no negation/abs, etc).
  459.  */
  460. static GLboolean
  461. can_downward_mov_be_modifed(const struct prog_instruction *mov)
  462. {
  463.    return
  464.       mov->Opcode == OPCODE_MOV &&
  465.       mov->CondUpdate == GL_FALSE &&
  466.       mov->SrcReg[0].RelAddr == 0 &&
  467.       mov->SrcReg[0].Negate == 0 &&
  468.       mov->SrcReg[0].Abs == 0 &&
  469.       mov->SrcReg[0].HasIndex2 == 0 &&
  470.       mov->SrcReg[0].RelAddr2 == 0 &&
  471.       mov->DstReg.RelAddr == 0 &&
  472.       mov->DstReg.CondMask == COND_TR;
  473. }
  474.  
  475.  
  476. static GLboolean
  477. can_upward_mov_be_modifed(const struct prog_instruction *mov)
  478. {
  479.    return
  480.       can_downward_mov_be_modifed(mov) &&
  481.       mov->DstReg.File == PROGRAM_TEMPORARY &&
  482.       mov->SaturateMode == SATURATE_OFF;
  483. }
  484.  
  485.  
  486. /**
  487.  * Try to remove use of extraneous MOV instructions, to free them up for dead
  488.  * code removal.
  489.  */
  490. static void
  491. _mesa_remove_extra_move_use(struct gl_program *prog)
  492. {
  493.    GLuint i, j;
  494.  
  495.    if (dbg) {
  496.       printf("Optimize: Begin remove extra move use\n");
  497.       _mesa_print_program(prog);
  498.    }
  499.  
  500.    /*
  501.     * Look for sequences such as this:
  502.     *    MOV tmpX, arg0;
  503.     *    ...
  504.     *    FOO tmpY, tmpX, arg1;
  505.     * and convert into:
  506.     *    MOV tmpX, arg0;
  507.     *    ...
  508.     *    FOO tmpY, arg0, arg1;
  509.     */
  510.  
  511.    for (i = 0; i + 1 < prog->NumInstructions; i++) {
  512.       const struct prog_instruction *mov = prog->Instructions + i;
  513.       GLuint dst_mask, src_mask;
  514.       if (can_upward_mov_be_modifed(mov) == GL_FALSE)
  515.          continue;
  516.  
  517.       /* Scanning the code, we maintain the components which are still active in
  518.        * these two masks
  519.        */
  520.       dst_mask = mov->DstReg.WriteMask;
  521.       src_mask = get_src_arg_mask(mov, 0, NO_MASK);
  522.  
  523.       /* Walk through remaining instructions until the or src reg gets
  524.        * rewritten or we get into some flow-control, eliminating the use of
  525.        * this MOV.
  526.        */
  527.       for (j = i + 1; j < prog->NumInstructions; j++) {
  528.          struct prog_instruction *inst2 = prog->Instructions + j;
  529.          GLuint arg;
  530.  
  531.          if (_mesa_is_flow_control_opcode(inst2->Opcode))
  532.              break;
  533.  
  534.          /* First rewrite this instruction's args if appropriate. */
  535.          for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
  536.             GLuint comp, read_mask;
  537.  
  538.             if (inst2->SrcReg[arg].File != mov->DstReg.File ||
  539.                 inst2->SrcReg[arg].Index != mov->DstReg.Index ||
  540.                 inst2->SrcReg[arg].RelAddr ||
  541.                 inst2->SrcReg[arg].Abs)
  542.                continue;
  543.             read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
  544.  
  545.             /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
  546.              * components read still come from the mov instructions
  547.              */
  548.             if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
  549.                (read_mask & dst_mask) == read_mask) {
  550.                for (comp = 0; comp < 4; comp++) {
  551.                   const GLuint inst2_swz =
  552.                      GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
  553.                   const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
  554.                   inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
  555.                   inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
  556.                   inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
  557.                                                   inst2_swz) & 0x1) << comp);
  558.                }
  559.                inst2->SrcReg[arg].File = mov->SrcReg[0].File;
  560.                inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
  561.             }
  562.          }
  563.  
  564.          /* The source of MOV is written. This potentially deactivates some
  565.           * components from the src and dst of the MOV instruction
  566.           */
  567.          if (inst2->DstReg.File == mov->DstReg.File &&
  568.              (inst2->DstReg.RelAddr ||
  569.               inst2->DstReg.Index == mov->DstReg.Index)) {
  570.             dst_mask &= ~inst2->DstReg.WriteMask;
  571.             src_mask = get_src_arg_mask(mov, 0, dst_mask);
  572.          }
  573.  
  574.          /* Idem when the destination of mov is written */
  575.          if (inst2->DstReg.File == mov->SrcReg[0].File &&
  576.              (inst2->DstReg.RelAddr ||
  577.               inst2->DstReg.Index == mov->SrcReg[0].Index)) {
  578.             src_mask &= ~inst2->DstReg.WriteMask;
  579.             dst_mask &= get_dst_mask_for_mov(mov, src_mask);
  580.          }
  581.          if (dst_mask == 0)
  582.             break;
  583.       }
  584.    }
  585.  
  586.    if (dbg) {
  587.       printf("Optimize: End remove extra move use.\n");
  588.       /*_mesa_print_program(prog);*/
  589.    }
  590. }
  591.  
  592.  
  593. /**
  594.  * Complements dead_code_global. Try to remove code in block of code by
  595.  * carefully monitoring the swizzles. Both functions should be merged into one
  596.  * with a proper control flow graph
  597.  */
  598. static GLboolean
  599. _mesa_remove_dead_code_local(struct gl_program *prog)
  600. {
  601.    GLboolean *removeInst;
  602.    GLuint i, arg, rem = 0;
  603.  
  604.    removeInst =
  605.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  606.  
  607.    for (i = 0; i < prog->NumInstructions; i++) {
  608.       const struct prog_instruction *inst = prog->Instructions + i;
  609.       const GLuint index = inst->DstReg.Index;
  610.       const GLuint mask = inst->DstReg.WriteMask;
  611.       enum inst_use use;
  612.  
  613.       /* We must deactivate the pass as soon as some indirection is used */
  614.       if (inst->DstReg.RelAddr)
  615.          goto done;
  616.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
  617.          if (inst->SrcReg[arg].RelAddr)
  618.             goto done;
  619.  
  620.       if (_mesa_is_flow_control_opcode(inst->Opcode) ||
  621.           _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
  622.           inst->DstReg.File != PROGRAM_TEMPORARY ||
  623.           inst->DstReg.RelAddr)
  624.          continue;
  625.  
  626.       use = find_next_use(prog, i+1, index, mask);
  627.       if (use == WRITE || use == END)
  628.          removeInst[i] = GL_TRUE;
  629.    }
  630.  
  631.    rem = remove_instructions(prog, removeInst);
  632.  
  633. done:
  634.    free(removeInst);
  635.    return rem != 0;
  636. }
  637.  
  638.  
  639. /**
  640.  * Try to inject the destination of mov as the destination of inst and recompute
  641.  * the swizzles operators for the sources of inst if required. Return GL_TRUE
  642.  * of the substitution was possible, GL_FALSE otherwise
  643.  */
  644. static GLboolean
  645. _mesa_merge_mov_into_inst(struct prog_instruction *inst,
  646.                           const struct prog_instruction *mov)
  647. {
  648.    /* Indirection table which associates destination and source components for
  649.     * the mov instruction
  650.     */
  651.    const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
  652.  
  653.    /* Some components are not written by inst. We cannot remove the mov */
  654.    if (mask != (inst->DstReg.WriteMask & mask))
  655.       return GL_FALSE;
  656.  
  657.    inst->SaturateMode |= mov->SaturateMode;
  658.  
  659.    /* Depending on the instruction, we may need to recompute the swizzles.
  660.     * Also, some other instructions (like TEX) are not linear. We will only
  661.     * consider completely active sources and destinations
  662.     */
  663.    switch (inst->Opcode) {
  664.  
  665.    /* Carstesian instructions: we compute the swizzle */
  666.    case OPCODE_MOV:
  667.    case OPCODE_MIN:
  668.    case OPCODE_MAX:
  669.    case OPCODE_ABS:
  670.    case OPCODE_ADD:
  671.    case OPCODE_MAD:
  672.    case OPCODE_MUL:
  673.    case OPCODE_SUB:
  674.    {
  675.       GLuint dst_to_src_comp[4] = {0,0,0,0};
  676.       GLuint dst_comp, arg;
  677.       for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  678.          if (mov->DstReg.WriteMask & (1 << dst_comp)) {
  679.             const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
  680.             ASSERT(src_comp < 4);
  681.             dst_to_src_comp[dst_comp] = src_comp;
  682.          }
  683.       }
  684.  
  685.       /* Patch each source of the instruction */
  686.       for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
  687.          const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
  688.          inst->SrcReg[arg].Swizzle = 0;
  689.  
  690.          /* Reset each active component of the swizzle */
  691.          for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
  692.             GLuint src_comp, arg_comp;
  693.             if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
  694.                continue;
  695.             src_comp = dst_to_src_comp[dst_comp];
  696.             ASSERT(src_comp < 4);
  697.             arg_comp = GET_SWZ(arg_swz, src_comp);
  698.             ASSERT(arg_comp < 4);
  699.             inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
  700.          }
  701.       }
  702.       inst->DstReg = mov->DstReg;
  703.       return GL_TRUE;
  704.    }
  705.  
  706.    /* Dot products and scalar instructions: we only change the destination */
  707.    case OPCODE_RCP:
  708.    case OPCODE_SIN:
  709.    case OPCODE_COS:
  710.    case OPCODE_RSQ:
  711.    case OPCODE_POW:
  712.    case OPCODE_EX2:
  713.    case OPCODE_LOG:
  714.    case OPCODE_DP2:
  715.    case OPCODE_DP3:
  716.    case OPCODE_DP4:
  717.       inst->DstReg = mov->DstReg;
  718.       return GL_TRUE;
  719.  
  720.    /* All other instructions require fully active components with no swizzle */
  721.    default:
  722.       if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
  723.           inst->DstReg.WriteMask != WRITEMASK_XYZW)
  724.          return GL_FALSE;
  725.       inst->DstReg = mov->DstReg;
  726.       return GL_TRUE;
  727.    }
  728. }
  729.  
  730.  
  731. /**
  732.  * Try to remove extraneous MOV instructions from the given program.
  733.  */
  734. static GLboolean
  735. _mesa_remove_extra_moves(struct gl_program *prog)
  736. {
  737.    GLboolean *removeInst; /* per-instruction removal flag */
  738.    GLuint i, rem = 0, nesting = 0;
  739.  
  740.    if (dbg) {
  741.       printf("Optimize: Begin remove extra moves\n");
  742.       _mesa_print_program(prog);
  743.    }
  744.  
  745.    removeInst =
  746.       calloc(1, prog->NumInstructions * sizeof(GLboolean));
  747.  
  748.    /*
  749.     * Look for sequences such as this:
  750.     *    FOO tmpX, arg0, arg1;
  751.     *    MOV tmpY, tmpX;
  752.     * and convert into:
  753.     *    FOO tmpY, arg0, arg1;
  754.     */
  755.  
  756.    for (i = 0; i < prog->NumInstructions; i++) {
  757.       const struct prog_instruction *mov = prog->Instructions + i;
  758.  
  759.       switch (mov->Opcode) {
  760.       case OPCODE_BGNLOOP:
  761.       case OPCODE_BGNSUB:
  762.       case OPCODE_IF:
  763.          nesting++;
  764.          break;
  765.       case OPCODE_ENDLOOP:
  766.       case OPCODE_ENDSUB:
  767.       case OPCODE_ENDIF:
  768.          nesting--;
  769.          break;
  770.       case OPCODE_MOV:
  771.          if (i > 0 &&
  772.              can_downward_mov_be_modifed(mov) &&
  773.              mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
  774.              nesting == 0)
  775.          {
  776.  
  777.             /* see if this MOV can be removed */
  778.             const GLuint id = mov->SrcReg[0].Index;
  779.             struct prog_instruction *prevInst;
  780.             GLuint prevI;
  781.  
  782.             /* get pointer to previous instruction */
  783.             prevI = i - 1;
  784.             while (prevI > 0 && removeInst[prevI])
  785.                prevI--;
  786.             prevInst = prog->Instructions + prevI;
  787.  
  788.             if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
  789.                 prevInst->DstReg.Index == id &&
  790.                 prevInst->DstReg.RelAddr == 0 &&
  791.                 prevInst->DstReg.CondMask == COND_TR) {
  792.  
  793.                const GLuint dst_mask = prevInst->DstReg.WriteMask;
  794.                enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
  795.  
  796.                if (next_use == WRITE || next_use == END) {
  797.                   /* OK, we can safely remove this MOV instruction.
  798.                    * Transform:
  799.                    *   prevI: FOO tempIndex, x, y;
  800.                    *       i: MOV z, tempIndex;
  801.                    * Into:
  802.                    *   prevI: FOO z, x, y;
  803.                    */
  804.                   if (_mesa_merge_mov_into_inst(prevInst, mov)) {
  805.                      removeInst[i] = GL_TRUE;
  806.                      if (dbg) {
  807.                         printf("Remove MOV at %u\n", i);
  808.                         printf("new prev inst %u: ", prevI);
  809.                         _mesa_print_instruction(prevInst);
  810.                      }
  811.                   }
  812.                }
  813.             }
  814.          }
  815.          break;
  816.       default:
  817.          ; /* nothing */
  818.       }
  819.    }
  820.  
  821.    /* now remove the instructions which aren't needed */
  822.    rem = remove_instructions(prog, removeInst);
  823.  
  824.    free(removeInst);
  825.  
  826.    if (dbg) {
  827.       printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
  828.       /*_mesa_print_program(prog);*/
  829.    }
  830.  
  831.    return rem != 0;
  832. }
  833.  
  834.  
  835. /** A live register interval */
  836. struct interval
  837. {
  838.    GLuint Reg;         /** The temporary register index */
  839.    GLuint Start, End;  /** Start/end instruction numbers */
  840. };
  841.  
  842.  
  843. /** A list of register intervals */
  844. struct interval_list
  845. {
  846.    GLuint Num;
  847.    struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  848. };
  849.  
  850.  
  851. static void
  852. append_interval(struct interval_list *list, const struct interval *inv)
  853. {
  854.    list->Intervals[list->Num++] = *inv;
  855. }
  856.  
  857.  
  858. /** Insert interval inv into list, sorted by interval end */
  859. static void
  860. insert_interval_by_end(struct interval_list *list, const struct interval *inv)
  861. {
  862.    /* XXX we could do a binary search insertion here since list is sorted */
  863.    GLint i = list->Num - 1;
  864.    while (i >= 0 && list->Intervals[i].End > inv->End) {
  865.       list->Intervals[i + 1] = list->Intervals[i];
  866.       i--;
  867.    }
  868.    list->Intervals[i + 1] = *inv;
  869.    list->Num++;
  870.  
  871. #ifdef DEBUG
  872.    {
  873.       GLuint i;
  874.       for (i = 0; i + 1 < list->Num; i++) {
  875.          ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
  876.       }
  877.    }
  878. #endif
  879. }
  880.  
  881.  
  882. /** Remove the given interval from the interval list */
  883. static void
  884. remove_interval(struct interval_list *list, const struct interval *inv)
  885. {
  886.    /* XXX we could binary search since list is sorted */
  887.    GLuint k;
  888.    for (k = 0; k < list->Num; k++) {
  889.       if (list->Intervals[k].Reg == inv->Reg) {
  890.          /* found, remove it */
  891.          ASSERT(list->Intervals[k].Start == inv->Start);
  892.          ASSERT(list->Intervals[k].End == inv->End);
  893.          while (k < list->Num - 1) {
  894.             list->Intervals[k] = list->Intervals[k + 1];
  895.             k++;
  896.          }
  897.          list->Num--;
  898.          return;
  899.       }
  900.    }
  901. }
  902.  
  903.  
  904. /** called by qsort() */
  905. static int
  906. compare_start(const void *a, const void *b)
  907. {
  908.    const struct interval *ia = (const struct interval *) a;
  909.    const struct interval *ib = (const struct interval *) b;
  910.    if (ia->Start < ib->Start)
  911.       return -1;
  912.    else if (ia->Start > ib->Start)
  913.       return +1;
  914.    else
  915.       return 0;
  916. }
  917.  
  918.  
  919. /** sort the interval list according to interval starts */
  920. static void
  921. sort_interval_list_by_start(struct interval_list *list)
  922. {
  923.    qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
  924. #ifdef DEBUG
  925.    {
  926.       GLuint i;
  927.       for (i = 0; i + 1 < list->Num; i++) {
  928.          ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
  929.       }
  930.    }
  931. #endif
  932. }
  933.  
  934. struct loop_info
  935. {
  936.    GLuint Start, End;  /**< Start, end instructions of loop */
  937. };
  938.  
  939. /**
  940.  * Update the intermediate interval info for register 'index' and
  941.  * instruction 'ic'.
  942.  */
  943. static void
  944. update_interval(GLint intBegin[], GLint intEnd[],
  945.                 struct loop_info *loopStack, GLuint loopStackDepth,
  946.                 GLuint index, GLuint ic)
  947. {
  948.    int i;
  949.    GLuint begin = ic;
  950.    GLuint end = ic;
  951.  
  952.    /* If the register is used in a loop, extend its lifetime through the end
  953.     * of the outermost loop that doesn't contain its definition.
  954.     */
  955.    for (i = 0; i < loopStackDepth; i++) {
  956.       if (intBegin[index] < loopStack[i].Start) {
  957.          end = loopStack[i].End;
  958.          break;
  959.       }
  960.    }
  961.  
  962.    /* Variables that are live at the end of a loop will also be live at the
  963.     * beginning, so an instruction inside of a loop should have its live
  964.     * interval begin at the start of the outermost loop.
  965.     */
  966.    if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
  967.       begin = loopStack[0].Start;
  968.    }
  969.  
  970.    ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  971.    if (intBegin[index] == -1) {
  972.       ASSERT(intEnd[index] == -1);
  973.       intBegin[index] = begin;
  974.       intEnd[index] = end;
  975.    }
  976.    else {
  977.       intEnd[index] = end;
  978.    }
  979. }
  980.  
  981.  
  982. /**
  983.  * Find first/last instruction that references each temporary register.
  984.  */
  985. GLboolean
  986. _mesa_find_temp_intervals(const struct prog_instruction *instructions,
  987.                           GLuint numInstructions,
  988.                           GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
  989.                           GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  990. {
  991.    struct loop_info loopStack[MAX_LOOP_NESTING];
  992.    GLuint loopStackDepth = 0;
  993.    GLuint i;
  994.  
  995.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  996.       intBegin[i] = intEnd[i] = -1;
  997.    }
  998.  
  999.    /* Scan instructions looking for temporary registers */
  1000.    for (i = 0; i < numInstructions; i++) {
  1001.       const struct prog_instruction *inst = instructions + i;
  1002.       if (inst->Opcode == OPCODE_BGNLOOP) {
  1003.          loopStack[loopStackDepth].Start = i;
  1004.          loopStack[loopStackDepth].End = inst->BranchTarget;
  1005.          loopStackDepth++;
  1006.       }
  1007.       else if (inst->Opcode == OPCODE_ENDLOOP) {
  1008.          loopStackDepth--;
  1009.       }
  1010.       else if (inst->Opcode == OPCODE_CAL) {
  1011.          return GL_FALSE;
  1012.       }
  1013.       else {
  1014.          const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
  1015.          GLuint j;
  1016.          for (j = 0; j < numSrc; j++) {
  1017.             if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
  1018.                const GLuint index = inst->SrcReg[j].Index;
  1019.                if (inst->SrcReg[j].RelAddr)
  1020.                   return GL_FALSE;
  1021.                update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1022.                                index, i);
  1023.             }
  1024.          }
  1025.          if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  1026.             const GLuint index = inst->DstReg.Index;
  1027.             if (inst->DstReg.RelAddr)
  1028.                return GL_FALSE;
  1029.             update_interval(intBegin, intEnd, loopStack, loopStackDepth,
  1030.                             index, i);
  1031.          }
  1032.       }
  1033.    }
  1034.  
  1035.    return GL_TRUE;
  1036. }
  1037.  
  1038.  
  1039. /**
  1040.  * Find the live intervals for each temporary register in the program.
  1041.  * For register R, the interval [A,B] indicates that R is referenced
  1042.  * from instruction A through instruction B.
  1043.  * Special consideration is needed for loops and subroutines.
  1044.  * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
  1045.  */
  1046. static GLboolean
  1047. find_live_intervals(struct gl_program *prog,
  1048.                     struct interval_list *liveIntervals)
  1049. {
  1050.    GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1051.    GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1052.    GLuint i;
  1053.  
  1054.    /*
  1055.     * Note: we'll return GL_FALSE below if we find relative indexing
  1056.     * into the TEMP register file.  We can't handle that yet.
  1057.     * We also give up on subroutines for now.
  1058.     */
  1059.  
  1060.    if (dbg) {
  1061.       printf("Optimize: Begin find intervals\n");
  1062.    }
  1063.  
  1064.    /* build intermediate arrays */
  1065.    if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
  1066.                                   intBegin, intEnd))
  1067.       return GL_FALSE;
  1068.  
  1069.    /* Build live intervals list from intermediate arrays */
  1070.    liveIntervals->Num = 0;
  1071.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
  1072.       if (intBegin[i] >= 0) {
  1073.          struct interval inv;
  1074.          inv.Reg = i;
  1075.          inv.Start = intBegin[i];
  1076.          inv.End = intEnd[i];
  1077.          append_interval(liveIntervals, &inv);
  1078.       }
  1079.    }
  1080.  
  1081.    /* Sort the list according to interval starts */
  1082.    sort_interval_list_by_start(liveIntervals);
  1083.  
  1084.    if (dbg) {
  1085.       /* print interval info */
  1086.       for (i = 0; i < liveIntervals->Num; i++) {
  1087.          const struct interval *inv = liveIntervals->Intervals + i;
  1088.          printf("Reg[%d] live [%d, %d]:",
  1089.                       inv->Reg, inv->Start, inv->End);
  1090.          if (1) {
  1091.             GLuint j;
  1092.             for (j = 0; j < inv->Start; j++)
  1093.                printf(" ");
  1094.             for (j = inv->Start; j <= inv->End; j++)
  1095.                printf("x");
  1096.          }
  1097.          printf("\n");
  1098.       }
  1099.    }
  1100.  
  1101.    return GL_TRUE;
  1102. }
  1103.  
  1104.  
  1105. /** Scan the array of used register flags to find free entry */
  1106. static GLint
  1107. alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
  1108. {
  1109.    GLuint k;
  1110.    for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
  1111.       if (!usedRegs[k]) {
  1112.          usedRegs[k] = GL_TRUE;
  1113.          return k;
  1114.       }
  1115.    }
  1116.    return -1;
  1117. }
  1118.  
  1119.  
  1120. /**
  1121.  * This function implements "Linear Scan Register Allocation" to reduce
  1122.  * the number of temporary registers used by the program.
  1123.  *
  1124.  * We compute the "live interval" for all temporary registers then
  1125.  * examine the overlap of the intervals to allocate new registers.
  1126.  * Basically, if two intervals do not overlap, they can use the same register.
  1127.  */
  1128. static void
  1129. _mesa_reallocate_registers(struct gl_program *prog)
  1130. {
  1131.    struct interval_list liveIntervals;
  1132.    GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1133.    GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1134.    GLuint i;
  1135.    GLint maxTemp = -1;
  1136.  
  1137.    if (dbg) {
  1138.       printf("Optimize: Begin live-interval register reallocation\n");
  1139.       _mesa_print_program(prog);
  1140.    }
  1141.  
  1142.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
  1143.       registerMap[i] = -1;
  1144.       usedRegs[i] = GL_FALSE;
  1145.    }
  1146.  
  1147.    if (!find_live_intervals(prog, &liveIntervals)) {
  1148.       if (dbg)
  1149.          printf("Aborting register reallocation\n");
  1150.       return;
  1151.    }
  1152.  
  1153.    {
  1154.       struct interval_list activeIntervals;
  1155.       activeIntervals.Num = 0;
  1156.  
  1157.       /* loop over live intervals, allocating a new register for each */
  1158.       for (i = 0; i < liveIntervals.Num; i++) {
  1159.          const struct interval *live = liveIntervals.Intervals + i;
  1160.  
  1161.          if (dbg)
  1162.             printf("Consider register %u\n", live->Reg);
  1163.  
  1164.          /* Expire old intervals.  Intervals which have ended with respect
  1165.           * to the live interval can have their remapped registers freed.
  1166.           */
  1167.          {
  1168.             GLint j;
  1169.             for (j = 0; j < (GLint) activeIntervals.Num; j++) {
  1170.                const struct interval *inv = activeIntervals.Intervals + j;
  1171.                if (inv->End >= live->Start) {
  1172.                   /* Stop now.  Since the activeInterval list is sorted
  1173.                    * we know we don't have to go further.
  1174.                    */
  1175.                   break;
  1176.                }
  1177.                else {
  1178.                   /* Interval 'inv' has expired */
  1179.                   const GLint regNew = registerMap[inv->Reg];
  1180.                   ASSERT(regNew >= 0);
  1181.  
  1182.                   if (dbg)
  1183.                      printf("  expire interval for reg %u\n", inv->Reg);
  1184.  
  1185.                   /* remove interval j from active list */
  1186.                   remove_interval(&activeIntervals, inv);
  1187.                   j--;  /* counter-act j++ in for-loop above */
  1188.  
  1189.                   /* return register regNew to the free pool */
  1190.                   if (dbg)
  1191.                      printf("  free reg %d\n", regNew);
  1192.                   ASSERT(usedRegs[regNew] == GL_TRUE);
  1193.                   usedRegs[regNew] = GL_FALSE;
  1194.                }
  1195.             }
  1196.          }
  1197.  
  1198.          /* find a free register for this live interval */
  1199.          {
  1200.             const GLint k = alloc_register(usedRegs);
  1201.             if (k < 0) {
  1202.                /* out of registers, give up */
  1203.                return;
  1204.             }
  1205.             registerMap[live->Reg] = k;
  1206.             maxTemp = MAX2(maxTemp, k);
  1207.             if (dbg)
  1208.                printf("  remap register %u -> %d\n", live->Reg, k);
  1209.          }
  1210.  
  1211.          /* Insert this live interval into the active list which is sorted
  1212.           * by increasing end points.
  1213.           */
  1214.          insert_interval_by_end(&activeIntervals, live);
  1215.       }
  1216.    }
  1217.  
  1218.    if (maxTemp + 1 < (GLint) liveIntervals.Num) {
  1219.       /* OK, we've reduced the number of registers needed.
  1220.        * Scan the program and replace all the old temporary register
  1221.        * indexes with the new indexes.
  1222.        */
  1223.       replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
  1224.  
  1225.       prog->NumTemporaries = maxTemp + 1;
  1226.    }
  1227.  
  1228.    if (dbg) {
  1229.       printf("Optimize: End live-interval register reallocation\n");
  1230.       printf("Num temp regs before: %u  after: %u\n",
  1231.                    liveIntervals.Num, maxTemp + 1);
  1232.       _mesa_print_program(prog);
  1233.    }
  1234. }
  1235.  
  1236.  
  1237. #if 0
  1238. static void
  1239. print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
  1240.    fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
  1241.    _mesa_print_program(program);
  1242.    _mesa_print_program_parameters(ctx, program);
  1243.    fprintf(stderr, "\n\n");
  1244. }
  1245. #endif
  1246.  
  1247. /**
  1248.  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
  1249.  * instruction is the first instruction to write to register T0.  The are
  1250.  * several lowering passes done in GLSL IR (e.g. branches and
  1251.  * relative addressing) that create a large number of conditional assignments
  1252.  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
  1253.  *
  1254.  * Here is why this conversion is safe:
  1255.  * CMP T0, T1 T2 T0 can be expanded to:
  1256.  * if (T1 < 0.0)
  1257.  *      MOV T0, T2;
  1258.  * else
  1259.  *      MOV T0, T0;
  1260.  *
  1261.  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
  1262.  * as the original program.  If (T1 < 0.0) evaluates to false, executing
  1263.  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
  1264.  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
  1265.  * because any instruction that was going to read from T0 after this was going
  1266.  * to read a garbage value anyway.
  1267.  */
  1268. static void
  1269. _mesa_simplify_cmp(struct gl_program * program)
  1270. {
  1271.    GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
  1272.    GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
  1273.    GLuint i;
  1274.  
  1275.    if (dbg) {
  1276.       printf("Optimize: Begin reads without writes\n");
  1277.       _mesa_print_program(program);
  1278.    }
  1279.  
  1280.    for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
  1281.       tempWrites[i] = 0;
  1282.    }
  1283.  
  1284.    for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
  1285.       outputWrites[i] = 0;
  1286.    }
  1287.  
  1288.    for (i = 0; i < program->NumInstructions; i++) {
  1289.       struct prog_instruction *inst = program->Instructions + i;
  1290.       GLuint prevWriteMask;
  1291.  
  1292.       /* Give up if we encounter relative addressing or flow control. */
  1293.       if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
  1294.          return;
  1295.       }
  1296.  
  1297.       if (inst->DstReg.File == PROGRAM_OUTPUT) {
  1298.          assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
  1299.          prevWriteMask = outputWrites[inst->DstReg.Index];
  1300.          outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
  1301.       } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
  1302.          assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
  1303.          prevWriteMask = tempWrites[inst->DstReg.Index];
  1304.          tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
  1305.       } else {
  1306.          /* No other register type can be a destination register. */
  1307.          continue;
  1308.       }
  1309.  
  1310.       /* For a CMP to be considered a conditional write, the destination
  1311.        * register and source register two must be the same. */
  1312.       if (inst->Opcode == OPCODE_CMP
  1313.           && !(inst->DstReg.WriteMask & prevWriteMask)
  1314.           && inst->SrcReg[2].File == inst->DstReg.File
  1315.           && inst->SrcReg[2].Index == inst->DstReg.Index
  1316.           && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
  1317.  
  1318.          inst->Opcode = OPCODE_MOV;
  1319.          inst->SrcReg[0] = inst->SrcReg[1];
  1320.  
  1321.          /* Unused operands are expected to have the file set to
  1322.           * PROGRAM_UNDEFINED.  This is how _mesa_init_instructions initializes
  1323.           * all of the sources.
  1324.           */
  1325.          inst->SrcReg[1].File = PROGRAM_UNDEFINED;
  1326.          inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
  1327.          inst->SrcReg[2].File = PROGRAM_UNDEFINED;
  1328.          inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
  1329.       }
  1330.    }
  1331.    if (dbg) {
  1332.       printf("Optimize: End reads without writes\n");
  1333.       _mesa_print_program(program);
  1334.    }
  1335. }
  1336.  
  1337. /**
  1338.  * Apply optimizations to the given program to eliminate unnecessary
  1339.  * instructions, temp regs, etc.
  1340.  */
  1341. void
  1342. _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
  1343. {
  1344.    GLboolean any_change;
  1345.  
  1346.    _mesa_simplify_cmp(program);
  1347.    /* Stop when no modifications were output */
  1348.    do {
  1349.       any_change = GL_FALSE;
  1350.       _mesa_remove_extra_move_use(program);
  1351.       if (_mesa_remove_dead_code_global(program))
  1352.          any_change = GL_TRUE;
  1353.       if (_mesa_remove_extra_moves(program))
  1354.          any_change = GL_TRUE;
  1355.       if (_mesa_remove_dead_code_local(program))
  1356.          any_change = GL_TRUE;
  1357.  
  1358.       any_change = _mesa_constant_fold(program) || any_change;
  1359.       _mesa_reallocate_registers(program);
  1360.    } while (any_change);
  1361. }
  1362.  
  1363.