Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1901 serge 1
/*
2
 * Mesa 3-D graphics library
3
 * Version:  7.5
4
 *
5
 * Copyright (C) 2009  VMware, Inc.  All Rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the "Software"),
9
 * to deal in the Software without restriction, including without limitation
10
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
 * and/or sell copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included
15
 * in all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
 
25
 
26
 
27
#include "main/glheader.h"
28
#include "main/context.h"
29
#include "main/macros.h"
30
#include "program.h"
31
#include "prog_instruction.h"
32
#include "prog_optimize.h"
33
#include "prog_print.h"
34
 
35
 
36
#define MAX_LOOP_NESTING 50
37
/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
38
 * register allocate many temporary values into that small number of
39
 * temps.  So allow large temporary indices coming into the register
40
 * allocator.
41
 */
42
#define REG_ALLOCATE_MAX_PROGRAM_TEMPS	((1 << INST_INDEX_BITS) - 1)
43
 
44
static GLboolean dbg = GL_FALSE;
45
 
46
#define NO_MASK 0xf
47
 
48
/**
49
 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
50
 * are read from the given src in this instruction, We also provide
51
 * one optional masks which may mask other components in the dst
52
 * register
53
 */
54
static GLuint
55
get_src_arg_mask(const struct prog_instruction *inst,
56
                 GLuint arg, GLuint dst_mask)
57
{
58
   GLuint read_mask, channel_mask;
59
   GLuint comp;
60
 
61
   ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
62
 
63
   /* Form the dst register, find the written channels */
64
   if (inst->CondUpdate) {
65
      channel_mask = WRITEMASK_XYZW;
66
   }
67
   else {
68
      switch (inst->Opcode) {
69
      case OPCODE_MOV:
70
      case OPCODE_MIN:
71
      case OPCODE_MAX:
72
      case OPCODE_ABS:
73
      case OPCODE_ADD:
74
      case OPCODE_MAD:
75
      case OPCODE_MUL:
76
      case OPCODE_SUB:
77
         channel_mask = inst->DstReg.WriteMask & dst_mask;
78
         break;
79
      case OPCODE_RCP:
80
      case OPCODE_SIN:
81
      case OPCODE_COS:
82
      case OPCODE_RSQ:
83
      case OPCODE_POW:
84
      case OPCODE_EX2:
85
      case OPCODE_LOG:
86
         channel_mask = WRITEMASK_X;
87
         break;
88
      case OPCODE_DP2:
89
         channel_mask = WRITEMASK_XY;
90
         break;
91
      case OPCODE_DP3:
92
      case OPCODE_XPD:
93
         channel_mask = WRITEMASK_XYZ;
94
         break;
95
      default:
96
         channel_mask = WRITEMASK_XYZW;
97
         break;
98
      }
99
   }
100
 
101
   /* Now, given the src swizzle and the written channels, find which
102
    * components are actually read
103
    */
104
   read_mask = 0x0;
105
   for (comp = 0; comp < 4; ++comp) {
106
      const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
107
      ASSERT(coord < 4);
108
      if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
109
         read_mask |= 1 << coord;
110
   }
111
 
112
   return read_mask;
113
}
114
 
115
 
116
/**
117
 * For a MOV instruction, compute a write mask when src register also has
118
 * a mask
119
 */
120
static GLuint
121
get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
122
{
123
   const GLuint mask = mov->DstReg.WriteMask;
124
   GLuint comp;
125
   GLuint updated_mask = 0x0;
126
 
127
   ASSERT(mov->Opcode == OPCODE_MOV);
128
 
129
   for (comp = 0; comp < 4; ++comp) {
130
      GLuint src_comp;
131
      if ((mask & (1 << comp)) == 0)
132
         continue;
133
      src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
134
      if ((src_mask & (1 << src_comp)) == 0)
135
         continue;
136
      updated_mask |= 1 << comp;
137
   }
138
 
139
   return updated_mask;
140
}
141
 
142
 
143
/**
144
 * Ensure that the swizzle is regular.  That is, all of the swizzle
145
 * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
146
 */
147
static GLboolean
148
is_swizzle_regular(GLuint swz)
149
{
150
   return GET_SWZ(swz,0) <= SWIZZLE_W &&
151
          GET_SWZ(swz,1) <= SWIZZLE_W &&
152
          GET_SWZ(swz,2) <= SWIZZLE_W &&
153
          GET_SWZ(swz,3) <= SWIZZLE_W;
154
}
155
 
156
 
157
/**
158
 * In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
159
 * \return number of instructions removed
160
 */
161
static GLuint
162
remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
163
{
164
   GLint i, removeEnd = 0, removeCount = 0;
165
   GLuint totalRemoved = 0;
166
 
167
   /* go backward */
168
   for (i = prog->NumInstructions - 1; i >= 0; i--) {
169
      if (removeFlags[i]) {
170
         totalRemoved++;
171
         if (removeCount == 0) {
172
            /* begin a run of instructions to remove */
173
            removeEnd = i;
174
            removeCount = 1;
175
         }
176
         else {
177
            /* extend the run of instructions to remove */
178
            removeCount++;
179
         }
180
      }
181
      else {
182
         /* don't remove this instruction, but check if the preceeding
183
          * instructions are to be removed.
184
          */
185
         if (removeCount > 0) {
186
            GLint removeStart = removeEnd - removeCount + 1;
187
            _mesa_delete_instructions(prog, removeStart, removeCount);
188
            removeStart = removeCount = 0; /* reset removal info */
189
         }
190
      }
191
   }
192
   /* Finish removing if the first instruction was to be removed. */
193
   if (removeCount > 0) {
194
      GLint removeStart = removeEnd - removeCount + 1;
195
      _mesa_delete_instructions(prog, removeStart, removeCount);
196
   }
197
   return totalRemoved;
198
}
199
 
200
 
201
/**
202
 * Remap register indexes according to map.
203
 * \param prog  the program to search/replace
204
 * \param file  the type of register file to search/replace
205
 * \param map  maps old register indexes to new indexes
206
 */
207
static void
208
replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
209
{
210
   GLuint i;
211
 
212
   for (i = 0; i < prog->NumInstructions; i++) {
213
      struct prog_instruction *inst = prog->Instructions + i;
214
      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
215
      GLuint j;
216
      for (j = 0; j < numSrc; j++) {
217
         if (inst->SrcReg[j].File == file) {
218
            GLuint index = inst->SrcReg[j].Index;
219
            ASSERT(map[index] >= 0);
220
            inst->SrcReg[j].Index = map[index];
221
         }
222
      }
223
      if (inst->DstReg.File == file) {
224
         const GLuint index = inst->DstReg.Index;
225
         ASSERT(map[index] >= 0);
226
         inst->DstReg.Index = map[index];
227
      }
228
   }
229
}
230
 
231
 
232
/**
233
 * Remove dead instructions from the given program.
234
 * This is very primitive for now.  Basically look for temp registers
235
 * that are written to but never read.  Remove any instructions that
236
 * write to such registers.  Be careful with condition code setters.
237
 */
238
static GLboolean
239
_mesa_remove_dead_code_global(struct gl_program *prog)
240
{
241
   GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
242
   GLboolean *removeInst; /* per-instruction removal flag */
243
   GLuint i, rem = 0, comp;
244
 
245
   memset(tempRead, 0, sizeof(tempRead));
246
 
247
   if (dbg) {
248
      printf("Optimize: Begin dead code removal\n");
249
      /*_mesa_print_program(prog);*/
250
   }
251
 
252
   removeInst = (GLboolean *)
253
      calloc(1, prog->NumInstructions * sizeof(GLboolean));
254
 
255
   /* Determine which temps are read and written */
256
   for (i = 0; i < prog->NumInstructions; i++) {
257
      const struct prog_instruction *inst = prog->Instructions + i;
258
      const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
259
      GLuint j;
260
 
261
      /* check src regs */
262
      for (j = 0; j < numSrc; j++) {
263
         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
264
            const GLuint index = inst->SrcReg[j].Index;
265
            GLuint read_mask;
266
            ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
267
	    read_mask = get_src_arg_mask(inst, j, NO_MASK);
268
 
269
            if (inst->SrcReg[j].RelAddr) {
270
               if (dbg)
271
                  printf("abort remove dead code (indirect temp)\n");
272
               goto done;
273
            }
274
 
275
	    for (comp = 0; comp < 4; comp++) {
276
	       const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
277
	       ASSERT(swz < 4);
278
               if ((read_mask & (1 << swz)) == 0)
279
		  continue;
280
               if (swz <= SWIZZLE_W)
281
                  tempRead[index][swz] = GL_TRUE;
282
	    }
283
         }
284
      }
285
 
286
      /* check dst reg */
287
      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
288
         const GLuint index = inst->DstReg.Index;
289
         ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
290
 
291
         if (inst->DstReg.RelAddr) {
292
            if (dbg)
293
               printf("abort remove dead code (indirect temp)\n");
294
            goto done;
295
         }
296
 
297
         if (inst->CondUpdate) {
298
            /* If we're writing to this register and setting condition
299
             * codes we cannot remove the instruction.  Prevent removal
300
             * by setting the 'read' flag.
301
             */
302
            tempRead[index][0] = GL_TRUE;
303
            tempRead[index][1] = GL_TRUE;
304
            tempRead[index][2] = GL_TRUE;
305
            tempRead[index][3] = GL_TRUE;
306
         }
307
      }
308
   }
309
 
310
   /* find instructions that write to dead registers, flag for removal */
311
   for (i = 0; i < prog->NumInstructions; i++) {
312
      struct prog_instruction *inst = prog->Instructions + i;
313
      const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
314
 
315
      if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
316
         GLint chan, index = inst->DstReg.Index;
317
 
318
	 for (chan = 0; chan < 4; chan++) {
319
	    if (!tempRead[index][chan] &&
320
		inst->DstReg.WriteMask & (1 << chan)) {
321
	       if (dbg) {
322
		  printf("Remove writemask on %u.%c\n", i,
323
			       chan == 3 ? 'w' : 'x' + chan);
324
	       }
325
	       inst->DstReg.WriteMask &= ~(1 << chan);
326
	       rem++;
327
	    }
328
	 }
329
 
330
	 if (inst->DstReg.WriteMask == 0) {
331
	    /* If we cleared all writes, the instruction can be removed. */
332
	    if (dbg)
333
	       printf("Remove instruction %u: \n", i);
334
	    removeInst[i] = GL_TRUE;
335
	 }
336
      }
337
   }
338
 
339
   /* now remove the instructions which aren't needed */
340
   rem = remove_instructions(prog, removeInst);
341
 
342
   if (dbg) {
343
      printf("Optimize: End dead code removal.\n");
344
      printf("  %u channel writes removed\n", rem);
345
      printf("  %u instructions removed\n", rem);
346
      /*_mesa_print_program(prog);*/
347
   }
348
 
349
done:
350
   free(removeInst);
351
   return rem != 0;
352
}
353
 
354
 
355
enum inst_use
356
{
357
   READ,
358
   WRITE,
359
   FLOW,
360
   END
361
};
362
 
363
 
364
/**
365
 * Scan forward in program from 'start' for the next occurances of TEMP[index].
366
 * We look if an instruction reads the component given by the masks and if they
367
 * are overwritten.
368
 * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
369
 * that we can't look further.
370
 */
371
static enum inst_use
372
find_next_use(const struct gl_program *prog,
373
              GLuint start,
374
              GLuint index,
375
              GLuint mask)
376
{
377
   GLuint i;
378
 
379
   for (i = start; i < prog->NumInstructions; i++) {
380
      const struct prog_instruction *inst = prog->Instructions + i;
381
      switch (inst->Opcode) {
382
      case OPCODE_BGNLOOP:
383
      case OPCODE_BGNSUB:
384
      case OPCODE_BRA:
385
      case OPCODE_CAL:
386
      case OPCODE_CONT:
387
      case OPCODE_IF:
388
      case OPCODE_ELSE:
389
      case OPCODE_ENDIF:
390
      case OPCODE_ENDLOOP:
391
      case OPCODE_ENDSUB:
392
      case OPCODE_RET:
393
         return FLOW;
394
      case OPCODE_END:
395
         return END;
396
      default:
397
         {
398
            const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
399
            GLuint j;
400
            for (j = 0; j < numSrc; j++) {
401
               if (inst->SrcReg[j].RelAddr ||
402
                   (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
403
                   inst->SrcReg[j].Index == index &&
404
                   (get_src_arg_mask(inst,j,NO_MASK) & mask)))
405
                  return READ;
406
            }
407
            if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
408
                inst->DstReg.File == PROGRAM_TEMPORARY &&
409
                inst->DstReg.Index == index) {
410
               mask &= ~inst->DstReg.WriteMask;
411
               if (mask == 0)
412
                  return WRITE;
413
            }
414
         }
415
      }
416
   }
417
   return END;
418
}
419
 
420
 
421
/**
422
 * Is the given instruction opcode a flow-control opcode?
423
 * XXX maybe move this into prog_instruction.[ch]
424
 */
425
static GLboolean
426
_mesa_is_flow_control_opcode(enum prog_opcode opcode)
427
{
428
   switch (opcode) {
429
   case OPCODE_BGNLOOP:
430
   case OPCODE_BGNSUB:
431
   case OPCODE_BRA:
432
   case OPCODE_CAL:
433
   case OPCODE_CONT:
434
   case OPCODE_IF:
435
   case OPCODE_ELSE:
436
   case OPCODE_END:
437
   case OPCODE_ENDIF:
438
   case OPCODE_ENDLOOP:
439
   case OPCODE_ENDSUB:
440
   case OPCODE_RET:
441
      return GL_TRUE;
442
   default:
443
      return GL_FALSE;
444
   }
445
}
446
 
447
 
448
/**
449
 * Test if the given instruction is a simple MOV (no conditional updating,
450
 * not relative addressing, no negation/abs, etc).
451
 */
452
static GLboolean
453
can_downward_mov_be_modifed(const struct prog_instruction *mov)
454
{
455
   return
456
      mov->Opcode == OPCODE_MOV &&
457
      mov->CondUpdate == GL_FALSE &&
458
      mov->SrcReg[0].RelAddr == 0 &&
459
      mov->SrcReg[0].Negate == 0 &&
460
      mov->SrcReg[0].Abs == 0 &&
461
      mov->SrcReg[0].HasIndex2 == 0 &&
462
      mov->SrcReg[0].RelAddr2 == 0 &&
463
      mov->DstReg.RelAddr == 0 &&
464
      mov->DstReg.CondMask == COND_TR &&
465
      mov->SaturateMode == SATURATE_OFF;
466
}
467
 
468
 
469
static GLboolean
470
can_upward_mov_be_modifed(const struct prog_instruction *mov)
471
{
472
   return
473
      can_downward_mov_be_modifed(mov) &&
474
      mov->DstReg.File == PROGRAM_TEMPORARY;
475
}
476
 
477
 
478
/**
479
 * Try to remove use of extraneous MOV instructions, to free them up for dead
480
 * code removal.
481
 */
482
static void
483
_mesa_remove_extra_move_use(struct gl_program *prog)
484
{
485
   GLuint i, j;
486
 
487
   if (dbg) {
488
      printf("Optimize: Begin remove extra move use\n");
489
      _mesa_print_program(prog);
490
   }
491
 
492
   /*
493
    * Look for sequences such as this:
494
    *    MOV tmpX, arg0;
495
    *    ...
496
    *    FOO tmpY, tmpX, arg1;
497
    * and convert into:
498
    *    MOV tmpX, arg0;
499
    *    ...
500
    *    FOO tmpY, arg0, arg1;
501
    */
502
 
503
   for (i = 0; i + 1 < prog->NumInstructions; i++) {
504
      const struct prog_instruction *mov = prog->Instructions + i;
505
      GLuint dst_mask, src_mask;
506
      if (can_upward_mov_be_modifed(mov) == GL_FALSE)
507
         continue;
508
 
509
      /* Scanning the code, we maintain the components which are still active in
510
       * these two masks
511
       */
512
      dst_mask = mov->DstReg.WriteMask;
513
      src_mask = get_src_arg_mask(mov, 0, NO_MASK);
514
 
515
      /* Walk through remaining instructions until the or src reg gets
516
       * rewritten or we get into some flow-control, eliminating the use of
517
       * this MOV.
518
       */
519
      for (j = i + 1; j < prog->NumInstructions; j++) {
520
	 struct prog_instruction *inst2 = prog->Instructions + j;
521
         GLuint arg;
522
 
523
	 if (_mesa_is_flow_control_opcode(inst2->Opcode))
524
	     break;
525
 
526
	 /* First rewrite this instruction's args if appropriate. */
527
	 for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
528
	    GLuint comp, read_mask;
529
 
530
	    if (inst2->SrcReg[arg].File != mov->DstReg.File ||
531
		inst2->SrcReg[arg].Index != mov->DstReg.Index ||
532
		inst2->SrcReg[arg].RelAddr ||
533
		inst2->SrcReg[arg].Abs)
534
	       continue;
535
            read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
536
 
537
	    /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
538
             * components read still come from the mov instructions
539
             */
540
            if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
541
               (read_mask & dst_mask) == read_mask) {
542
               for (comp = 0; comp < 4; comp++) {
543
                  const GLuint inst2_swz =
544
                     GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
545
                  const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
546
                  inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
547
                  inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
548
                  inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
549
                                                  inst2_swz) & 0x1) << comp);
550
               }
551
               inst2->SrcReg[arg].File = mov->SrcReg[0].File;
552
               inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
553
            }
554
	 }
555
 
556
	 /* The source of MOV is written. This potentially deactivates some
557
          * components from the src and dst of the MOV instruction
558
          */
559
	 if (inst2->DstReg.File == mov->DstReg.File &&
560
	     (inst2->DstReg.RelAddr ||
561
	      inst2->DstReg.Index == mov->DstReg.Index)) {
562
            dst_mask &= ~inst2->DstReg.WriteMask;
563
            src_mask = get_src_arg_mask(mov, 0, dst_mask);
564
         }
565
 
566
         /* Idem when the destination of mov is written */
567
	 if (inst2->DstReg.File == mov->SrcReg[0].File &&
568
	     (inst2->DstReg.RelAddr ||
569
	      inst2->DstReg.Index == mov->SrcReg[0].Index)) {
570
            src_mask &= ~inst2->DstReg.WriteMask;
571
            dst_mask &= get_dst_mask_for_mov(mov, src_mask);
572
         }
573
         if (dst_mask == 0)
574
            break;
575
      }
576
   }
577
 
578
   if (dbg) {
579
      printf("Optimize: End remove extra move use.\n");
580
      /*_mesa_print_program(prog);*/
581
   }
582
}
583
 
584
 
585
/**
586
 * Complements dead_code_global. Try to remove code in block of code by
587
 * carefully monitoring the swizzles. Both functions should be merged into one
588
 * with a proper control flow graph
589
 */
590
static GLboolean
591
_mesa_remove_dead_code_local(struct gl_program *prog)
592
{
593
   GLboolean *removeInst;
594
   GLuint i, arg, rem = 0;
595
 
596
   removeInst = (GLboolean *)
597
      calloc(1, prog->NumInstructions * sizeof(GLboolean));
598
 
599
   for (i = 0; i < prog->NumInstructions; i++) {
600
      const struct prog_instruction *inst = prog->Instructions + i;
601
      const GLuint index = inst->DstReg.Index;
602
      const GLuint mask = inst->DstReg.WriteMask;
603
      enum inst_use use;
604
 
605
      /* We must deactivate the pass as soon as some indirection is used */
606
      if (inst->DstReg.RelAddr)
607
         goto done;
608
      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
609
         if (inst->SrcReg[arg].RelAddr)
610
            goto done;
611
 
612
      if (_mesa_is_flow_control_opcode(inst->Opcode) ||
613
          _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
614
          inst->DstReg.File != PROGRAM_TEMPORARY ||
615
          inst->DstReg.RelAddr)
616
         continue;
617
 
618
      use = find_next_use(prog, i+1, index, mask);
619
      if (use == WRITE || use == END)
620
         removeInst[i] = GL_TRUE;
621
   }
622
 
623
   rem = remove_instructions(prog, removeInst);
624
 
625
done:
626
   free(removeInst);
627
   return rem != 0;
628
}
629
 
630
 
631
/**
632
 * Try to inject the destination of mov as the destination of inst and recompute
633
 * the swizzles operators for the sources of inst if required. Return GL_TRUE
634
 * of the substitution was possible, GL_FALSE otherwise
635
 */
636
static GLboolean
637
_mesa_merge_mov_into_inst(struct prog_instruction *inst,
638
                          const struct prog_instruction *mov)
639
{
640
   /* Indirection table which associates destination and source components for
641
    * the mov instruction
642
    */
643
   const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
644
 
645
   /* Some components are not written by inst. We cannot remove the mov */
646
   if (mask != (inst->DstReg.WriteMask & mask))
647
      return GL_FALSE;
648
 
649
   /* Depending on the instruction, we may need to recompute the swizzles.
650
    * Also, some other instructions (like TEX) are not linear. We will only
651
    * consider completely active sources and destinations
652
    */
653
   switch (inst->Opcode) {
654
 
655
   /* Carstesian instructions: we compute the swizzle */
656
   case OPCODE_MOV:
657
   case OPCODE_MIN:
658
   case OPCODE_MAX:
659
   case OPCODE_ABS:
660
   case OPCODE_ADD:
661
   case OPCODE_MAD:
662
   case OPCODE_MUL:
663
   case OPCODE_SUB:
664
   {
665
      GLuint dst_to_src_comp[4] = {0,0,0,0};
666
      GLuint dst_comp, arg;
667
      for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
668
         if (mov->DstReg.WriteMask & (1 << dst_comp)) {
669
            const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
670
            ASSERT(src_comp < 4);
671
            dst_to_src_comp[dst_comp] = src_comp;
672
         }
673
      }
674
 
675
      /* Patch each source of the instruction */
676
      for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
677
         const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
678
         inst->SrcReg[arg].Swizzle = 0;
679
 
680
         /* Reset each active component of the swizzle */
681
         for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
682
            GLuint src_comp, arg_comp;
683
            if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
684
               continue;
685
            src_comp = dst_to_src_comp[dst_comp];
686
            ASSERT(src_comp < 4);
687
            arg_comp = GET_SWZ(arg_swz, src_comp);
688
            ASSERT(arg_comp < 4);
689
            inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
690
         }
691
      }
692
      inst->DstReg = mov->DstReg;
693
      return GL_TRUE;
694
   }
695
 
696
   /* Dot products and scalar instructions: we only change the destination */
697
   case OPCODE_RCP:
698
   case OPCODE_SIN:
699
   case OPCODE_COS:
700
   case OPCODE_RSQ:
701
   case OPCODE_POW:
702
   case OPCODE_EX2:
703
   case OPCODE_LOG:
704
   case OPCODE_DP2:
705
   case OPCODE_DP3:
706
   case OPCODE_DP4:
707
      inst->DstReg = mov->DstReg;
708
      return GL_TRUE;
709
 
710
   /* All other instructions require fully active components with no swizzle */
711
   default:
712
      if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
713
          inst->DstReg.WriteMask != WRITEMASK_XYZW)
714
         return GL_FALSE;
715
      inst->DstReg = mov->DstReg;
716
      return GL_TRUE;
717
   }
718
}
719
 
720
 
721
/**
722
 * Try to remove extraneous MOV instructions from the given program.
723
 */
724
static GLboolean
725
_mesa_remove_extra_moves(struct gl_program *prog)
726
{
727
   GLboolean *removeInst; /* per-instruction removal flag */
728
   GLuint i, rem = 0, nesting = 0;
729
 
730
   if (dbg) {
731
      printf("Optimize: Begin remove extra moves\n");
732
      _mesa_print_program(prog);
733
   }
734
 
735
   removeInst = (GLboolean *)
736
      calloc(1, prog->NumInstructions * sizeof(GLboolean));
737
 
738
   /*
739
    * Look for sequences such as this:
740
    *    FOO tmpX, arg0, arg1;
741
    *    MOV tmpY, tmpX;
742
    * and convert into:
743
    *    FOO tmpY, arg0, arg1;
744
    */
745
 
746
   for (i = 0; i < prog->NumInstructions; i++) {
747
      const struct prog_instruction *mov = prog->Instructions + i;
748
 
749
      switch (mov->Opcode) {
750
      case OPCODE_BGNLOOP:
751
      case OPCODE_BGNSUB:
752
      case OPCODE_IF:
753
         nesting++;
754
         break;
755
      case OPCODE_ENDLOOP:
756
      case OPCODE_ENDSUB:
757
      case OPCODE_ENDIF:
758
         nesting--;
759
         break;
760
      case OPCODE_MOV:
761
         if (i > 0 &&
762
             can_downward_mov_be_modifed(mov) &&
763
             mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
764
             nesting == 0)
765
         {
766
 
767
            /* see if this MOV can be removed */
768
            const GLuint id = mov->SrcReg[0].Index;
769
            struct prog_instruction *prevInst;
770
            GLuint prevI;
771
 
772
            /* get pointer to previous instruction */
773
            prevI = i - 1;
774
            while (prevI > 0 && removeInst[prevI])
775
               prevI--;
776
            prevInst = prog->Instructions + prevI;
777
 
778
            if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
779
                prevInst->DstReg.Index == id &&
780
                prevInst->DstReg.RelAddr == 0 &&
781
                prevInst->DstReg.CondSrc == 0 &&
782
                prevInst->DstReg.CondMask == COND_TR) {
783
 
784
               const GLuint dst_mask = prevInst->DstReg.WriteMask;
785
               enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
786
 
787
               if (next_use == WRITE || next_use == END) {
788
                  /* OK, we can safely remove this MOV instruction.
789
                   * Transform:
790
                   *   prevI: FOO tempIndex, x, y;
791
                   *       i: MOV z, tempIndex;
792
                   * Into:
793
                   *   prevI: FOO z, x, y;
794
                   */
795
                  if (_mesa_merge_mov_into_inst(prevInst, mov)) {
796
                     removeInst[i] = GL_TRUE;
797
                     if (dbg) {
798
                        printf("Remove MOV at %u\n", i);
799
                        printf("new prev inst %u: ", prevI);
800
                        _mesa_print_instruction(prevInst);
801
                     }
802
                  }
803
               }
804
            }
805
         }
806
         break;
807
      default:
808
         ; /* nothing */
809
      }
810
   }
811
 
812
   /* now remove the instructions which aren't needed */
813
   rem = remove_instructions(prog, removeInst);
814
 
815
   free(removeInst);
816
 
817
   if (dbg) {
818
      printf("Optimize: End remove extra moves.  %u instructions removed\n", rem);
819
      /*_mesa_print_program(prog);*/
820
   }
821
 
822
   return rem != 0;
823
}
824
 
825
 
826
/** A live register interval */
827
struct interval
828
{
829
   GLuint Reg;         /** The temporary register index */
830
   GLuint Start, End;  /** Start/end instruction numbers */
831
};
832
 
833
 
834
/** A list of register intervals */
835
struct interval_list
836
{
837
   GLuint Num;
838
   struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
839
};
840
 
841
 
842
static void
843
append_interval(struct interval_list *list, const struct interval *inv)
844
{
845
   list->Intervals[list->Num++] = *inv;
846
}
847
 
848
 
849
/** Insert interval inv into list, sorted by interval end */
850
static void
851
insert_interval_by_end(struct interval_list *list, const struct interval *inv)
852
{
853
   /* XXX we could do a binary search insertion here since list is sorted */
854
   GLint i = list->Num - 1;
855
   while (i >= 0 && list->Intervals[i].End > inv->End) {
856
      list->Intervals[i + 1] = list->Intervals[i];
857
      i--;
858
   }
859
   list->Intervals[i + 1] = *inv;
860
   list->Num++;
861
 
862
#ifdef DEBUG
863
   {
864
      GLuint i;
865
      for (i = 0; i + 1 < list->Num; i++) {
866
         ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
867
      }
868
   }
869
#endif
870
}
871
 
872
 
873
/** Remove the given interval from the interval list */
874
static void
875
remove_interval(struct interval_list *list, const struct interval *inv)
876
{
877
   /* XXX we could binary search since list is sorted */
878
   GLuint k;
879
   for (k = 0; k < list->Num; k++) {
880
      if (list->Intervals[k].Reg == inv->Reg) {
881
         /* found, remove it */
882
         ASSERT(list->Intervals[k].Start == inv->Start);
883
         ASSERT(list->Intervals[k].End == inv->End);
884
         while (k < list->Num - 1) {
885
            list->Intervals[k] = list->Intervals[k + 1];
886
            k++;
887
         }
888
         list->Num--;
889
         return;
890
      }
891
   }
892
}
893
 
894
 
895
/** called by qsort() */
896
static int
897
compare_start(const void *a, const void *b)
898
{
899
   const struct interval *ia = (const struct interval *) a;
900
   const struct interval *ib = (const struct interval *) b;
901
   if (ia->Start < ib->Start)
902
      return -1;
903
   else if (ia->Start > ib->Start)
904
      return +1;
905
   else
906
      return 0;
907
}
908
 
909
 
910
/** sort the interval list according to interval starts */
911
static void
912
sort_interval_list_by_start(struct interval_list *list)
913
{
914
   qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start);
915
#ifdef DEBUG
916
   {
917
      GLuint i;
918
      for (i = 0; i + 1 < list->Num; i++) {
919
         ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
920
      }
921
   }
922
#endif
923
}
924
 
925
struct loop_info
926
{
927
   GLuint Start, End;  /**< Start, end instructions of loop */
928
};
929
 
930
/**
931
 * Update the intermediate interval info for register 'index' and
932
 * instruction 'ic'.
933
 */
934
static void
935
update_interval(GLint intBegin[], GLint intEnd[],
936
		struct loop_info *loopStack, GLuint loopStackDepth,
937
		GLuint index, GLuint ic)
938
{
939
   int i;
940
 
941
   /* If the register is used in a loop, extend its lifetime through the end
942
    * of the outermost loop that doesn't contain its definition.
943
    */
944
   for (i = 0; i < loopStackDepth; i++) {
945
      if (intBegin[index] < loopStack[i].Start) {
946
	 ic = loopStack[i].End;
947
	 break;
948
      }
949
   }
950
 
951
   ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
952
   if (intBegin[index] == -1) {
953
      ASSERT(intEnd[index] == -1);
954
      intBegin[index] = intEnd[index] = ic;
955
   }
956
   else {
957
      intEnd[index] = ic;
958
   }
959
}
960
 
961
 
962
/**
963
 * Find first/last instruction that references each temporary register.
964
 */
965
GLboolean
966
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
967
                          GLuint numInstructions,
968
                          GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
969
                          GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
970
{
971
   struct loop_info loopStack[MAX_LOOP_NESTING];
972
   GLuint loopStackDepth = 0;
973
   GLuint i;
974
 
975
   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
976
      intBegin[i] = intEnd[i] = -1;
977
   }
978
 
979
   /* Scan instructions looking for temporary registers */
980
   for (i = 0; i < numInstructions; i++) {
981
      const struct prog_instruction *inst = instructions + i;
982
      if (inst->Opcode == OPCODE_BGNLOOP) {
983
         loopStack[loopStackDepth].Start = i;
984
         loopStack[loopStackDepth].End = inst->BranchTarget;
985
         loopStackDepth++;
986
      }
987
      else if (inst->Opcode == OPCODE_ENDLOOP) {
988
         loopStackDepth--;
989
      }
990
      else if (inst->Opcode == OPCODE_CAL) {
991
         return GL_FALSE;
992
      }
993
      else {
994
         const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/
995
         GLuint j;
996
         for (j = 0; j < numSrc; j++) {
997
            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
998
               const GLuint index = inst->SrcReg[j].Index;
999
               if (inst->SrcReg[j].RelAddr)
1000
                  return GL_FALSE;
1001
               update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1002
			       index, i);
1003
            }
1004
         }
1005
         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
1006
            const GLuint index = inst->DstReg.Index;
1007
            if (inst->DstReg.RelAddr)
1008
               return GL_FALSE;
1009
            update_interval(intBegin, intEnd, loopStack, loopStackDepth,
1010
			    index, i);
1011
         }
1012
      }
1013
   }
1014
 
1015
   return GL_TRUE;
1016
}
1017
 
1018
 
1019
/**
1020
 * Find the live intervals for each temporary register in the program.
1021
 * For register R, the interval [A,B] indicates that R is referenced
1022
 * from instruction A through instruction B.
1023
 * Special consideration is needed for loops and subroutines.
1024
 * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason
1025
 */
1026
static GLboolean
1027
find_live_intervals(struct gl_program *prog,
1028
                    struct interval_list *liveIntervals)
1029
{
1030
   GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1031
   GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1032
   GLuint i;
1033
 
1034
   /*
1035
    * Note: we'll return GL_FALSE below if we find relative indexing
1036
    * into the TEMP register file.  We can't handle that yet.
1037
    * We also give up on subroutines for now.
1038
    */
1039
 
1040
   if (dbg) {
1041
      printf("Optimize: Begin find intervals\n");
1042
   }
1043
 
1044
   /* build intermediate arrays */
1045
   if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
1046
                                  intBegin, intEnd))
1047
      return GL_FALSE;
1048
 
1049
   /* Build live intervals list from intermediate arrays */
1050
   liveIntervals->Num = 0;
1051
   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
1052
      if (intBegin[i] >= 0) {
1053
         struct interval inv;
1054
         inv.Reg = i;
1055
         inv.Start = intBegin[i];
1056
         inv.End = intEnd[i];
1057
         append_interval(liveIntervals, &inv);
1058
      }
1059
   }
1060
 
1061
   /* Sort the list according to interval starts */
1062
   sort_interval_list_by_start(liveIntervals);
1063
 
1064
   if (dbg) {
1065
      /* print interval info */
1066
      for (i = 0; i < liveIntervals->Num; i++) {
1067
         const struct interval *inv = liveIntervals->Intervals + i;
1068
         printf("Reg[%d] live [%d, %d]:",
1069
                      inv->Reg, inv->Start, inv->End);
1070
         if (1) {
1071
            GLuint j;
1072
            for (j = 0; j < inv->Start; j++)
1073
               printf(" ");
1074
            for (j = inv->Start; j <= inv->End; j++)
1075
               printf("x");
1076
         }
1077
         printf("\n");
1078
      }
1079
   }
1080
 
1081
   return GL_TRUE;
1082
}
1083
 
1084
 
1085
/** Scan the array of used register flags to find free entry */
1086
static GLint
1087
alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
1088
{
1089
   GLuint k;
1090
   for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
1091
      if (!usedRegs[k]) {
1092
         usedRegs[k] = GL_TRUE;
1093
         return k;
1094
      }
1095
   }
1096
   return -1;
1097
}
1098
 
1099
 
1100
/**
1101
 * This function implements "Linear Scan Register Allocation" to reduce
1102
 * the number of temporary registers used by the program.
1103
 *
1104
 * We compute the "live interval" for all temporary registers then
1105
 * examine the overlap of the intervals to allocate new registers.
1106
 * Basically, if two intervals do not overlap, they can use the same register.
1107
 */
1108
static void
1109
_mesa_reallocate_registers(struct gl_program *prog)
1110
{
1111
   struct interval_list liveIntervals;
1112
   GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1113
   GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
1114
   GLuint i;
1115
   GLint maxTemp = -1;
1116
 
1117
   if (dbg) {
1118
      printf("Optimize: Begin live-interval register reallocation\n");
1119
      _mesa_print_program(prog);
1120
   }
1121
 
1122
   for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
1123
      registerMap[i] = -1;
1124
      usedRegs[i] = GL_FALSE;
1125
   }
1126
 
1127
   if (!find_live_intervals(prog, &liveIntervals)) {
1128
      if (dbg)
1129
         printf("Aborting register reallocation\n");
1130
      return;
1131
   }
1132
 
1133
   {
1134
      struct interval_list activeIntervals;
1135
      activeIntervals.Num = 0;
1136
 
1137
      /* loop over live intervals, allocating a new register for each */
1138
      for (i = 0; i < liveIntervals.Num; i++) {
1139
         const struct interval *live = liveIntervals.Intervals + i;
1140
 
1141
         if (dbg)
1142
            printf("Consider register %u\n", live->Reg);
1143
 
1144
         /* Expire old intervals.  Intervals which have ended with respect
1145
          * to the live interval can have their remapped registers freed.
1146
          */
1147
         {
1148
            GLint j;
1149
            for (j = 0; j < (GLint) activeIntervals.Num; j++) {
1150
               const struct interval *inv = activeIntervals.Intervals + j;
1151
               if (inv->End >= live->Start) {
1152
                  /* Stop now.  Since the activeInterval list is sorted
1153
                   * we know we don't have to go further.
1154
                   */
1155
                  break;
1156
               }
1157
               else {
1158
                  /* Interval 'inv' has expired */
1159
                  const GLint regNew = registerMap[inv->Reg];
1160
                  ASSERT(regNew >= 0);
1161
 
1162
                  if (dbg)
1163
                     printf("  expire interval for reg %u\n", inv->Reg);
1164
 
1165
                  /* remove interval j from active list */
1166
                  remove_interval(&activeIntervals, inv);
1167
                  j--;  /* counter-act j++ in for-loop above */
1168
 
1169
                  /* return register regNew to the free pool */
1170
                  if (dbg)
1171
                     printf("  free reg %d\n", regNew);
1172
                  ASSERT(usedRegs[regNew] == GL_TRUE);
1173
                  usedRegs[regNew] = GL_FALSE;
1174
               }
1175
            }
1176
         }
1177
 
1178
         /* find a free register for this live interval */
1179
         {
1180
            const GLint k = alloc_register(usedRegs);
1181
            if (k < 0) {
1182
               /* out of registers, give up */
1183
               return;
1184
            }
1185
            registerMap[live->Reg] = k;
1186
            maxTemp = MAX2(maxTemp, k);
1187
            if (dbg)
1188
               printf("  remap register %u -> %d\n", live->Reg, k);
1189
         }
1190
 
1191
         /* Insert this live interval into the active list which is sorted
1192
          * by increasing end points.
1193
          */
1194
         insert_interval_by_end(&activeIntervals, live);
1195
      }
1196
   }
1197
 
1198
   if (maxTemp + 1 < (GLint) liveIntervals.Num) {
1199
      /* OK, we've reduced the number of registers needed.
1200
       * Scan the program and replace all the old temporary register
1201
       * indexes with the new indexes.
1202
       */
1203
      replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
1204
 
1205
      prog->NumTemporaries = maxTemp + 1;
1206
   }
1207
 
1208
   if (dbg) {
1209
      printf("Optimize: End live-interval register reallocation\n");
1210
      printf("Num temp regs before: %u  after: %u\n",
1211
                   liveIntervals.Num, maxTemp + 1);
1212
      _mesa_print_program(prog);
1213
   }
1214
}
1215
 
1216
 
1217
#if 0
1218
static void
1219
print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
1220
   fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
1221
   _mesa_print_program(program);
1222
   _mesa_print_program_parameters(ctx, program);
1223
   fprintf(stderr, "\n\n");
1224
}
1225
#endif
1226
 
1227
 
1228
/**
1229
 * Apply optimizations to the given program to eliminate unnecessary
1230
 * instructions, temp regs, etc.
1231
 */
1232
void
1233
_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
1234
{
1235
   GLboolean any_change;
1236
 
1237
   /* Stop when no modifications were output */
1238
   do {
1239
      any_change = GL_FALSE;
1240
      _mesa_remove_extra_move_use(program);
1241
      if (_mesa_remove_dead_code_global(program))
1242
         any_change = GL_TRUE;
1243
      if (_mesa_remove_extra_moves(program))
1244
         any_change = GL_TRUE;
1245
      if (_mesa_remove_dead_code_local(program))
1246
         any_change = GL_TRUE;
1247
      _mesa_reallocate_registers(program);
1248
   } while (any_change);
1249
}
1250