Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1901 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * Version: 7.5 |
||
4 | * |
||
5 | * Copyright (C) 2009 VMware, Inc. All Rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
8 | * copy of this software and associated documentation files (the "Software"), |
||
9 | * to deal in the Software without restriction, including without limitation |
||
10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
11 | * and/or sell copies of the Software, and to permit persons to whom the |
||
12 | * Software is furnished to do so, subject to the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice shall be included |
||
15 | * in all copies or substantial portions of the Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
18 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * VMWARE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN |
||
21 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||
22 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
23 | */ |
||
24 | |||
25 | |||
26 | |||
27 | #include "main/glheader.h" |
||
28 | #include "main/context.h" |
||
29 | #include "main/macros.h" |
||
30 | #include "program.h" |
||
31 | #include "prog_instruction.h" |
||
32 | #include "prog_optimize.h" |
||
33 | #include "prog_print.h" |
||
34 | |||
35 | |||
36 | #define MAX_LOOP_NESTING 50 |
||
37 | /* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to |
||
38 | * register allocate many temporary values into that small number of |
||
39 | * temps. So allow large temporary indices coming into the register |
||
40 | * allocator. |
||
41 | */ |
||
42 | #define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1) |
||
43 | |||
44 | static GLboolean dbg = GL_FALSE; |
||
45 | |||
46 | #define NO_MASK 0xf |
||
47 | |||
48 | /** |
||
49 | * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which |
||
50 | * are read from the given src in this instruction, We also provide |
||
51 | * one optional masks which may mask other components in the dst |
||
52 | * register |
||
53 | */ |
||
54 | static GLuint |
||
55 | get_src_arg_mask(const struct prog_instruction *inst, |
||
56 | GLuint arg, GLuint dst_mask) |
||
57 | { |
||
58 | GLuint read_mask, channel_mask; |
||
59 | GLuint comp; |
||
60 | |||
61 | ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode)); |
||
62 | |||
63 | /* Form the dst register, find the written channels */ |
||
64 | if (inst->CondUpdate) { |
||
65 | channel_mask = WRITEMASK_XYZW; |
||
66 | } |
||
67 | else { |
||
68 | switch (inst->Opcode) { |
||
69 | case OPCODE_MOV: |
||
70 | case OPCODE_MIN: |
||
71 | case OPCODE_MAX: |
||
72 | case OPCODE_ABS: |
||
73 | case OPCODE_ADD: |
||
74 | case OPCODE_MAD: |
||
75 | case OPCODE_MUL: |
||
76 | case OPCODE_SUB: |
||
77 | channel_mask = inst->DstReg.WriteMask & dst_mask; |
||
78 | break; |
||
79 | case OPCODE_RCP: |
||
80 | case OPCODE_SIN: |
||
81 | case OPCODE_COS: |
||
82 | case OPCODE_RSQ: |
||
83 | case OPCODE_POW: |
||
84 | case OPCODE_EX2: |
||
85 | case OPCODE_LOG: |
||
86 | channel_mask = WRITEMASK_X; |
||
87 | break; |
||
88 | case OPCODE_DP2: |
||
89 | channel_mask = WRITEMASK_XY; |
||
90 | break; |
||
91 | case OPCODE_DP3: |
||
92 | case OPCODE_XPD: |
||
93 | channel_mask = WRITEMASK_XYZ; |
||
94 | break; |
||
95 | default: |
||
96 | channel_mask = WRITEMASK_XYZW; |
||
97 | break; |
||
98 | } |
||
99 | } |
||
100 | |||
101 | /* Now, given the src swizzle and the written channels, find which |
||
102 | * components are actually read |
||
103 | */ |
||
104 | read_mask = 0x0; |
||
105 | for (comp = 0; comp < 4; ++comp) { |
||
106 | const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); |
||
107 | ASSERT(coord < 4); |
||
108 | if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) |
||
109 | read_mask |= 1 << coord; |
||
110 | } |
||
111 | |||
112 | return read_mask; |
||
113 | } |
||
114 | |||
115 | |||
116 | /** |
||
117 | * For a MOV instruction, compute a write mask when src register also has |
||
118 | * a mask |
||
119 | */ |
||
120 | static GLuint |
||
121 | get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) |
||
122 | { |
||
123 | const GLuint mask = mov->DstReg.WriteMask; |
||
124 | GLuint comp; |
||
125 | GLuint updated_mask = 0x0; |
||
126 | |||
127 | ASSERT(mov->Opcode == OPCODE_MOV); |
||
128 | |||
129 | for (comp = 0; comp < 4; ++comp) { |
||
130 | GLuint src_comp; |
||
131 | if ((mask & (1 << comp)) == 0) |
||
132 | continue; |
||
133 | src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); |
||
134 | if ((src_mask & (1 << src_comp)) == 0) |
||
135 | continue; |
||
136 | updated_mask |= 1 << comp; |
||
137 | } |
||
138 | |||
139 | return updated_mask; |
||
140 | } |
||
141 | |||
142 | |||
143 | /** |
||
144 | * Ensure that the swizzle is regular. That is, all of the swizzle |
||
145 | * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. |
||
146 | */ |
||
147 | static GLboolean |
||
148 | is_swizzle_regular(GLuint swz) |
||
149 | { |
||
150 | return GET_SWZ(swz,0) <= SWIZZLE_W && |
||
151 | GET_SWZ(swz,1) <= SWIZZLE_W && |
||
152 | GET_SWZ(swz,2) <= SWIZZLE_W && |
||
153 | GET_SWZ(swz,3) <= SWIZZLE_W; |
||
154 | } |
||
155 | |||
156 | |||
157 | /** |
||
158 | * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. |
||
159 | * \return number of instructions removed |
||
160 | */ |
||
161 | static GLuint |
||
162 | remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) |
||
163 | { |
||
164 | GLint i, removeEnd = 0, removeCount = 0; |
||
165 | GLuint totalRemoved = 0; |
||
166 | |||
167 | /* go backward */ |
||
168 | for (i = prog->NumInstructions - 1; i >= 0; i--) { |
||
169 | if (removeFlags[i]) { |
||
170 | totalRemoved++; |
||
171 | if (removeCount == 0) { |
||
172 | /* begin a run of instructions to remove */ |
||
173 | removeEnd = i; |
||
174 | removeCount = 1; |
||
175 | } |
||
176 | else { |
||
177 | /* extend the run of instructions to remove */ |
||
178 | removeCount++; |
||
179 | } |
||
180 | } |
||
181 | else { |
||
182 | /* don't remove this instruction, but check if the preceeding |
||
183 | * instructions are to be removed. |
||
184 | */ |
||
185 | if (removeCount > 0) { |
||
186 | GLint removeStart = removeEnd - removeCount + 1; |
||
187 | _mesa_delete_instructions(prog, removeStart, removeCount); |
||
188 | removeStart = removeCount = 0; /* reset removal info */ |
||
189 | } |
||
190 | } |
||
191 | } |
||
192 | /* Finish removing if the first instruction was to be removed. */ |
||
193 | if (removeCount > 0) { |
||
194 | GLint removeStart = removeEnd - removeCount + 1; |
||
195 | _mesa_delete_instructions(prog, removeStart, removeCount); |
||
196 | } |
||
197 | return totalRemoved; |
||
198 | } |
||
199 | |||
200 | |||
201 | /** |
||
202 | * Remap register indexes according to map. |
||
203 | * \param prog the program to search/replace |
||
204 | * \param file the type of register file to search/replace |
||
205 | * \param map maps old register indexes to new indexes |
||
206 | */ |
||
207 | static void |
||
208 | replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) |
||
209 | { |
||
210 | GLuint i; |
||
211 | |||
212 | for (i = 0; i < prog->NumInstructions; i++) { |
||
213 | struct prog_instruction *inst = prog->Instructions + i; |
||
214 | const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); |
||
215 | GLuint j; |
||
216 | for (j = 0; j < numSrc; j++) { |
||
217 | if (inst->SrcReg[j].File == file) { |
||
218 | GLuint index = inst->SrcReg[j].Index; |
||
219 | ASSERT(map[index] >= 0); |
||
220 | inst->SrcReg[j].Index = map[index]; |
||
221 | } |
||
222 | } |
||
223 | if (inst->DstReg.File == file) { |
||
224 | const GLuint index = inst->DstReg.Index; |
||
225 | ASSERT(map[index] >= 0); |
||
226 | inst->DstReg.Index = map[index]; |
||
227 | } |
||
228 | } |
||
229 | } |
||
230 | |||
231 | |||
232 | /** |
||
233 | * Remove dead instructions from the given program. |
||
234 | * This is very primitive for now. Basically look for temp registers |
||
235 | * that are written to but never read. Remove any instructions that |
||
236 | * write to such registers. Be careful with condition code setters. |
||
237 | */ |
||
238 | static GLboolean |
||
239 | _mesa_remove_dead_code_global(struct gl_program *prog) |
||
240 | { |
||
241 | GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; |
||
242 | GLboolean *removeInst; /* per-instruction removal flag */ |
||
243 | GLuint i, rem = 0, comp; |
||
244 | |||
245 | memset(tempRead, 0, sizeof(tempRead)); |
||
246 | |||
247 | if (dbg) { |
||
248 | printf("Optimize: Begin dead code removal\n"); |
||
249 | /*_mesa_print_program(prog);*/ |
||
250 | } |
||
251 | |||
252 | removeInst = (GLboolean *) |
||
253 | calloc(1, prog->NumInstructions * sizeof(GLboolean)); |
||
254 | |||
255 | /* Determine which temps are read and written */ |
||
256 | for (i = 0; i < prog->NumInstructions; i++) { |
||
257 | const struct prog_instruction *inst = prog->Instructions + i; |
||
258 | const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); |
||
259 | GLuint j; |
||
260 | |||
261 | /* check src regs */ |
||
262 | for (j = 0; j < numSrc; j++) { |
||
263 | if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { |
||
264 | const GLuint index = inst->SrcReg[j].Index; |
||
265 | GLuint read_mask; |
||
266 | ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); |
||
267 | read_mask = get_src_arg_mask(inst, j, NO_MASK); |
||
268 | |||
269 | if (inst->SrcReg[j].RelAddr) { |
||
270 | if (dbg) |
||
271 | printf("abort remove dead code (indirect temp)\n"); |
||
272 | goto done; |
||
273 | } |
||
274 | |||
275 | for (comp = 0; comp < 4; comp++) { |
||
276 | const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); |
||
277 | ASSERT(swz < 4); |
||
278 | if ((read_mask & (1 << swz)) == 0) |
||
279 | continue; |
||
280 | if (swz <= SWIZZLE_W) |
||
281 | tempRead[index][swz] = GL_TRUE; |
||
282 | } |
||
283 | } |
||
284 | } |
||
285 | |||
286 | /* check dst reg */ |
||
287 | if (inst->DstReg.File == PROGRAM_TEMPORARY) { |
||
288 | const GLuint index = inst->DstReg.Index; |
||
289 | ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); |
||
290 | |||
291 | if (inst->DstReg.RelAddr) { |
||
292 | if (dbg) |
||
293 | printf("abort remove dead code (indirect temp)\n"); |
||
294 | goto done; |
||
295 | } |
||
296 | |||
297 | if (inst->CondUpdate) { |
||
298 | /* If we're writing to this register and setting condition |
||
299 | * codes we cannot remove the instruction. Prevent removal |
||
300 | * by setting the 'read' flag. |
||
301 | */ |
||
302 | tempRead[index][0] = GL_TRUE; |
||
303 | tempRead[index][1] = GL_TRUE; |
||
304 | tempRead[index][2] = GL_TRUE; |
||
305 | tempRead[index][3] = GL_TRUE; |
||
306 | } |
||
307 | } |
||
308 | } |
||
309 | |||
310 | /* find instructions that write to dead registers, flag for removal */ |
||
311 | for (i = 0; i < prog->NumInstructions; i++) { |
||
312 | struct prog_instruction *inst = prog->Instructions + i; |
||
313 | const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode); |
||
314 | |||
315 | if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) { |
||
316 | GLint chan, index = inst->DstReg.Index; |
||
317 | |||
318 | for (chan = 0; chan < 4; chan++) { |
||
319 | if (!tempRead[index][chan] && |
||
320 | inst->DstReg.WriteMask & (1 << chan)) { |
||
321 | if (dbg) { |
||
322 | printf("Remove writemask on %u.%c\n", i, |
||
323 | chan == 3 ? 'w' : 'x' + chan); |
||
324 | } |
||
325 | inst->DstReg.WriteMask &= ~(1 << chan); |
||
326 | rem++; |
||
327 | } |
||
328 | } |
||
329 | |||
330 | if (inst->DstReg.WriteMask == 0) { |
||
331 | /* If we cleared all writes, the instruction can be removed. */ |
||
332 | if (dbg) |
||
333 | printf("Remove instruction %u: \n", i); |
||
334 | removeInst[i] = GL_TRUE; |
||
335 | } |
||
336 | } |
||
337 | } |
||
338 | |||
339 | /* now remove the instructions which aren't needed */ |
||
340 | rem = remove_instructions(prog, removeInst); |
||
341 | |||
342 | if (dbg) { |
||
343 | printf("Optimize: End dead code removal.\n"); |
||
344 | printf(" %u channel writes removed\n", rem); |
||
345 | printf(" %u instructions removed\n", rem); |
||
346 | /*_mesa_print_program(prog);*/ |
||
347 | } |
||
348 | |||
349 | done: |
||
350 | free(removeInst); |
||
351 | return rem != 0; |
||
352 | } |
||
353 | |||
354 | |||
355 | enum inst_use |
||
356 | { |
||
357 | READ, |
||
358 | WRITE, |
||
359 | FLOW, |
||
360 | END |
||
361 | }; |
||
362 | |||
363 | |||
364 | /** |
||
365 | * Scan forward in program from 'start' for the next occurances of TEMP[index]. |
||
366 | * We look if an instruction reads the component given by the masks and if they |
||
367 | * are overwritten. |
||
368 | * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator |
||
369 | * that we can't look further. |
||
370 | */ |
||
371 | static enum inst_use |
||
372 | find_next_use(const struct gl_program *prog, |
||
373 | GLuint start, |
||
374 | GLuint index, |
||
375 | GLuint mask) |
||
376 | { |
||
377 | GLuint i; |
||
378 | |||
379 | for (i = start; i < prog->NumInstructions; i++) { |
||
380 | const struct prog_instruction *inst = prog->Instructions + i; |
||
381 | switch (inst->Opcode) { |
||
382 | case OPCODE_BGNLOOP: |
||
383 | case OPCODE_BGNSUB: |
||
384 | case OPCODE_BRA: |
||
385 | case OPCODE_CAL: |
||
386 | case OPCODE_CONT: |
||
387 | case OPCODE_IF: |
||
388 | case OPCODE_ELSE: |
||
389 | case OPCODE_ENDIF: |
||
390 | case OPCODE_ENDLOOP: |
||
391 | case OPCODE_ENDSUB: |
||
392 | case OPCODE_RET: |
||
393 | return FLOW; |
||
394 | case OPCODE_END: |
||
395 | return END; |
||
396 | default: |
||
397 | { |
||
398 | const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); |
||
399 | GLuint j; |
||
400 | for (j = 0; j < numSrc; j++) { |
||
401 | if (inst->SrcReg[j].RelAddr || |
||
402 | (inst->SrcReg[j].File == PROGRAM_TEMPORARY && |
||
403 | inst->SrcReg[j].Index == index && |
||
404 | (get_src_arg_mask(inst,j,NO_MASK) & mask))) |
||
405 | return READ; |
||
406 | } |
||
407 | if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && |
||
408 | inst->DstReg.File == PROGRAM_TEMPORARY && |
||
409 | inst->DstReg.Index == index) { |
||
410 | mask &= ~inst->DstReg.WriteMask; |
||
411 | if (mask == 0) |
||
412 | return WRITE; |
||
413 | } |
||
414 | } |
||
415 | } |
||
416 | } |
||
417 | return END; |
||
418 | } |
||
419 | |||
420 | |||
421 | /** |
||
422 | * Is the given instruction opcode a flow-control opcode? |
||
423 | * XXX maybe move this into prog_instruction.[ch] |
||
424 | */ |
||
425 | static GLboolean |
||
426 | _mesa_is_flow_control_opcode(enum prog_opcode opcode) |
||
427 | { |
||
428 | switch (opcode) { |
||
429 | case OPCODE_BGNLOOP: |
||
430 | case OPCODE_BGNSUB: |
||
431 | case OPCODE_BRA: |
||
432 | case OPCODE_CAL: |
||
433 | case OPCODE_CONT: |
||
434 | case OPCODE_IF: |
||
435 | case OPCODE_ELSE: |
||
436 | case OPCODE_END: |
||
437 | case OPCODE_ENDIF: |
||
438 | case OPCODE_ENDLOOP: |
||
439 | case OPCODE_ENDSUB: |
||
440 | case OPCODE_RET: |
||
441 | return GL_TRUE; |
||
442 | default: |
||
443 | return GL_FALSE; |
||
444 | } |
||
445 | } |
||
446 | |||
447 | |||
448 | /** |
||
449 | * Test if the given instruction is a simple MOV (no conditional updating, |
||
450 | * not relative addressing, no negation/abs, etc). |
||
451 | */ |
||
452 | static GLboolean |
||
453 | can_downward_mov_be_modifed(const struct prog_instruction *mov) |
||
454 | { |
||
455 | return |
||
456 | mov->Opcode == OPCODE_MOV && |
||
457 | mov->CondUpdate == GL_FALSE && |
||
458 | mov->SrcReg[0].RelAddr == 0 && |
||
459 | mov->SrcReg[0].Negate == 0 && |
||
460 | mov->SrcReg[0].Abs == 0 && |
||
461 | mov->SrcReg[0].HasIndex2 == 0 && |
||
462 | mov->SrcReg[0].RelAddr2 == 0 && |
||
463 | mov->DstReg.RelAddr == 0 && |
||
464 | mov->DstReg.CondMask == COND_TR && |
||
465 | mov->SaturateMode == SATURATE_OFF; |
||
466 | } |
||
467 | |||
468 | |||
469 | static GLboolean |
||
470 | can_upward_mov_be_modifed(const struct prog_instruction *mov) |
||
471 | { |
||
472 | return |
||
473 | can_downward_mov_be_modifed(mov) && |
||
474 | mov->DstReg.File == PROGRAM_TEMPORARY; |
||
475 | } |
||
476 | |||
477 | |||
478 | /** |
||
479 | * Try to remove use of extraneous MOV instructions, to free them up for dead |
||
480 | * code removal. |
||
481 | */ |
||
482 | static void |
||
483 | _mesa_remove_extra_move_use(struct gl_program *prog) |
||
484 | { |
||
485 | GLuint i, j; |
||
486 | |||
487 | if (dbg) { |
||
488 | printf("Optimize: Begin remove extra move use\n"); |
||
489 | _mesa_print_program(prog); |
||
490 | } |
||
491 | |||
492 | /* |
||
493 | * Look for sequences such as this: |
||
494 | * MOV tmpX, arg0; |
||
495 | * ... |
||
496 | * FOO tmpY, tmpX, arg1; |
||
497 | * and convert into: |
||
498 | * MOV tmpX, arg0; |
||
499 | * ... |
||
500 | * FOO tmpY, arg0, arg1; |
||
501 | */ |
||
502 | |||
503 | for (i = 0; i + 1 < prog->NumInstructions; i++) { |
||
504 | const struct prog_instruction *mov = prog->Instructions + i; |
||
505 | GLuint dst_mask, src_mask; |
||
506 | if (can_upward_mov_be_modifed(mov) == GL_FALSE) |
||
507 | continue; |
||
508 | |||
509 | /* Scanning the code, we maintain the components which are still active in |
||
510 | * these two masks |
||
511 | */ |
||
512 | dst_mask = mov->DstReg.WriteMask; |
||
513 | src_mask = get_src_arg_mask(mov, 0, NO_MASK); |
||
514 | |||
515 | /* Walk through remaining instructions until the or src reg gets |
||
516 | * rewritten or we get into some flow-control, eliminating the use of |
||
517 | * this MOV. |
||
518 | */ |
||
519 | for (j = i + 1; j < prog->NumInstructions; j++) { |
||
520 | struct prog_instruction *inst2 = prog->Instructions + j; |
||
521 | GLuint arg; |
||
522 | |||
523 | if (_mesa_is_flow_control_opcode(inst2->Opcode)) |
||
524 | break; |
||
525 | |||
526 | /* First rewrite this instruction's args if appropriate. */ |
||
527 | for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { |
||
528 | GLuint comp, read_mask; |
||
529 | |||
530 | if (inst2->SrcReg[arg].File != mov->DstReg.File || |
||
531 | inst2->SrcReg[arg].Index != mov->DstReg.Index || |
||
532 | inst2->SrcReg[arg].RelAddr || |
||
533 | inst2->SrcReg[arg].Abs) |
||
534 | continue; |
||
535 | read_mask = get_src_arg_mask(inst2, arg, NO_MASK); |
||
536 | |||
537 | /* Adjust the swizzles of inst2 to point at MOV's source if ALL the |
||
538 | * components read still come from the mov instructions |
||
539 | */ |
||
540 | if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && |
||
541 | (read_mask & dst_mask) == read_mask) { |
||
542 | for (comp = 0; comp < 4; comp++) { |
||
543 | const GLuint inst2_swz = |
||
544 | GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); |
||
545 | const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); |
||
546 | inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); |
||
547 | inst2->SrcReg[arg].Swizzle |= s << (3 * comp); |
||
548 | inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> |
||
549 | inst2_swz) & 0x1) << comp); |
||
550 | } |
||
551 | inst2->SrcReg[arg].File = mov->SrcReg[0].File; |
||
552 | inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; |
||
553 | } |
||
554 | } |
||
555 | |||
556 | /* The source of MOV is written. This potentially deactivates some |
||
557 | * components from the src and dst of the MOV instruction |
||
558 | */ |
||
559 | if (inst2->DstReg.File == mov->DstReg.File && |
||
560 | (inst2->DstReg.RelAddr || |
||
561 | inst2->DstReg.Index == mov->DstReg.Index)) { |
||
562 | dst_mask &= ~inst2->DstReg.WriteMask; |
||
563 | src_mask = get_src_arg_mask(mov, 0, dst_mask); |
||
564 | } |
||
565 | |||
566 | /* Idem when the destination of mov is written */ |
||
567 | if (inst2->DstReg.File == mov->SrcReg[0].File && |
||
568 | (inst2->DstReg.RelAddr || |
||
569 | inst2->DstReg.Index == mov->SrcReg[0].Index)) { |
||
570 | src_mask &= ~inst2->DstReg.WriteMask; |
||
571 | dst_mask &= get_dst_mask_for_mov(mov, src_mask); |
||
572 | } |
||
573 | if (dst_mask == 0) |
||
574 | break; |
||
575 | } |
||
576 | } |
||
577 | |||
578 | if (dbg) { |
||
579 | printf("Optimize: End remove extra move use.\n"); |
||
580 | /*_mesa_print_program(prog);*/ |
||
581 | } |
||
582 | } |
||
583 | |||
584 | |||
585 | /** |
||
586 | * Complements dead_code_global. Try to remove code in block of code by |
||
587 | * carefully monitoring the swizzles. Both functions should be merged into one |
||
588 | * with a proper control flow graph |
||
589 | */ |
||
590 | static GLboolean |
||
591 | _mesa_remove_dead_code_local(struct gl_program *prog) |
||
592 | { |
||
593 | GLboolean *removeInst; |
||
594 | GLuint i, arg, rem = 0; |
||
595 | |||
596 | removeInst = (GLboolean *) |
||
597 | calloc(1, prog->NumInstructions * sizeof(GLboolean)); |
||
598 | |||
599 | for (i = 0; i < prog->NumInstructions; i++) { |
||
600 | const struct prog_instruction *inst = prog->Instructions + i; |
||
601 | const GLuint index = inst->DstReg.Index; |
||
602 | const GLuint mask = inst->DstReg.WriteMask; |
||
603 | enum inst_use use; |
||
604 | |||
605 | /* We must deactivate the pass as soon as some indirection is used */ |
||
606 | if (inst->DstReg.RelAddr) |
||
607 | goto done; |
||
608 | for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) |
||
609 | if (inst->SrcReg[arg].RelAddr) |
||
610 | goto done; |
||
611 | |||
612 | if (_mesa_is_flow_control_opcode(inst->Opcode) || |
||
613 | _mesa_num_inst_dst_regs(inst->Opcode) == 0 || |
||
614 | inst->DstReg.File != PROGRAM_TEMPORARY || |
||
615 | inst->DstReg.RelAddr) |
||
616 | continue; |
||
617 | |||
618 | use = find_next_use(prog, i+1, index, mask); |
||
619 | if (use == WRITE || use == END) |
||
620 | removeInst[i] = GL_TRUE; |
||
621 | } |
||
622 | |||
623 | rem = remove_instructions(prog, removeInst); |
||
624 | |||
625 | done: |
||
626 | free(removeInst); |
||
627 | return rem != 0; |
||
628 | } |
||
629 | |||
630 | |||
631 | /** |
||
632 | * Try to inject the destination of mov as the destination of inst and recompute |
||
633 | * the swizzles operators for the sources of inst if required. Return GL_TRUE |
||
634 | * of the substitution was possible, GL_FALSE otherwise |
||
635 | */ |
||
636 | static GLboolean |
||
637 | _mesa_merge_mov_into_inst(struct prog_instruction *inst, |
||
638 | const struct prog_instruction *mov) |
||
639 | { |
||
640 | /* Indirection table which associates destination and source components for |
||
641 | * the mov instruction |
||
642 | */ |
||
643 | const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); |
||
644 | |||
645 | /* Some components are not written by inst. We cannot remove the mov */ |
||
646 | if (mask != (inst->DstReg.WriteMask & mask)) |
||
647 | return GL_FALSE; |
||
648 | |||
649 | /* Depending on the instruction, we may need to recompute the swizzles. |
||
650 | * Also, some other instructions (like TEX) are not linear. We will only |
||
651 | * consider completely active sources and destinations |
||
652 | */ |
||
653 | switch (inst->Opcode) { |
||
654 | |||
655 | /* Carstesian instructions: we compute the swizzle */ |
||
656 | case OPCODE_MOV: |
||
657 | case OPCODE_MIN: |
||
658 | case OPCODE_MAX: |
||
659 | case OPCODE_ABS: |
||
660 | case OPCODE_ADD: |
||
661 | case OPCODE_MAD: |
||
662 | case OPCODE_MUL: |
||
663 | case OPCODE_SUB: |
||
664 | { |
||
665 | GLuint dst_to_src_comp[4] = {0,0,0,0}; |
||
666 | GLuint dst_comp, arg; |
||
667 | for (dst_comp = 0; dst_comp < 4; ++dst_comp) { |
||
668 | if (mov->DstReg.WriteMask & (1 << dst_comp)) { |
||
669 | const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); |
||
670 | ASSERT(src_comp < 4); |
||
671 | dst_to_src_comp[dst_comp] = src_comp; |
||
672 | } |
||
673 | } |
||
674 | |||
675 | /* Patch each source of the instruction */ |
||
676 | for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { |
||
677 | const GLuint arg_swz = inst->SrcReg[arg].Swizzle; |
||
678 | inst->SrcReg[arg].Swizzle = 0; |
||
679 | |||
680 | /* Reset each active component of the swizzle */ |
||
681 | for (dst_comp = 0; dst_comp < 4; ++dst_comp) { |
||
682 | GLuint src_comp, arg_comp; |
||
683 | if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) |
||
684 | continue; |
||
685 | src_comp = dst_to_src_comp[dst_comp]; |
||
686 | ASSERT(src_comp < 4); |
||
687 | arg_comp = GET_SWZ(arg_swz, src_comp); |
||
688 | ASSERT(arg_comp < 4); |
||
689 | inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); |
||
690 | } |
||
691 | } |
||
692 | inst->DstReg = mov->DstReg; |
||
693 | return GL_TRUE; |
||
694 | } |
||
695 | |||
696 | /* Dot products and scalar instructions: we only change the destination */ |
||
697 | case OPCODE_RCP: |
||
698 | case OPCODE_SIN: |
||
699 | case OPCODE_COS: |
||
700 | case OPCODE_RSQ: |
||
701 | case OPCODE_POW: |
||
702 | case OPCODE_EX2: |
||
703 | case OPCODE_LOG: |
||
704 | case OPCODE_DP2: |
||
705 | case OPCODE_DP3: |
||
706 | case OPCODE_DP4: |
||
707 | inst->DstReg = mov->DstReg; |
||
708 | return GL_TRUE; |
||
709 | |||
710 | /* All other instructions require fully active components with no swizzle */ |
||
711 | default: |
||
712 | if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || |
||
713 | inst->DstReg.WriteMask != WRITEMASK_XYZW) |
||
714 | return GL_FALSE; |
||
715 | inst->DstReg = mov->DstReg; |
||
716 | return GL_TRUE; |
||
717 | } |
||
718 | } |
||
719 | |||
720 | |||
721 | /** |
||
722 | * Try to remove extraneous MOV instructions from the given program. |
||
723 | */ |
||
724 | static GLboolean |
||
725 | _mesa_remove_extra_moves(struct gl_program *prog) |
||
726 | { |
||
727 | GLboolean *removeInst; /* per-instruction removal flag */ |
||
728 | GLuint i, rem = 0, nesting = 0; |
||
729 | |||
730 | if (dbg) { |
||
731 | printf("Optimize: Begin remove extra moves\n"); |
||
732 | _mesa_print_program(prog); |
||
733 | } |
||
734 | |||
735 | removeInst = (GLboolean *) |
||
736 | calloc(1, prog->NumInstructions * sizeof(GLboolean)); |
||
737 | |||
738 | /* |
||
739 | * Look for sequences such as this: |
||
740 | * FOO tmpX, arg0, arg1; |
||
741 | * MOV tmpY, tmpX; |
||
742 | * and convert into: |
||
743 | * FOO tmpY, arg0, arg1; |
||
744 | */ |
||
745 | |||
746 | for (i = 0; i < prog->NumInstructions; i++) { |
||
747 | const struct prog_instruction *mov = prog->Instructions + i; |
||
748 | |||
749 | switch (mov->Opcode) { |
||
750 | case OPCODE_BGNLOOP: |
||
751 | case OPCODE_BGNSUB: |
||
752 | case OPCODE_IF: |
||
753 | nesting++; |
||
754 | break; |
||
755 | case OPCODE_ENDLOOP: |
||
756 | case OPCODE_ENDSUB: |
||
757 | case OPCODE_ENDIF: |
||
758 | nesting--; |
||
759 | break; |
||
760 | case OPCODE_MOV: |
||
761 | if (i > 0 && |
||
762 | can_downward_mov_be_modifed(mov) && |
||
763 | mov->SrcReg[0].File == PROGRAM_TEMPORARY && |
||
764 | nesting == 0) |
||
765 | { |
||
766 | |||
767 | /* see if this MOV can be removed */ |
||
768 | const GLuint id = mov->SrcReg[0].Index; |
||
769 | struct prog_instruction *prevInst; |
||
770 | GLuint prevI; |
||
771 | |||
772 | /* get pointer to previous instruction */ |
||
773 | prevI = i - 1; |
||
774 | while (prevI > 0 && removeInst[prevI]) |
||
775 | prevI--; |
||
776 | prevInst = prog->Instructions + prevI; |
||
777 | |||
778 | if (prevInst->DstReg.File == PROGRAM_TEMPORARY && |
||
779 | prevInst->DstReg.Index == id && |
||
780 | prevInst->DstReg.RelAddr == 0 && |
||
781 | prevInst->DstReg.CondSrc == 0 && |
||
782 | prevInst->DstReg.CondMask == COND_TR) { |
||
783 | |||
784 | const GLuint dst_mask = prevInst->DstReg.WriteMask; |
||
785 | enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); |
||
786 | |||
787 | if (next_use == WRITE || next_use == END) { |
||
788 | /* OK, we can safely remove this MOV instruction. |
||
789 | * Transform: |
||
790 | * prevI: FOO tempIndex, x, y; |
||
791 | * i: MOV z, tempIndex; |
||
792 | * Into: |
||
793 | * prevI: FOO z, x, y; |
||
794 | */ |
||
795 | if (_mesa_merge_mov_into_inst(prevInst, mov)) { |
||
796 | removeInst[i] = GL_TRUE; |
||
797 | if (dbg) { |
||
798 | printf("Remove MOV at %u\n", i); |
||
799 | printf("new prev inst %u: ", prevI); |
||
800 | _mesa_print_instruction(prevInst); |
||
801 | } |
||
802 | } |
||
803 | } |
||
804 | } |
||
805 | } |
||
806 | break; |
||
807 | default: |
||
808 | ; /* nothing */ |
||
809 | } |
||
810 | } |
||
811 | |||
812 | /* now remove the instructions which aren't needed */ |
||
813 | rem = remove_instructions(prog, removeInst); |
||
814 | |||
815 | free(removeInst); |
||
816 | |||
817 | if (dbg) { |
||
818 | printf("Optimize: End remove extra moves. %u instructions removed\n", rem); |
||
819 | /*_mesa_print_program(prog);*/ |
||
820 | } |
||
821 | |||
822 | return rem != 0; |
||
823 | } |
||
824 | |||
825 | |||
826 | /** A live register interval */ |
||
827 | struct interval |
||
828 | { |
||
829 | GLuint Reg; /** The temporary register index */ |
||
830 | GLuint Start, End; /** Start/end instruction numbers */ |
||
831 | }; |
||
832 | |||
833 | |||
834 | /** A list of register intervals */ |
||
835 | struct interval_list |
||
836 | { |
||
837 | GLuint Num; |
||
838 | struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; |
||
839 | }; |
||
840 | |||
841 | |||
842 | static void |
||
843 | append_interval(struct interval_list *list, const struct interval *inv) |
||
844 | { |
||
845 | list->Intervals[list->Num++] = *inv; |
||
846 | } |
||
847 | |||
848 | |||
849 | /** Insert interval inv into list, sorted by interval end */ |
||
850 | static void |
||
851 | insert_interval_by_end(struct interval_list *list, const struct interval *inv) |
||
852 | { |
||
853 | /* XXX we could do a binary search insertion here since list is sorted */ |
||
854 | GLint i = list->Num - 1; |
||
855 | while (i >= 0 && list->Intervals[i].End > inv->End) { |
||
856 | list->Intervals[i + 1] = list->Intervals[i]; |
||
857 | i--; |
||
858 | } |
||
859 | list->Intervals[i + 1] = *inv; |
||
860 | list->Num++; |
||
861 | |||
862 | #ifdef DEBUG |
||
863 | { |
||
864 | GLuint i; |
||
865 | for (i = 0; i + 1 < list->Num; i++) { |
||
866 | ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); |
||
867 | } |
||
868 | } |
||
869 | #endif |
||
870 | } |
||
871 | |||
872 | |||
873 | /** Remove the given interval from the interval list */ |
||
874 | static void |
||
875 | remove_interval(struct interval_list *list, const struct interval *inv) |
||
876 | { |
||
877 | /* XXX we could binary search since list is sorted */ |
||
878 | GLuint k; |
||
879 | for (k = 0; k < list->Num; k++) { |
||
880 | if (list->Intervals[k].Reg == inv->Reg) { |
||
881 | /* found, remove it */ |
||
882 | ASSERT(list->Intervals[k].Start == inv->Start); |
||
883 | ASSERT(list->Intervals[k].End == inv->End); |
||
884 | while (k < list->Num - 1) { |
||
885 | list->Intervals[k] = list->Intervals[k + 1]; |
||
886 | k++; |
||
887 | } |
||
888 | list->Num--; |
||
889 | return; |
||
890 | } |
||
891 | } |
||
892 | } |
||
893 | |||
894 | |||
895 | /** called by qsort() */ |
||
896 | static int |
||
897 | compare_start(const void *a, const void *b) |
||
898 | { |
||
899 | const struct interval *ia = (const struct interval *) a; |
||
900 | const struct interval *ib = (const struct interval *) b; |
||
901 | if (ia->Start < ib->Start) |
||
902 | return -1; |
||
903 | else if (ia->Start > ib->Start) |
||
904 | return +1; |
||
905 | else |
||
906 | return 0; |
||
907 | } |
||
908 | |||
909 | |||
910 | /** sort the interval list according to interval starts */ |
||
911 | static void |
||
912 | sort_interval_list_by_start(struct interval_list *list) |
||
913 | { |
||
914 | qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); |
||
915 | #ifdef DEBUG |
||
916 | { |
||
917 | GLuint i; |
||
918 | for (i = 0; i + 1 < list->Num; i++) { |
||
919 | ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); |
||
920 | } |
||
921 | } |
||
922 | #endif |
||
923 | } |
||
924 | |||
925 | struct loop_info |
||
926 | { |
||
927 | GLuint Start, End; /**< Start, end instructions of loop */ |
||
928 | }; |
||
929 | |||
930 | /** |
||
931 | * Update the intermediate interval info for register 'index' and |
||
932 | * instruction 'ic'. |
||
933 | */ |
||
934 | static void |
||
935 | update_interval(GLint intBegin[], GLint intEnd[], |
||
936 | struct loop_info *loopStack, GLuint loopStackDepth, |
||
937 | GLuint index, GLuint ic) |
||
938 | { |
||
939 | int i; |
||
940 | |||
941 | /* If the register is used in a loop, extend its lifetime through the end |
||
942 | * of the outermost loop that doesn't contain its definition. |
||
943 | */ |
||
944 | for (i = 0; i < loopStackDepth; i++) { |
||
945 | if (intBegin[index] < loopStack[i].Start) { |
||
946 | ic = loopStack[i].End; |
||
947 | break; |
||
948 | } |
||
949 | } |
||
950 | |||
951 | ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); |
||
952 | if (intBegin[index] == -1) { |
||
953 | ASSERT(intEnd[index] == -1); |
||
954 | intBegin[index] = intEnd[index] = ic; |
||
955 | } |
||
956 | else { |
||
957 | intEnd[index] = ic; |
||
958 | } |
||
959 | } |
||
960 | |||
961 | |||
962 | /** |
||
963 | * Find first/last instruction that references each temporary register. |
||
964 | */ |
||
965 | GLboolean |
||
966 | _mesa_find_temp_intervals(const struct prog_instruction *instructions, |
||
967 | GLuint numInstructions, |
||
968 | GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS], |
||
969 | GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) |
||
970 | { |
||
971 | struct loop_info loopStack[MAX_LOOP_NESTING]; |
||
972 | GLuint loopStackDepth = 0; |
||
973 | GLuint i; |
||
974 | |||
975 | for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ |
||
976 | intBegin[i] = intEnd[i] = -1; |
||
977 | } |
||
978 | |||
979 | /* Scan instructions looking for temporary registers */ |
||
980 | for (i = 0; i < numInstructions; i++) { |
||
981 | const struct prog_instruction *inst = instructions + i; |
||
982 | if (inst->Opcode == OPCODE_BGNLOOP) { |
||
983 | loopStack[loopStackDepth].Start = i; |
||
984 | loopStack[loopStackDepth].End = inst->BranchTarget; |
||
985 | loopStackDepth++; |
||
986 | } |
||
987 | else if (inst->Opcode == OPCODE_ENDLOOP) { |
||
988 | loopStackDepth--; |
||
989 | } |
||
990 | else if (inst->Opcode == OPCODE_CAL) { |
||
991 | return GL_FALSE; |
||
992 | } |
||
993 | else { |
||
994 | const GLuint numSrc = 3;/*_mesa_num_inst_src_regs(inst->Opcode);*/ |
||
995 | GLuint j; |
||
996 | for (j = 0; j < numSrc; j++) { |
||
997 | if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { |
||
998 | const GLuint index = inst->SrcReg[j].Index; |
||
999 | if (inst->SrcReg[j].RelAddr) |
||
1000 | return GL_FALSE; |
||
1001 | update_interval(intBegin, intEnd, loopStack, loopStackDepth, |
||
1002 | index, i); |
||
1003 | } |
||
1004 | } |
||
1005 | if (inst->DstReg.File == PROGRAM_TEMPORARY) { |
||
1006 | const GLuint index = inst->DstReg.Index; |
||
1007 | if (inst->DstReg.RelAddr) |
||
1008 | return GL_FALSE; |
||
1009 | update_interval(intBegin, intEnd, loopStack, loopStackDepth, |
||
1010 | index, i); |
||
1011 | } |
||
1012 | } |
||
1013 | } |
||
1014 | |||
1015 | return GL_TRUE; |
||
1016 | } |
||
1017 | |||
1018 | |||
1019 | /** |
||
1020 | * Find the live intervals for each temporary register in the program. |
||
1021 | * For register R, the interval [A,B] indicates that R is referenced |
||
1022 | * from instruction A through instruction B. |
||
1023 | * Special consideration is needed for loops and subroutines. |
||
1024 | * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason |
||
1025 | */ |
||
1026 | static GLboolean |
||
1027 | find_live_intervals(struct gl_program *prog, |
||
1028 | struct interval_list *liveIntervals) |
||
1029 | { |
||
1030 | GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; |
||
1031 | GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; |
||
1032 | GLuint i; |
||
1033 | |||
1034 | /* |
||
1035 | * Note: we'll return GL_FALSE below if we find relative indexing |
||
1036 | * into the TEMP register file. We can't handle that yet. |
||
1037 | * We also give up on subroutines for now. |
||
1038 | */ |
||
1039 | |||
1040 | if (dbg) { |
||
1041 | printf("Optimize: Begin find intervals\n"); |
||
1042 | } |
||
1043 | |||
1044 | /* build intermediate arrays */ |
||
1045 | if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions, |
||
1046 | intBegin, intEnd)) |
||
1047 | return GL_FALSE; |
||
1048 | |||
1049 | /* Build live intervals list from intermediate arrays */ |
||
1050 | liveIntervals->Num = 0; |
||
1051 | for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { |
||
1052 | if (intBegin[i] >= 0) { |
||
1053 | struct interval inv; |
||
1054 | inv.Reg = i; |
||
1055 | inv.Start = intBegin[i]; |
||
1056 | inv.End = intEnd[i]; |
||
1057 | append_interval(liveIntervals, &inv); |
||
1058 | } |
||
1059 | } |
||
1060 | |||
1061 | /* Sort the list according to interval starts */ |
||
1062 | sort_interval_list_by_start(liveIntervals); |
||
1063 | |||
1064 | if (dbg) { |
||
1065 | /* print interval info */ |
||
1066 | for (i = 0; i < liveIntervals->Num; i++) { |
||
1067 | const struct interval *inv = liveIntervals->Intervals + i; |
||
1068 | printf("Reg[%d] live [%d, %d]:", |
||
1069 | inv->Reg, inv->Start, inv->End); |
||
1070 | if (1) { |
||
1071 | GLuint j; |
||
1072 | for (j = 0; j < inv->Start; j++) |
||
1073 | printf(" "); |
||
1074 | for (j = inv->Start; j <= inv->End; j++) |
||
1075 | printf("x"); |
||
1076 | } |
||
1077 | printf("\n"); |
||
1078 | } |
||
1079 | } |
||
1080 | |||
1081 | return GL_TRUE; |
||
1082 | } |
||
1083 | |||
1084 | |||
1085 | /** Scan the array of used register flags to find free entry */ |
||
1086 | static GLint |
||
1087 | alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]) |
||
1088 | { |
||
1089 | GLuint k; |
||
1090 | for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) { |
||
1091 | if (!usedRegs[k]) { |
||
1092 | usedRegs[k] = GL_TRUE; |
||
1093 | return k; |
||
1094 | } |
||
1095 | } |
||
1096 | return -1; |
||
1097 | } |
||
1098 | |||
1099 | |||
1100 | /** |
||
1101 | * This function implements "Linear Scan Register Allocation" to reduce |
||
1102 | * the number of temporary registers used by the program. |
||
1103 | * |
||
1104 | * We compute the "live interval" for all temporary registers then |
||
1105 | * examine the overlap of the intervals to allocate new registers. |
||
1106 | * Basically, if two intervals do not overlap, they can use the same register. |
||
1107 | */ |
||
1108 | static void |
||
1109 | _mesa_reallocate_registers(struct gl_program *prog) |
||
1110 | { |
||
1111 | struct interval_list liveIntervals; |
||
1112 | GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; |
||
1113 | GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; |
||
1114 | GLuint i; |
||
1115 | GLint maxTemp = -1; |
||
1116 | |||
1117 | if (dbg) { |
||
1118 | printf("Optimize: Begin live-interval register reallocation\n"); |
||
1119 | _mesa_print_program(prog); |
||
1120 | } |
||
1121 | |||
1122 | for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){ |
||
1123 | registerMap[i] = -1; |
||
1124 | usedRegs[i] = GL_FALSE; |
||
1125 | } |
||
1126 | |||
1127 | if (!find_live_intervals(prog, &liveIntervals)) { |
||
1128 | if (dbg) |
||
1129 | printf("Aborting register reallocation\n"); |
||
1130 | return; |
||
1131 | } |
||
1132 | |||
1133 | { |
||
1134 | struct interval_list activeIntervals; |
||
1135 | activeIntervals.Num = 0; |
||
1136 | |||
1137 | /* loop over live intervals, allocating a new register for each */ |
||
1138 | for (i = 0; i < liveIntervals.Num; i++) { |
||
1139 | const struct interval *live = liveIntervals.Intervals + i; |
||
1140 | |||
1141 | if (dbg) |
||
1142 | printf("Consider register %u\n", live->Reg); |
||
1143 | |||
1144 | /* Expire old intervals. Intervals which have ended with respect |
||
1145 | * to the live interval can have their remapped registers freed. |
||
1146 | */ |
||
1147 | { |
||
1148 | GLint j; |
||
1149 | for (j = 0; j < (GLint) activeIntervals.Num; j++) { |
||
1150 | const struct interval *inv = activeIntervals.Intervals + j; |
||
1151 | if (inv->End >= live->Start) { |
||
1152 | /* Stop now. Since the activeInterval list is sorted |
||
1153 | * we know we don't have to go further. |
||
1154 | */ |
||
1155 | break; |
||
1156 | } |
||
1157 | else { |
||
1158 | /* Interval 'inv' has expired */ |
||
1159 | const GLint regNew = registerMap[inv->Reg]; |
||
1160 | ASSERT(regNew >= 0); |
||
1161 | |||
1162 | if (dbg) |
||
1163 | printf(" expire interval for reg %u\n", inv->Reg); |
||
1164 | |||
1165 | /* remove interval j from active list */ |
||
1166 | remove_interval(&activeIntervals, inv); |
||
1167 | j--; /* counter-act j++ in for-loop above */ |
||
1168 | |||
1169 | /* return register regNew to the free pool */ |
||
1170 | if (dbg) |
||
1171 | printf(" free reg %d\n", regNew); |
||
1172 | ASSERT(usedRegs[regNew] == GL_TRUE); |
||
1173 | usedRegs[regNew] = GL_FALSE; |
||
1174 | } |
||
1175 | } |
||
1176 | } |
||
1177 | |||
1178 | /* find a free register for this live interval */ |
||
1179 | { |
||
1180 | const GLint k = alloc_register(usedRegs); |
||
1181 | if (k < 0) { |
||
1182 | /* out of registers, give up */ |
||
1183 | return; |
||
1184 | } |
||
1185 | registerMap[live->Reg] = k; |
||
1186 | maxTemp = MAX2(maxTemp, k); |
||
1187 | if (dbg) |
||
1188 | printf(" remap register %u -> %d\n", live->Reg, k); |
||
1189 | } |
||
1190 | |||
1191 | /* Insert this live interval into the active list which is sorted |
||
1192 | * by increasing end points. |
||
1193 | */ |
||
1194 | insert_interval_by_end(&activeIntervals, live); |
||
1195 | } |
||
1196 | } |
||
1197 | |||
1198 | if (maxTemp + 1 < (GLint) liveIntervals.Num) { |
||
1199 | /* OK, we've reduced the number of registers needed. |
||
1200 | * Scan the program and replace all the old temporary register |
||
1201 | * indexes with the new indexes. |
||
1202 | */ |
||
1203 | replace_regs(prog, PROGRAM_TEMPORARY, registerMap); |
||
1204 | |||
1205 | prog->NumTemporaries = maxTemp + 1; |
||
1206 | } |
||
1207 | |||
1208 | if (dbg) { |
||
1209 | printf("Optimize: End live-interval register reallocation\n"); |
||
1210 | printf("Num temp regs before: %u after: %u\n", |
||
1211 | liveIntervals.Num, maxTemp + 1); |
||
1212 | _mesa_print_program(prog); |
||
1213 | } |
||
1214 | } |
||
1215 | |||
1216 | |||
1217 | #if 0 |
||
1218 | static void |
||
1219 | print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) { |
||
1220 | fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions); |
||
1221 | _mesa_print_program(program); |
||
1222 | _mesa_print_program_parameters(ctx, program); |
||
1223 | fprintf(stderr, "\n\n"); |
||
1224 | } |
||
1225 | #endif |
||
1226 | |||
1227 | |||
1228 | /** |
||
1229 | * Apply optimizations to the given program to eliminate unnecessary |
||
1230 | * instructions, temp regs, etc. |
||
1231 | */ |
||
1232 | void |
||
1233 | _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) |
||
1234 | { |
||
1235 | GLboolean any_change; |
||
1236 | |||
1237 | /* Stop when no modifications were output */ |
||
1238 | do { |
||
1239 | any_change = GL_FALSE; |
||
1240 | _mesa_remove_extra_move_use(program); |
||
1241 | if (_mesa_remove_dead_code_global(program)) |
||
1242 | any_change = GL_TRUE; |
||
1243 | if (_mesa_remove_extra_moves(program)) |
||
1244 | any_change = GL_TRUE; |
||
1245 | if (_mesa_remove_dead_code_local(program)) |
||
1246 | any_change = GL_TRUE; |
||
1247 | _mesa_reallocate_registers(program); |
||
1248 | } while (any_change); |
||
1249 | }>>>>>>=>>>>>>>>>>>=>>>>>=>>>><>>>><>>>>><>>>>><>><>><>>>>>>>><>><>>>>=>><>>>>>>>>=>=>=>=>><>><>><>>><>=>><>>>>><> |
||
1250 |