/*
 * Copyright (C) 2009 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
27 | |||
28 | #include "radeon_dataflow.h" |
||
29 | |||
30 | #include "radeon_compiler.h" |
||
31 | |||
32 | |||
33 | struct updatemask_state { |
||
34 | unsigned char Output[RC_REGISTER_MAX_INDEX]; |
||
35 | unsigned char Temporary[RC_REGISTER_MAX_INDEX]; |
||
36 | unsigned char Address; |
||
37 | unsigned char Special[RC_NUM_SPECIAL_REGISTERS]; |
||
38 | }; |
||
39 | |||
/**
 * Per-instruction result of the analysis, stored at index inst->IP.
 */
struct instruction_state {
	/* Destination channels that update_instruction() found to be used. */
	unsigned char WriteMask:4;

	/* Set once the instruction's ALU result write was found to be used. */
	unsigned char WriteALUResult:1;

	/* Per source operand: channels that have already been accounted for. */
	unsigned char SrcReg[3];
};
45 | |||
/**
 * Bookkeeping for one loop on the loop stack.
 */
struct loopinfo {
	/* Pool-allocated array of state snapshots, one per BRK recorded. */
	struct updatemask_state * Breaks;

	/* Number of valid entries in Breaks. */
	unsigned int BreakCount;

	/* Capacity bookkeeping used by memory_pool_array_reserve(). */
	unsigned int BreaksReserved;
};
51 | |||
52 | struct branchinfo { |
||
53 | unsigned int HaveElse:1; |
||
54 | |||
55 | struct updatemask_state StoreEndif; |
||
56 | struct updatemask_state StoreElse; |
||
57 | }; |
||
58 | |||
59 | struct deadcode_state { |
||
60 | struct radeon_compiler * C; |
||
61 | struct instruction_state * Instructions; |
||
62 | |||
63 | struct updatemask_state R; |
||
64 | |||
65 | struct branchinfo * BranchStack; |
||
66 | unsigned int BranchStackSize; |
||
67 | unsigned int BranchStackReserved; |
||
68 | |||
69 | struct loopinfo * LoopStack; |
||
70 | unsigned int LoopStackSize; |
||
71 | unsigned int LoopStackReserved; |
||
72 | }; |
||
73 | |||
74 | |||
75 | static void or_updatemasks( |
||
76 | struct updatemask_state * dst, |
||
77 | struct updatemask_state * a, |
||
78 | struct updatemask_state * b) |
||
79 | { |
||
80 | for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) { |
||
81 | dst->Output[i] = a->Output[i] | b->Output[i]; |
||
82 | dst->Temporary[i] = a->Temporary[i] | b->Temporary[i]; |
||
83 | } |
||
84 | |||
85 | for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i) |
||
86 | dst->Special[i] = a->Special[i] | b->Special[i]; |
||
87 | |||
88 | dst->Address = a->Address | b->Address; |
||
89 | } |
||
90 | |||
91 | static void push_break(struct deadcode_state *s) |
||
92 | { |
||
93 | struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; |
||
94 | memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, |
||
95 | loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); |
||
96 | |||
97 | memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); |
||
98 | } |
||
99 | |||
100 | static void push_loop(struct deadcode_state * s) |
||
101 | { |
||
102 | memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, |
||
103 | s->LoopStackSize, s->LoopStackReserved, 1); |
||
104 | memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); |
||
105 | } |
||
106 | |||
107 | static void push_branch(struct deadcode_state * s) |
||
108 | { |
||
109 | struct branchinfo * branch; |
||
110 | |||
111 | memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, |
||
112 | s->BranchStackSize, s->BranchStackReserved, 1); |
||
113 | |||
114 | branch = &s->BranchStack[s->BranchStackSize++]; |
||
115 | branch->HaveElse = 0; |
||
116 | memcpy(&branch->StoreEndif, &s->R, sizeof(s->R)); |
||
117 | } |
||
118 | |||
119 | static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index) |
||
120 | { |
||
121 | if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) { |
||
122 | if (index >= RC_REGISTER_MAX_INDEX) { |
||
123 | rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file); |
||
124 | return 0; |
||
125 | } |
||
126 | |||
127 | if (file == RC_FILE_OUTPUT) |
||
128 | return &s->R.Output[index]; |
||
129 | else |
||
130 | return &s->R.Temporary[index]; |
||
131 | } else if (file == RC_FILE_ADDRESS) { |
||
132 | return &s->R.Address; |
||
133 | } else if (file == RC_FILE_SPECIAL) { |
||
134 | if (index >= RC_NUM_SPECIAL_REGISTERS) { |
||
135 | rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index); |
||
136 | return 0; |
||
137 | } |
||
138 | |||
139 | return &s->R.Special[index]; |
||
140 | } |
||
141 | |||
142 | return 0; |
||
143 | } |
||
144 | |||
145 | static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask) |
||
146 | { |
||
147 | unsigned char * pused = get_used_ptr(s, file, index); |
||
148 | if (pused) |
||
149 | *pused |= mask; |
||
150 | } |
||
151 | |||
152 | static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst) |
||
153 | { |
||
154 | const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
||
155 | struct instruction_state * insts = &s->Instructions[inst->IP]; |
||
156 | unsigned int usedmask = 0; |
||
157 | unsigned int srcmasks[3]; |
||
158 | |||
159 | if (opcode->HasDstReg) { |
||
160 | unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); |
||
161 | if (pused) { |
||
162 | usedmask = *pused & inst->U.I.DstReg.WriteMask; |
||
163 | *pused &= ~usedmask; |
||
164 | } |
||
165 | } |
||
166 | |||
167 | insts->WriteMask |= usedmask; |
||
168 | |||
169 | if (inst->U.I.WriteALUResult) { |
||
170 | unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT); |
||
171 | if (pused && *pused) { |
||
172 | if (inst->U.I.WriteALUResult == RC_ALURESULT_X) |
||
173 | usedmask |= RC_MASK_X; |
||
174 | else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) |
||
175 | usedmask |= RC_MASK_W; |
||
176 | |||
177 | *pused = 0; |
||
178 | insts->WriteALUResult = 1; |
||
179 | } |
||
180 | } |
||
181 | |||
182 | rc_compute_sources_for_writemask(inst, usedmask, srcmasks); |
||
183 | |||
184 | for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { |
||
185 | unsigned int refmask = 0; |
||
186 | unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src]; |
||
187 | insts->SrcReg[src] |= newsrcmask; |
||
188 | |||
189 | for(unsigned int chan = 0; chan < 4; ++chan) { |
||
190 | if (GET_BIT(newsrcmask, chan)) |
||
191 | refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan); |
||
192 | } |
||
193 | |||
194 | /* get rid of spurious bits from ZERO, ONE, etc. swizzles */ |
||
195 | refmask &= RC_MASK_XYZW; |
||
196 | |||
197 | if (!refmask) |
||
198 | continue; |
||
199 | |||
200 | mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask); |
||
201 | |||
202 | if (inst->U.I.SrcReg[src].RelAddr) |
||
203 | mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); |
||
204 | } |
||
205 | } |
||
206 | |||
207 | static void mark_output_use(void * data, unsigned int index, unsigned int mask) |
||
208 | { |
||
209 | struct deadcode_state * s = data; |
||
210 | |||
211 | mark_used(s, RC_FILE_OUTPUT, index, mask); |
||
212 | } |
||
213 | |||
214 | void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) |
||
215 | { |
||
216 | struct deadcode_state s; |
||
217 | unsigned int nr_instructions; |
||
218 | rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; |
||
219 | unsigned int ip; |
||
220 | |||
221 | memset(&s, 0, sizeof(s)); |
||
222 | s.C = c; |
||
223 | |||
224 | nr_instructions = rc_recompute_ips(c); |
||
225 | s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions); |
||
226 | memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions); |
||
227 | |||
228 | dce(c, &s, &mark_output_use); |
||
229 | |||
230 | for(struct rc_instruction * inst = c->Program.Instructions.Prev; |
||
231 | inst != &c->Program.Instructions; |
||
232 | inst = inst->Prev) { |
||
233 | const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
||
234 | |||
235 | switch(opcode->Opcode){ |
||
236 | /* Mark all sources in the loop body as used before doing |
||
237 | * normal deadcode analysis. This is probably not optimal. |
||
238 | */ |
||
239 | case RC_OPCODE_ENDLOOP: |
||
240 | { |
||
241 | int endloops = 1; |
||
242 | struct rc_instruction *ptr; |
||
243 | for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){ |
||
244 | opcode = rc_get_opcode_info(ptr->U.I.Opcode); |
||
245 | if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ |
||
246 | endloops--; |
||
247 | continue; |
||
248 | } |
||
249 | if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){ |
||
250 | endloops++; |
||
251 | continue; |
||
252 | } |
||
253 | if(opcode->HasDstReg){ |
||
254 | int src = 0; |
||
255 | unsigned int srcmasks[3]; |
||
256 | rc_compute_sources_for_writemask(ptr, |
||
257 | ptr->U.I.DstReg.WriteMask, srcmasks); |
||
258 | for(src=0; src < opcode->NumSrcRegs; src++){ |
||
259 | mark_used(&s, |
||
260 | ptr->U.I.SrcReg[src].File, |
||
261 | ptr->U.I.SrcReg[src].Index, |
||
262 | srcmasks[src]); |
||
263 | } |
||
264 | } |
||
265 | } |
||
266 | push_loop(&s); |
||
267 | break; |
||
268 | } |
||
269 | case RC_OPCODE_BRK: |
||
270 | push_break(&s); |
||
271 | break; |
||
272 | case RC_OPCODE_BGNLOOP: |
||
273 | { |
||
274 | unsigned int i; |
||
275 | struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; |
||
276 | for(i = 0; i < loop->BreakCount; i++) { |
||
277 | or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); |
||
278 | } |
||
279 | break; |
||
280 | } |
||
281 | case RC_OPCODE_CONT: |
||
282 | break; |
||
283 | case RC_OPCODE_ENDIF: |
||
284 | push_branch(&s); |
||
285 | break; |
||
286 | default: |
||
287 | if (opcode->IsFlowControl && s.BranchStackSize) { |
||
288 | struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1]; |
||
289 | if (opcode->Opcode == RC_OPCODE_IF) { |
||
290 | or_updatemasks(&s.R, |
||
291 | &s.R, |
||
292 | branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif); |
||
293 | |||
294 | s.BranchStackSize--; |
||
295 | } else if (opcode->Opcode == RC_OPCODE_ELSE) { |
||
296 | if (branch->HaveElse) { |
||
297 | rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__); |
||
298 | } else { |
||
299 | memcpy(&branch->StoreElse, &s.R, sizeof(s.R)); |
||
300 | memcpy(&s.R, &branch->StoreEndif, sizeof(s.R)); |
||
301 | branch->HaveElse = 1; |
||
302 | } |
||
303 | } else { |
||
304 | rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); |
||
305 | } |
||
306 | } |
||
307 | } |
||
308 | |||
309 | update_instruction(&s, inst); |
||
310 | } |
||
311 | |||
312 | ip = 0; |
||
313 | for(struct rc_instruction * inst = c->Program.Instructions.Next; |
||
314 | inst != &c->Program.Instructions; |
||
315 | inst = inst->Next, ++ip) { |
||
316 | const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); |
||
317 | int dead = 1; |
||
318 | unsigned int srcmasks[3]; |
||
319 | unsigned int usemask; |
||
320 | |||
321 | if (!opcode->HasDstReg) { |
||
322 | dead = 0; |
||
323 | } else { |
||
324 | inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask; |
||
325 | if (s.Instructions[ip].WriteMask) |
||
326 | dead = 0; |
||
327 | |||
328 | if (s.Instructions[ip].WriteALUResult) |
||
329 | dead = 0; |
||
330 | else |
||
331 | inst->U.I.WriteALUResult = RC_ALURESULT_NONE; |
||
332 | } |
||
333 | |||
334 | if (dead) { |
||
335 | struct rc_instruction * todelete = inst; |
||
336 | inst = inst->Prev; |
||
337 | rc_remove_instruction(todelete); |
||
338 | continue; |
||
339 | } |
||
340 | |||
341 | usemask = s.Instructions[ip].WriteMask; |
||
342 | |||
343 | if (inst->U.I.WriteALUResult == RC_ALURESULT_X) |
||
344 | usemask |= RC_MASK_X; |
||
345 | else if (inst->U.I.WriteALUResult == RC_ALURESULT_W) |
||
346 | usemask |= RC_MASK_W; |
||
347 | |||
348 | rc_compute_sources_for_writemask(inst, usemask, srcmasks); |
||
349 | |||
350 | for(unsigned int src = 0; src < 3; ++src) { |
||
351 | for(unsigned int chan = 0; chan < 4; ++chan) { |
||
352 | if (!GET_BIT(srcmasks[src], chan)) |
||
353 | SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED); |
||
354 | } |
||
355 | } |
||
356 | } |
||
357 | |||
358 | rc_calculate_inputs_outputs(c); |
||
359 | }>>>>><>>>>> |