Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1901 | serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Eric Anholt |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | /** @file register_allocate.c |
||
29 | * |
||
30 | * Graph-coloring register allocator. |
||
31 | */ |
||
32 | |||
33 | #include |
||
34 | |||
35 | #include "main/imports.h" |
||
36 | #include "main/macros.h" |
||
37 | #include "main/mtypes.h" |
||
38 | #include "register_allocate.h" |
||
39 | |||
40 | struct ra_reg { |
||
41 | char *name; |
||
42 | GLboolean *conflicts; |
||
43 | }; |
||
44 | |||
45 | struct ra_regs { |
||
46 | struct ra_reg *regs; |
||
47 | unsigned int count; |
||
48 | |||
49 | struct ra_class **classes; |
||
50 | unsigned int class_count; |
||
51 | }; |
||
52 | |||
53 | struct ra_class { |
||
54 | GLboolean *regs; |
||
55 | |||
56 | /** |
||
57 | * p_B in Runeson/Nyström paper. |
||
58 | * |
||
59 | * This is "how many regs are in the set." |
||
60 | */ |
||
61 | unsigned int p; |
||
62 | |||
63 | /** |
||
64 | * q_B,C in Runeson/Nyström paper. |
||
65 | */ |
||
66 | unsigned int *q; |
||
67 | }; |
||
68 | |||
69 | struct ra_node { |
||
70 | GLboolean *adjacency; |
||
71 | unsigned int class; |
||
72 | unsigned int adjacency_count; |
||
73 | unsigned int reg; |
||
74 | GLboolean in_stack; |
||
75 | float spill_cost; |
||
76 | }; |
||
77 | |||
78 | struct ra_graph { |
||
79 | struct ra_regs *regs; |
||
80 | /** |
||
81 | * the variables that need register allocation. |
||
82 | */ |
||
83 | struct ra_node *nodes; |
||
84 | unsigned int count; /**< count of nodes. */ |
||
85 | |||
86 | unsigned int *stack; |
||
87 | unsigned int stack_count; |
||
88 | }; |
||
89 | |||
90 | struct ra_regs * |
||
91 | ra_alloc_reg_set(unsigned int count) |
||
92 | { |
||
93 | unsigned int i; |
||
94 | struct ra_regs *regs; |
||
95 | |||
96 | regs = rzalloc(NULL, struct ra_regs); |
||
97 | regs->count = count; |
||
98 | regs->regs = rzalloc_array(regs, struct ra_reg, count); |
||
99 | |||
100 | for (i = 0; i < count; i++) { |
||
101 | regs->regs[i].conflicts = rzalloc_array(regs->regs, GLboolean, count); |
||
102 | regs->regs[i].conflicts[i] = GL_TRUE; |
||
103 | } |
||
104 | |||
105 | return regs; |
||
106 | } |
||
107 | |||
108 | void |
||
109 | ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) |
||
110 | { |
||
111 | regs->regs[r1].conflicts[r2] = GL_TRUE; |
||
112 | regs->regs[r2].conflicts[r1] = GL_TRUE; |
||
113 | } |
||
114 | |||
115 | unsigned int |
||
116 | ra_alloc_reg_class(struct ra_regs *regs) |
||
117 | { |
||
118 | struct ra_class *class; |
||
119 | |||
120 | regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, |
||
121 | regs->class_count + 1); |
||
122 | |||
123 | class = rzalloc(regs, struct ra_class); |
||
124 | regs->classes[regs->class_count] = class; |
||
125 | |||
126 | class->regs = rzalloc_array(class, GLboolean, regs->count); |
||
127 | |||
128 | return regs->class_count++; |
||
129 | } |
||
130 | |||
131 | void |
||
132 | ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) |
||
133 | { |
||
134 | struct ra_class *class = regs->classes[c]; |
||
135 | |||
136 | class->regs[r] = GL_TRUE; |
||
137 | class->p++; |
||
138 | } |
||
139 | |||
140 | /** |
||
141 | * Must be called after all conflicts and register classes have been |
||
142 | * set up and before the register set is used for allocation. |
||
143 | */ |
||
144 | void |
||
145 | ra_set_finalize(struct ra_regs *regs) |
||
146 | { |
||
147 | unsigned int b, c; |
||
148 | |||
149 | for (b = 0; b < regs->class_count; b++) { |
||
150 | regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count); |
||
151 | } |
||
152 | |||
153 | /* Compute, for each class B and C, how many regs of B an |
||
154 | * allocation to C could conflict with. |
||
155 | */ |
||
156 | for (b = 0; b < regs->class_count; b++) { |
||
157 | for (c = 0; c < regs->class_count; c++) { |
||
158 | unsigned int rc; |
||
159 | int max_conflicts = 0; |
||
160 | |||
161 | for (rc = 0; rc < regs->count; rc++) { |
||
162 | unsigned int rb; |
||
163 | int conflicts = 0; |
||
164 | |||
165 | if (!regs->classes[c]->regs[rc]) |
||
166 | continue; |
||
167 | |||
168 | for (rb = 0; rb < regs->count; rb++) { |
||
169 | if (regs->classes[b]->regs[rb] && |
||
170 | regs->regs[rb].conflicts[rc]) |
||
171 | conflicts++; |
||
172 | } |
||
173 | max_conflicts = MAX2(max_conflicts, conflicts); |
||
174 | } |
||
175 | regs->classes[b]->q[c] = max_conflicts; |
||
176 | } |
||
177 | } |
||
178 | } |
||
179 | |||
180 | struct ra_graph * |
||
181 | ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) |
||
182 | { |
||
183 | struct ra_graph *g; |
||
184 | unsigned int i; |
||
185 | |||
186 | g = rzalloc(regs, struct ra_graph); |
||
187 | g->regs = regs; |
||
188 | g->nodes = rzalloc_array(g, struct ra_node, count); |
||
189 | g->count = count; |
||
190 | |||
191 | g->stack = rzalloc_array(g, unsigned int, count); |
||
192 | |||
193 | for (i = 0; i < count; i++) { |
||
194 | g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count); |
||
195 | g->nodes[i].adjacency[i] = GL_TRUE; |
||
196 | g->nodes[i].reg = ~0; |
||
197 | } |
||
198 | |||
199 | return g; |
||
200 | } |
||
201 | |||
202 | void |
||
203 | ra_set_node_class(struct ra_graph *g, |
||
204 | unsigned int n, unsigned int class) |
||
205 | { |
||
206 | g->nodes[n].class = class; |
||
207 | } |
||
208 | |||
209 | void |
||
210 | ra_add_node_interference(struct ra_graph *g, |
||
211 | unsigned int n1, unsigned int n2) |
||
212 | { |
||
213 | if (g->nodes[n1].adjacency[n2]) |
||
214 | return; |
||
215 | |||
216 | g->nodes[n1].adjacency[n2] = GL_TRUE; |
||
217 | g->nodes[n2].adjacency_count++; |
||
218 | g->nodes[n2].adjacency[n1] = GL_TRUE; |
||
219 | g->nodes[n2].adjacency_count++; |
||
220 | } |
||
221 | |||
222 | static GLboolean pq_test(struct ra_graph *g, unsigned int n) |
||
223 | { |
||
224 | unsigned int j; |
||
225 | unsigned int q = 0; |
||
226 | int n_class = g->nodes[n].class; |
||
227 | |||
228 | for (j = 0; j < g->count; j++) { |
||
229 | if (j == n || g->nodes[j].in_stack) |
||
230 | continue; |
||
231 | |||
232 | if (g->nodes[n].adjacency[j]) { |
||
233 | unsigned int j_class = g->nodes[j].class; |
||
234 | q += g->regs->classes[n_class]->q[j_class]; |
||
235 | } |
||
236 | } |
||
237 | |||
238 | return q < g->regs->classes[n_class]->p; |
||
239 | } |
||
240 | |||
241 | /** |
||
242 | * Simplifies the interference graph by pushing all |
||
243 | * trivially-colorable nodes into a stack of nodes to be colored, |
||
244 | * removing them from the graph, and rinsing and repeating. |
||
245 | * |
||
246 | * Returns GL_TRUE if all nodes were removed from the graph. GL_FALSE |
||
247 | * means that either spilling will be required, or optimistic coloring |
||
248 | * should be applied. |
||
249 | */ |
||
250 | GLboolean |
||
251 | ra_simplify(struct ra_graph *g) |
||
252 | { |
||
253 | GLboolean progress = GL_TRUE; |
||
254 | int i; |
||
255 | |||
256 | while (progress) { |
||
257 | progress = GL_FALSE; |
||
258 | |||
259 | for (i = g->count - 1; i >= 0; i--) { |
||
260 | if (g->nodes[i].in_stack) |
||
261 | continue; |
||
262 | |||
263 | if (pq_test(g, i)) { |
||
264 | g->stack[g->stack_count] = i; |
||
265 | g->stack_count++; |
||
266 | g->nodes[i].in_stack = GL_TRUE; |
||
267 | progress = GL_TRUE; |
||
268 | } |
||
269 | } |
||
270 | } |
||
271 | |||
272 | for (i = 0; i < g->count; i++) { |
||
273 | if (!g->nodes[i].in_stack) |
||
274 | return GL_FALSE; |
||
275 | } |
||
276 | |||
277 | return GL_TRUE; |
||
278 | } |
||
279 | |||
280 | /** |
||
281 | * Pops nodes from the stack back into the graph, coloring them with |
||
282 | * registers as they go. |
||
283 | * |
||
284 | * If all nodes were trivially colorable, then this must succeed. If |
||
285 | * not (optimistic coloring), then it may return GL_FALSE; |
||
286 | */ |
||
287 | GLboolean |
||
288 | ra_select(struct ra_graph *g) |
||
289 | { |
||
290 | int i; |
||
291 | |||
292 | while (g->stack_count != 0) { |
||
293 | unsigned int r; |
||
294 | int n = g->stack[g->stack_count - 1]; |
||
295 | struct ra_class *c = g->regs->classes[g->nodes[n].class]; |
||
296 | |||
297 | /* Find the lowest-numbered reg which is not used by a member |
||
298 | * of the graph adjacent to us. |
||
299 | */ |
||
300 | for (r = 0; r < g->regs->count; r++) { |
||
301 | if (!c->regs[r]) |
||
302 | continue; |
||
303 | |||
304 | /* Check if any of our neighbors conflict with this register choice. */ |
||
305 | for (i = 0; i < g->count; i++) { |
||
306 | if (g->nodes[n].adjacency[i] && |
||
307 | !g->nodes[i].in_stack && |
||
308 | g->regs->regs[r].conflicts[g->nodes[i].reg]) { |
||
309 | break; |
||
310 | } |
||
311 | } |
||
312 | if (i == g->count) |
||
313 | break; |
||
314 | } |
||
315 | if (r == g->regs->count) |
||
316 | return GL_FALSE; |
||
317 | |||
318 | g->nodes[n].reg = r; |
||
319 | g->nodes[n].in_stack = GL_FALSE; |
||
320 | g->stack_count--; |
||
321 | } |
||
322 | |||
323 | return GL_TRUE; |
||
324 | } |
||
325 | |||
326 | /** |
||
327 | * Optimistic register coloring: Just push the remaining nodes |
||
328 | * on the stack. They'll be colored first in ra_select(), and |
||
329 | * if they succeed then the locally-colorable nodes are still |
||
330 | * locally-colorable and the rest of the register allocation |
||
331 | * will succeed. |
||
332 | */ |
||
333 | void |
||
334 | ra_optimistic_color(struct ra_graph *g) |
||
335 | { |
||
336 | unsigned int i; |
||
337 | |||
338 | for (i = 0; i < g->count; i++) { |
||
339 | if (g->nodes[i].in_stack) |
||
340 | continue; |
||
341 | |||
342 | g->stack[g->stack_count] = i; |
||
343 | g->stack_count++; |
||
344 | g->nodes[i].in_stack = GL_TRUE; |
||
345 | } |
||
346 | } |
||
347 | |||
348 | GLboolean |
||
349 | ra_allocate_no_spills(struct ra_graph *g) |
||
350 | { |
||
351 | if (!ra_simplify(g)) { |
||
352 | ra_optimistic_color(g); |
||
353 | } |
||
354 | return ra_select(g); |
||
355 | } |
||
356 | |||
357 | unsigned int |
||
358 | ra_get_node_reg(struct ra_graph *g, unsigned int n) |
||
359 | { |
||
360 | return g->nodes[n].reg; |
||
361 | } |
||
362 | |||
363 | static float |
||
364 | ra_get_spill_benefit(struct ra_graph *g, unsigned int n) |
||
365 | { |
||
366 | int j; |
||
367 | float benefit = 0; |
||
368 | int n_class = g->nodes[n].class; |
||
369 | |||
370 | /* Define the benefit of eliminating an interference between n, j |
||
371 | * through spilling as q(C, B) / p(C). This is similar to the |
||
372 | * "count number of edges" approach of traditional graph coloring, |
||
373 | * but takes classes into account. |
||
374 | */ |
||
375 | for (j = 0; j < g->count; j++) { |
||
376 | if (j != n && g->nodes[n].adjacency[j]) { |
||
377 | unsigned int j_class = g->nodes[j].class; |
||
378 | benefit += ((float)g->regs->classes[n_class]->q[j_class] / |
||
379 | g->regs->classes[n_class]->p); |
||
380 | break; |
||
381 | } |
||
382 | } |
||
383 | |||
384 | return benefit; |
||
385 | } |
||
386 | |||
387 | /** |
||
388 | * Returns a node number to be spilled according to the cost/benefit using |
||
389 | * the pq test, or -1 if there are no spillable nodes. |
||
390 | */ |
||
391 | int |
||
392 | ra_get_best_spill_node(struct ra_graph *g) |
||
393 | { |
||
394 | unsigned int best_node = -1; |
||
395 | unsigned int best_benefit = 0.0; |
||
396 | unsigned int n; |
||
397 | |||
398 | for (n = 0; n < g->count; n++) { |
||
399 | float cost = g->nodes[n].spill_cost; |
||
400 | float benefit; |
||
401 | |||
402 | if (cost <= 0.0) |
||
403 | continue; |
||
404 | |||
405 | benefit = ra_get_spill_benefit(g, n); |
||
406 | |||
407 | if (benefit / cost > best_benefit) { |
||
408 | best_benefit = benefit / cost; |
||
409 | best_node = n; |
||
410 | } |
||
411 | } |
||
412 | |||
413 | return best_node; |
||
414 | } |
||
415 | |||
416 | /** |
||
417 | * Only nodes with a spill cost set (cost != 0.0) will be considered |
||
418 | * for register spilling. |
||
419 | */ |
||
420 | void |
||
421 | ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) |
||
422 | { |
||
423 | g->nodes[n].spill_cost = cost; |
||
424 | }=>>>>>>>>>>>>>>>>> |