Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright (C) 2009 Nicolai Haehnle. |
||
3 | * Copyright 2011 Tom Stellard |
||
4 | * |
||
5 | * All Rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining |
||
8 | * a copy of this software and associated documentation files (the |
||
9 | * "Software"), to deal in the Software without restriction, including |
||
10 | * without limitation the rights to use, copy, modify, merge, publish, |
||
11 | * distribute, sublicense, and/or sell copies of the Software, and to |
||
12 | * permit persons to whom the Software is furnished to do so, subject to |
||
13 | * the following conditions: |
||
14 | * |
||
15 | * The above copyright notice and this permission notice (including the |
||
16 | * next paragraph) shall be included in all copies or substantial |
||
17 | * portions of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
20 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
22 | * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
23 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
24 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
25 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
26 | * |
||
27 | */ |
||
28 | |||
29 | #include "radeon_program_pair.h" |
||
30 | |||
31 | #include <stdio.h> |
||
32 | |||
33 | #include "main/glheader.h" |
||
34 | #include "util/register_allocate.h" |
||
35 | #include "util/u_memory.h" |
||
36 | #include "util/ralloc.h" |
||
37 | |||
38 | #include "r300_fragprog_swizzle.h" |
||
39 | #include "radeon_compiler.h" |
||
40 | #include "radeon_compiler_util.h" |
||
41 | #include "radeon_dataflow.h" |
||
42 | #include "radeon_list.h" |
||
43 | #include "radeon_regalloc.h" |
||
44 | #include "radeon_variable.h" |
||
45 | |||
/* Compile-time switch: set to a non-zero value to get tracing on stderr. */
#define VERBOSE 0

/* printf-style trace helper; prints nothing unless VERBOSE is non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
||
49 | |||
50 | |||
51 | |||
52 | struct register_info { |
||
53 | struct live_intervals Live[4]; |
||
54 | |||
55 | unsigned int Used:1; |
||
56 | unsigned int Allocated:1; |
||
57 | unsigned int File:3; |
||
58 | unsigned int Index:RC_REGISTER_INDEX_BITS; |
||
59 | unsigned int Writemask; |
||
60 | }; |
||
61 | |||
/** Working state shared by the passes of one rc_pair_regalloc() run. */
struct regalloc_state {
	/** Compiler whose program is being processed. */
	struct radeon_compiler * C;

	/** Table of input registers, NumInputs entries. */
	struct register_info * Input;
	unsigned int NumInputs;

	/** Table of temporary registers, NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned int NumTemporaries;

	/** Non-zero when only the inputs-only allocation is performed;
	 * remap_register() then rewrites temporaries as well. */
	unsigned int Simple;
	/** Largest IP of any ENDLOOP seen so far while scanning. */
	int LoopEnd;
};
||
74 | |||
75 | struct rc_class { |
||
76 | enum rc_reg_class ID; |
||
77 | |||
78 | unsigned int WritemaskCount; |
||
79 | |||
80 | /** List of writemasks that belong to this class */ |
||
81 | unsigned int Writemasks[3]; |
||
82 | |||
83 | |||
84 | }; |
||
85 | |||
86 | static const struct rc_class rc_class_list [] = { |
||
87 | {RC_REG_CLASS_SINGLE, 3, |
||
88 | {RC_MASK_X, |
||
89 | RC_MASK_Y, |
||
90 | RC_MASK_Z}}, |
||
91 | {RC_REG_CLASS_DOUBLE, 3, |
||
92 | {RC_MASK_X | RC_MASK_Y, |
||
93 | RC_MASK_X | RC_MASK_Z, |
||
94 | RC_MASK_Y | RC_MASK_Z}}, |
||
95 | {RC_REG_CLASS_TRIPLE, 1, |
||
96 | {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, |
||
97 | RC_MASK_NONE, |
||
98 | RC_MASK_NONE}}, |
||
99 | {RC_REG_CLASS_ALPHA, 1, |
||
100 | {RC_MASK_W, |
||
101 | RC_MASK_NONE, |
||
102 | RC_MASK_NONE}}, |
||
103 | {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, |
||
104 | {RC_MASK_X | RC_MASK_W, |
||
105 | RC_MASK_Y | RC_MASK_W, |
||
106 | RC_MASK_Z | RC_MASK_W}}, |
||
107 | {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, |
||
108 | {RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
||
109 | RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
||
110 | RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, |
||
111 | {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, |
||
112 | {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
||
113 | RC_MASK_NONE, |
||
114 | RC_MASK_NONE}}, |
||
115 | {RC_REG_CLASS_X, 1, |
||
116 | {RC_MASK_X, |
||
117 | RC_MASK_NONE, |
||
118 | RC_MASK_NONE}}, |
||
119 | {RC_REG_CLASS_Y, 1, |
||
120 | {RC_MASK_Y, |
||
121 | RC_MASK_NONE, |
||
122 | RC_MASK_NONE}}, |
||
123 | {RC_REG_CLASS_Z, 1, |
||
124 | {RC_MASK_Z, |
||
125 | RC_MASK_NONE, |
||
126 | RC_MASK_NONE}}, |
||
127 | {RC_REG_CLASS_XY, 1, |
||
128 | {RC_MASK_X | RC_MASK_Y, |
||
129 | RC_MASK_NONE, |
||
130 | RC_MASK_NONE}}, |
||
131 | {RC_REG_CLASS_YZ, 1, |
||
132 | {RC_MASK_Y | RC_MASK_Z, |
||
133 | RC_MASK_NONE, |
||
134 | RC_MASK_NONE}}, |
||
135 | {RC_REG_CLASS_XZ, 1, |
||
136 | {RC_MASK_X | RC_MASK_Z, |
||
137 | RC_MASK_NONE, |
||
138 | RC_MASK_NONE}}, |
||
139 | {RC_REG_CLASS_XW, 1, |
||
140 | {RC_MASK_X | RC_MASK_W, |
||
141 | RC_MASK_NONE, |
||
142 | RC_MASK_NONE}}, |
||
143 | {RC_REG_CLASS_YW, 1, |
||
144 | {RC_MASK_Y | RC_MASK_W, |
||
145 | RC_MASK_NONE, |
||
146 | RC_MASK_NONE}}, |
||
147 | {RC_REG_CLASS_ZW, 1, |
||
148 | {RC_MASK_Z | RC_MASK_W, |
||
149 | RC_MASK_NONE, |
||
150 | RC_MASK_NONE}}, |
||
151 | {RC_REG_CLASS_XYW, 1, |
||
152 | {RC_MASK_X | RC_MASK_Y | RC_MASK_W, |
||
153 | RC_MASK_NONE, |
||
154 | RC_MASK_NONE}}, |
||
155 | {RC_REG_CLASS_YZW, 1, |
||
156 | {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, |
||
157 | RC_MASK_NONE, |
||
158 | RC_MASK_NONE}}, |
||
159 | {RC_REG_CLASS_XZW, 1, |
||
160 | {RC_MASK_X | RC_MASK_Z | RC_MASK_W, |
||
161 | RC_MASK_NONE, |
||
162 | RC_MASK_NONE}} |
||
163 | }; |
||
164 | |||
165 | static void print_live_intervals(struct live_intervals * src) |
||
166 | { |
||
167 | if (!src || !src->Used) { |
||
168 | DBG("(null)"); |
||
169 | return; |
||
170 | } |
||
171 | |||
172 | DBG("(%i,%i)", src->Start, src->End); |
||
173 | } |
||
174 | |||
175 | static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) |
||
176 | { |
||
177 | if (VERBOSE) { |
||
178 | DBG("overlap_live_intervals: "); |
||
179 | print_live_intervals(a); |
||
180 | DBG(" to "); |
||
181 | print_live_intervals(b); |
||
182 | DBG("\n"); |
||
183 | } |
||
184 | |||
185 | if (!a->Used || !b->Used) { |
||
186 | DBG(" unused interval\n"); |
||
187 | return 0; |
||
188 | } |
||
189 | |||
190 | if (a->Start > b->Start) { |
||
191 | if (a->Start < b->End) { |
||
192 | DBG(" overlap\n"); |
||
193 | return 1; |
||
194 | } |
||
195 | } else if (b->Start > a->Start) { |
||
196 | if (b->Start < a->End) { |
||
197 | DBG(" overlap\n"); |
||
198 | return 1; |
||
199 | } |
||
200 | } else { /* a->Start == b->Start */ |
||
201 | if (a->Start != a->End && b->Start != b->End) { |
||
202 | DBG(" overlap\n"); |
||
203 | return 1; |
||
204 | } |
||
205 | } |
||
206 | |||
207 | DBG(" no overlap\n"); |
||
208 | |||
209 | return 0; |
||
210 | } |
||
211 | |||
212 | static void scan_read_callback(void * data, struct rc_instruction * inst, |
||
213 | rc_register_file file, unsigned int index, unsigned int mask) |
||
214 | { |
||
215 | struct regalloc_state * s = data; |
||
216 | struct register_info * reg; |
||
217 | unsigned int i; |
||
218 | |||
219 | if (file != RC_FILE_INPUT) |
||
220 | return; |
||
221 | |||
222 | s->Input[index].Used = 1; |
||
223 | reg = &s->Input[index]; |
||
224 | |||
225 | for (i = 0; i < 4; i++) { |
||
226 | if (!((mask >> i) & 0x1)) { |
||
227 | continue; |
||
228 | } |
||
229 | reg->Live[i].Used = 1; |
||
230 | reg->Live[i].Start = 0; |
||
231 | reg->Live[i].End = |
||
232 | s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; |
||
233 | } |
||
234 | } |
||
235 | |||
236 | static void remap_register(void * data, struct rc_instruction * inst, |
||
237 | rc_register_file * file, unsigned int * index) |
||
238 | { |
||
239 | struct regalloc_state * s = data; |
||
240 | const struct register_info * reg; |
||
241 | |||
242 | if (*file == RC_FILE_TEMPORARY && s->Simple) |
||
243 | reg = &s->Temporary[*index]; |
||
244 | else if (*file == RC_FILE_INPUT) |
||
245 | reg = &s->Input[*index]; |
||
246 | else |
||
247 | return; |
||
248 | |||
249 | if (reg->Allocated) { |
||
250 | *index = reg->Index; |
||
251 | } |
||
252 | } |
||
253 | |||
254 | static void alloc_input_simple(void * data, unsigned int input, |
||
255 | unsigned int hwreg) |
||
256 | { |
||
257 | struct regalloc_state * s = data; |
||
258 | |||
259 | if (input >= s->NumInputs) |
||
260 | return; |
||
261 | |||
262 | s->Input[input].Allocated = 1; |
||
263 | s->Input[input].File = RC_FILE_TEMPORARY; |
||
264 | s->Input[input].Index = hwreg; |
||
265 | } |
||
266 | |||
267 | /* This functions offsets the temporary register indices by the number |
||
268 | * of input registers, because input registers are actually temporaries and |
||
269 | * should not occupy the same space. |
||
270 | * |
||
271 | * This pass is supposed to be used to maintain correct allocation of inputs |
||
272 | * if the standard register allocation is disabled. */ |
||
273 | static void do_regalloc_inputs_only(struct regalloc_state * s) |
||
274 | { |
||
275 | for (unsigned i = 0; i < s->NumTemporaries; i++) { |
||
276 | s->Temporary[i].Allocated = 1; |
||
277 | s->Temporary[i].File = RC_FILE_TEMPORARY; |
||
278 | s->Temporary[i].Index = i + s->NumInputs; |
||
279 | } |
||
280 | } |
||
281 | |||
282 | static unsigned int is_derivative(rc_opcode op) |
||
283 | { |
||
284 | return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); |
||
285 | } |
||
286 | |||
287 | static int find_class( |
||
288 | const struct rc_class * classes, |
||
289 | unsigned int writemask, |
||
290 | unsigned int max_writemask_count) |
||
291 | { |
||
292 | unsigned int i; |
||
293 | for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
||
294 | unsigned int j; |
||
295 | if (classes[i].WritemaskCount > max_writemask_count) { |
||
296 | continue; |
||
297 | } |
||
298 | for (j = 0; j < 3; j++) { |
||
299 | if (classes[i].Writemasks[j] == writemask) { |
||
300 | return i; |
||
301 | } |
||
302 | } |
||
303 | } |
||
304 | return -1; |
||
305 | } |
||
306 | |||
/** User data passed to variable_get_class_read_cb(). */
struct variable_get_class_cb_data {
	/** Flag that the callback clears when a swizzle would stop being
	 * native. */
	unsigned int * can_change_writemask;
	/** Swizzle applied to each argument before the native check. */
	unsigned int conversion_swizzle;
};
||
311 | |||
312 | static void variable_get_class_read_cb( |
||
313 | void * userdata, |
||
314 | struct rc_instruction * inst, |
||
315 | struct rc_pair_instruction_arg * arg, |
||
316 | struct rc_pair_instruction_source * src) |
||
317 | { |
||
318 | struct variable_get_class_cb_data * d = userdata; |
||
319 | unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, |
||
320 | d->conversion_swizzle); |
||
321 | if (!r300_swizzle_is_native_basic(new_swizzle)) { |
||
322 | *d->can_change_writemask = 0; |
||
323 | } |
||
324 | } |
||
325 | |||
326 | static enum rc_reg_class variable_get_class( |
||
327 | struct rc_variable * variable, |
||
328 | const struct rc_class * classes) |
||
329 | { |
||
330 | unsigned int i; |
||
331 | unsigned int can_change_writemask= 1; |
||
332 | unsigned int writemask = rc_variable_writemask_sum(variable); |
||
333 | struct rc_list * readers = rc_variable_readers_union(variable); |
||
334 | int class_index; |
||
335 | |||
336 | if (!variable->C->is_r500) { |
||
337 | struct rc_class c; |
||
338 | struct rc_variable * var_ptr; |
||
339 | /* The assumption here is that if an instruction has type |
||
340 | * RC_INSTRUCTION_NORMAL then it is a TEX instruction. |
||
341 | * r300 and r400 can't swizzle the result of a TEX lookup. */ |
||
342 | for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { |
||
343 | if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { |
||
344 | writemask = RC_MASK_XYZW; |
||
345 | } |
||
346 | } |
||
347 | |||
348 | /* Check if it is possible to do swizzle packing for r300/r400 |
||
349 | * without creating non-native swizzles. */ |
||
350 | class_index = find_class(classes, writemask, 3); |
||
351 | if (class_index < 0) { |
||
352 | goto error; |
||
353 | } |
||
354 | c = classes[class_index]; |
||
355 | if (c.WritemaskCount == 1) { |
||
356 | goto done; |
||
357 | } |
||
358 | for (i = 0; i < c.WritemaskCount; i++) { |
||
359 | struct rc_variable * var_ptr; |
||
360 | for (var_ptr = variable; var_ptr; |
||
361 | var_ptr = var_ptr->Friend) { |
||
362 | int j; |
||
363 | unsigned int conversion_swizzle = |
||
364 | rc_make_conversion_swizzle( |
||
365 | writemask, c.Writemasks[i]); |
||
366 | struct variable_get_class_cb_data d; |
||
367 | d.can_change_writemask = &can_change_writemask; |
||
368 | d.conversion_swizzle = conversion_swizzle; |
||
369 | /* If we get this far var_ptr->Inst has to |
||
370 | * be a pair instruction. If variable or any |
||
371 | * of its friends are normal instructions, |
||
372 | * then the writemask will be set to RC_MASK_XYZW |
||
373 | * and the function will return before it gets |
||
374 | * here. */ |
||
375 | rc_pair_for_all_reads_arg(var_ptr->Inst, |
||
376 | variable_get_class_read_cb, &d); |
||
377 | |||
378 | for (j = 0; j < var_ptr->ReaderCount; j++) { |
||
379 | unsigned int old_swizzle; |
||
380 | unsigned int new_swizzle; |
||
381 | struct rc_reader r = var_ptr->Readers[j]; |
||
382 | if (r.Inst->Type == |
||
383 | RC_INSTRUCTION_PAIR ) { |
||
384 | old_swizzle = r.U.P.Arg->Swizzle; |
||
385 | } else { |
||
386 | /* Source operands of TEX |
||
387 | * instructions can't be |
||
388 | * swizzle on r300/r400 GPUs. |
||
389 | */ |
||
390 | can_change_writemask = 0; |
||
391 | break; |
||
392 | } |
||
393 | new_swizzle = rc_adjust_channels( |
||
394 | old_swizzle, conversion_swizzle); |
||
395 | if (!r300_swizzle_is_native_basic( |
||
396 | new_swizzle)) { |
||
397 | can_change_writemask = 0; |
||
398 | break; |
||
399 | } |
||
400 | } |
||
401 | if (!can_change_writemask) { |
||
402 | break; |
||
403 | } |
||
404 | } |
||
405 | if (!can_change_writemask) { |
||
406 | break; |
||
407 | } |
||
408 | } |
||
409 | } |
||
410 | |||
411 | if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { |
||
412 | /* DDX/DDY seem to always fail when their writemasks are |
||
413 | * changed.*/ |
||
414 | if (is_derivative(variable->Inst->U.P.RGB.Opcode) |
||
415 | || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { |
||
416 | can_change_writemask = 0; |
||
417 | } |
||
418 | } |
||
419 | for ( ; readers; readers = readers->Next) { |
||
420 | struct rc_reader * r = readers->Item; |
||
421 | if (r->Inst->Type == RC_INSTRUCTION_PAIR) { |
||
422 | if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { |
||
423 | can_change_writemask = 0; |
||
424 | break; |
||
425 | } |
||
426 | /* DDX/DDY also fail when their swizzles are changed. */ |
||
427 | if (is_derivative(r->Inst->U.P.RGB.Opcode) |
||
428 | || is_derivative(r->Inst->U.P.Alpha.Opcode)) { |
||
429 | can_change_writemask = 0; |
||
430 | break; |
||
431 | } |
||
432 | } |
||
433 | } |
||
434 | |||
435 | class_index = find_class(classes, writemask, |
||
436 | can_change_writemask ? 3 : 1); |
||
437 | done: |
||
438 | if (class_index > -1) { |
||
439 | return classes[class_index].ID; |
||
440 | } else { |
||
441 | error: |
||
442 | rc_error(variable->C, |
||
443 | "Could not find class for index=%u mask=%u\n", |
||
444 | variable->Dst.Index, writemask); |
||
445 | return 0; |
||
446 | } |
||
447 | } |
||
448 | |||
449 | static unsigned int overlap_live_intervals_array( |
||
450 | struct live_intervals * a, |
||
451 | struct live_intervals * b) |
||
452 | { |
||
453 | unsigned int a_chan, b_chan; |
||
454 | for (a_chan = 0; a_chan < 4; a_chan++) { |
||
455 | for (b_chan = 0; b_chan < 4; b_chan++) { |
||
456 | if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { |
||
457 | return 1; |
||
458 | } |
||
459 | } |
||
460 | } |
||
461 | return 0; |
||
462 | } |
||
463 | |||
464 | static unsigned int reg_get_index(int reg) |
||
465 | { |
||
466 | return reg / RC_MASK_XYZW; |
||
467 | } |
||
468 | |||
469 | static unsigned int reg_get_writemask(int reg) |
||
470 | { |
||
471 | return (reg % RC_MASK_XYZW) + 1; |
||
472 | } |
||
473 | |||
474 | static int get_reg_id(unsigned int index, unsigned int writemask) |
||
475 | { |
||
476 | assert(writemask); |
||
477 | if (writemask == 0) { |
||
478 | return 0; |
||
479 | } |
||
480 | return (index * RC_MASK_XYZW) + (writemask - 1); |
||
481 | } |
||
482 | |||
#if VERBOSE
/** Debug helper: print a register id as "Temp[index].xyzw" on stderr,
 * with '_' in place of channels missing from the writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
||
495 | |||
496 | static void add_register_conflicts( |
||
497 | struct ra_regs * regs, |
||
498 | unsigned int max_temp_regs) |
||
499 | { |
||
500 | unsigned int index, a_mask, b_mask; |
||
501 | for (index = 0; index < max_temp_regs; index++) { |
||
502 | for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { |
||
503 | for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; |
||
504 | b_mask++) { |
||
505 | if (a_mask & b_mask) { |
||
506 | ra_add_reg_conflict(regs, |
||
507 | get_reg_id(index, a_mask), |
||
508 | get_reg_id(index, b_mask)); |
||
509 | } |
||
510 | } |
||
511 | } |
||
512 | } |
||
513 | } |
||
514 | |||
515 | static void do_advanced_regalloc(struct regalloc_state * s) |
||
516 | { |
||
517 | |||
518 | unsigned int i, input_node, node_count, node_index; |
||
519 | unsigned int * node_classes; |
||
520 | struct rc_instruction * inst; |
||
521 | struct rc_list * var_ptr; |
||
522 | struct rc_list * variables; |
||
523 | struct ra_graph * graph; |
||
524 | const struct rc_regalloc_state *ra_state = s->C->regalloc_state; |
||
525 | |||
526 | /* Get list of program variables */ |
||
527 | variables = rc_get_variables(s->C); |
||
528 | node_count = rc_list_count(variables); |
||
529 | node_classes = memory_pool_malloc(&s->C->Pool, |
||
530 | node_count * sizeof(unsigned int)); |
||
531 | |||
532 | for (var_ptr = variables, node_index = 0; var_ptr; |
||
533 | var_ptr = var_ptr->Next, node_index++) { |
||
534 | unsigned int class_index; |
||
535 | /* Compute the live intervals */ |
||
536 | rc_variable_compute_live_intervals(var_ptr->Item); |
||
537 | |||
538 | class_index = variable_get_class(var_ptr->Item, rc_class_list); |
||
539 | node_classes[node_index] = ra_state->class_ids[class_index]; |
||
540 | } |
||
541 | |||
542 | |||
543 | /* Calculate live intervals for input registers */ |
||
544 | for (inst = s->C->Program.Instructions.Next; |
||
545 | inst != &s->C->Program.Instructions; |
||
546 | inst = inst->Next) { |
||
547 | rc_opcode op = rc_get_flow_control_inst(inst); |
||
548 | if (op == RC_OPCODE_BGNLOOP) { |
||
549 | struct rc_instruction * endloop = |
||
550 | rc_match_bgnloop(inst); |
||
551 | if (endloop->IP > s->LoopEnd) { |
||
552 | s->LoopEnd = endloop->IP; |
||
553 | } |
||
554 | } |
||
555 | rc_for_all_reads_mask(inst, scan_read_callback, s); |
||
556 | } |
||
557 | |||
558 | /* Compute the writemask for inputs. */ |
||
559 | for (i = 0; i < s->NumInputs; i++) { |
||
560 | unsigned int chan, writemask = 0; |
||
561 | for (chan = 0; chan < 4; chan++) { |
||
562 | if (s->Input[i].Live[chan].Used) { |
||
563 | writemask |= (1 << chan); |
||
564 | } |
||
565 | } |
||
566 | s->Input[i].Writemask = writemask; |
||
567 | } |
||
568 | |||
569 | graph = ra_alloc_interference_graph(ra_state->regs, |
||
570 | node_count + s->NumInputs); |
||
571 | |||
572 | for (node_index = 0; node_index < node_count; node_index++) { |
||
573 | ra_set_node_class(graph, node_index, node_classes[node_index]); |
||
574 | } |
||
575 | |||
576 | /* Build the interference graph */ |
||
577 | for (var_ptr = variables, node_index = 0; var_ptr; |
||
578 | var_ptr = var_ptr->Next,node_index++) { |
||
579 | struct rc_list * a, * b; |
||
580 | unsigned int b_index; |
||
581 | |||
582 | for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; |
||
583 | b; b = b->Next, b_index++) { |
||
584 | struct rc_variable * var_a = a->Item; |
||
585 | while (var_a) { |
||
586 | struct rc_variable * var_b = b->Item; |
||
587 | while (var_b) { |
||
588 | if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { |
||
589 | ra_add_node_interference(graph, |
||
590 | node_index, b_index); |
||
591 | } |
||
592 | var_b = var_b->Friend; |
||
593 | } |
||
594 | var_a = var_a->Friend; |
||
595 | } |
||
596 | } |
||
597 | } |
||
598 | |||
599 | /* Add input registers to the interference graph */ |
||
600 | for (i = 0, input_node = 0; i< s->NumInputs; i++) { |
||
601 | if (!s->Input[i].Writemask) { |
||
602 | continue; |
||
603 | } |
||
604 | for (var_ptr = variables, node_index = 0; |
||
605 | var_ptr; var_ptr = var_ptr->Next, node_index++) { |
||
606 | struct rc_variable * var = var_ptr->Item; |
||
607 | if (overlap_live_intervals_array(s->Input[i].Live, |
||
608 | var->Live)) { |
||
609 | ra_add_node_interference(graph, node_index, |
||
610 | node_count + input_node); |
||
611 | } |
||
612 | } |
||
613 | /* Manually allocate a register for this input */ |
||
614 | ra_set_node_reg(graph, node_count + input_node, get_reg_id( |
||
615 | s->Input[i].Index, s->Input[i].Writemask)); |
||
616 | input_node++; |
||
617 | } |
||
618 | |||
619 | if (!ra_allocate(graph)) { |
||
620 | rc_error(s->C, "Ran out of hardware temporaries\n"); |
||
621 | return; |
||
622 | } |
||
623 | |||
624 | /* Rewrite the registers */ |
||
625 | for (var_ptr = variables, node_index = 0; var_ptr; |
||
626 | var_ptr = var_ptr->Next, node_index++) { |
||
627 | int reg = ra_get_node_reg(graph, node_index); |
||
628 | unsigned int writemask = reg_get_writemask(reg); |
||
629 | unsigned int index = reg_get_index(reg); |
||
630 | struct rc_variable * var = var_ptr->Item; |
||
631 | |||
632 | if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { |
||
633 | writemask = rc_variable_writemask_sum(var); |
||
634 | } |
||
635 | |||
636 | if (var->Dst.File == RC_FILE_INPUT) { |
||
637 | continue; |
||
638 | } |
||
639 | rc_variable_change_dst(var, index, writemask); |
||
640 | } |
||
641 | |||
642 | ralloc_free(graph); |
||
643 | } |
||
644 | |||
645 | void rc_init_regalloc_state(struct rc_regalloc_state *s) |
||
646 | { |
||
647 | unsigned i, j, index; |
||
648 | unsigned **ra_q_values; |
||
649 | |||
650 | /* Pre-computed q values. This array describes the maximum number of |
||
651 | * a class's [row] registers that are in conflict with a single |
||
652 | * register from another class [column]. |
||
653 | * |
||
654 | * For example: |
||
655 | * q_values[0][2] is 3, because a register from class 2 |
||
656 | * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from |
||
657 | * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y, |
||
658 | * and T0.z. |
||
659 | * |
||
660 | * q_values[2][0] is 1, because a register from class 0 |
||
661 | * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from |
||
662 | * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz |
||
663 | * |
||
664 | * The q values for each register class [row] will never be greater |
||
665 | * than the maximum number of writemask combinations for that class. |
||
666 | * |
||
667 | * For example: |
||
668 | * |
||
669 | * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination, |
||
670 | * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater |
||
671 | * than 1. |
||
672 | */ |
||
673 | const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = { |
||
674 | {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}, |
||
675 | {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3}, |
||
676 | {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
||
677 | {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}, |
||
678 | {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3}, |
||
679 | {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}, |
||
680 | {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
||
681 | {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1}, |
||
682 | {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, |
||
683 | {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1}, |
||
684 | {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1}, |
||
685 | {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, |
||
686 | {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, |
||
687 | {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, |
||
688 | {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1}, |
||
689 | {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, |
||
690 | {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
||
691 | {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, |
||
692 | {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} |
||
693 | }; |
||
694 | |||
695 | /* Allocate the main ra data structure */ |
||
696 | s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW); |
||
697 | |||
698 | /* Create the register classes */ |
||
699 | for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
||
700 | const struct rc_class *class = &rc_class_list[i]; |
||
701 | s->class_ids[class->ID] = ra_alloc_reg_class(s->regs); |
||
702 | |||
703 | /* Assign registers to the classes */ |
||
704 | for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) { |
||
705 | for (j = 0; j < class->WritemaskCount; j++) { |
||
706 | int reg_id = get_reg_id(index, |
||
707 | class->Writemasks[j]); |
||
708 | ra_class_add_reg(s->regs, |
||
709 | s->class_ids[class->ID], reg_id); |
||
710 | } |
||
711 | } |
||
712 | } |
||
713 | |||
714 | /* Set the q values. The q_values array is indexed based on |
||
715 | * the rc_reg_class ID (RC_REG_CLASS_*) which might be |
||
716 | * different than the ID assigned to that class by ra. |
||
717 | * This why we need to manually construct this list. |
||
718 | */ |
||
719 | ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *)); |
||
720 | |||
721 | for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
||
722 | ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned)); |
||
723 | for (j = 0; j < RC_REG_CLASS_COUNT; j++) { |
||
724 | ra_q_values[s->class_ids[i]][s->class_ids[j]] = |
||
725 | q_values[i][j]; |
||
726 | } |
||
727 | } |
||
728 | |||
729 | /* Add register conflicts */ |
||
730 | add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS); |
||
731 | |||
732 | ra_set_finalize(s->regs, ra_q_values); |
||
733 | |||
734 | for (i = 0; i < RC_REG_CLASS_COUNT; i++) { |
||
735 | FREE(ra_q_values[i]); |
||
736 | } |
||
737 | FREE(ra_q_values); |
||
738 | } |
||
739 | |||
740 | void rc_destroy_regalloc_state(struct rc_regalloc_state *s) |
||
741 | { |
||
742 | ralloc_free(s->regs); |
||
743 | } |
||
744 | |||
745 | /** |
||
746 | * @param user This parameter should be a pointer to an integer value. If this |
||
747 | * integer value is zero, then a simple register allocator will be used that |
||
748 | * only allocates space for input registers (\sa do_regalloc_inputs_only). If |
||
749 | * user is non-zero, then the regular register allocator will be used |
||
750 | * (\sa do_regalloc). |
||
751 | */ |
||
752 | void rc_pair_regalloc(struct radeon_compiler *cc, void *user) |
||
753 | { |
||
754 | struct r300_fragment_program_compiler *c = |
||
755 | (struct r300_fragment_program_compiler*)cc; |
||
756 | struct regalloc_state s; |
||
757 | int * do_full_regalloc = (int*)user; |
||
758 | |||
759 | memset(&s, 0, sizeof(s)); |
||
760 | s.C = cc; |
||
761 | s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; |
||
762 | s.Input = memory_pool_malloc(&cc->Pool, |
||
763 | s.NumInputs * sizeof(struct register_info)); |
||
764 | memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); |
||
765 | |||
766 | s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; |
||
767 | s.Temporary = memory_pool_malloc(&cc->Pool, |
||
768 | s.NumTemporaries * sizeof(struct register_info)); |
||
769 | memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); |
||
770 | |||
771 | rc_recompute_ips(s.C); |
||
772 | |||
773 | c->AllocateHwInputs(c, &alloc_input_simple, &s); |
||
774 | if (*do_full_regalloc) { |
||
775 | do_advanced_regalloc(&s); |
||
776 | } else { |
||
777 | s.Simple = 1; |
||
778 | do_regalloc_inputs_only(&s); |
||
779 | } |
||
780 | |||
781 | /* Rewrite inputs and if we are doing the simple allocation, rewrite |
||
782 | * temporaries too. */ |
||
783 | for (struct rc_instruction *inst = s.C->Program.Instructions.Next; |
||
784 | inst != &s.C->Program.Instructions; |
||
785 | inst = inst->Next) { |
||
786 | rc_remap_registers(inst, &remap_register, &s); |
||
787 | } |
||
788 | }>>>>>>>>><>>>=>=>>>>>>>>>>>>> |