Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright © 2012 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Eric Anholt |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | #include "brw_cfg.h" |
||
29 | #include "brw_fs_live_variables.h" |
||
30 | |||
31 | using namespace brw; |
||
32 | |||
33 | /** @file brw_fs_live_variables.cpp |
||
34 | * |
||
35 | * Support for computing at the basic block level which variables |
||
36 | * (virtual GRFs in our case) are live at entry and exit. |
||
37 | * |
||
38 | * See Muchnik's Advanced Compiler Design and Implementation, section |
||
39 | * 14.1 (p444). |
||
40 | */ |
||
41 | |||
42 | /** |
||
43 | * Sets up the use[] and def[] bitsets. |
||
44 | * |
||
45 | * The basic-block-level live variable analysis needs to know which |
||
46 | * variables get used before they're completely defined, and which |
||
47 | * variables are completely defined before they're used. |
||
48 | */ |
||
49 | void |
||
50 | fs_live_variables::setup_def_use() |
||
51 | { |
||
52 | int ip = 0; |
||
53 | |||
54 | for (int b = 0; b < cfg->num_blocks; b++) { |
||
55 | bblock_t *block = cfg->blocks[b]; |
||
56 | |||
57 | assert(ip == block->start_ip); |
||
58 | if (b > 0) |
||
59 | assert(cfg->blocks[b - 1]->end_ip == ip - 1); |
||
60 | |||
61 | for (fs_inst *inst = (fs_inst *)block->start; |
||
62 | inst != block->end->next; |
||
63 | inst = (fs_inst *)inst->next) { |
||
64 | |||
65 | /* Set use[] for this instruction */ |
||
66 | for (unsigned int i = 0; i < 3; i++) { |
||
67 | if (inst->src[i].file == GRF) { |
||
68 | int reg = inst->src[i].reg; |
||
69 | |||
70 | if (!BITSET_TEST(bd[b].def, reg)) |
||
71 | BITSET_SET(bd[b].use, reg); |
||
72 | } |
||
73 | } |
||
74 | |||
75 | /* Check for unconditional writes to whole registers. These |
||
76 | * are the things that screen off preceding definitions of a |
||
77 | * variable, and thus qualify for being in def[]. |
||
78 | */ |
||
79 | if (inst->dst.file == GRF && |
||
80 | inst->regs_written == v->virtual_grf_sizes[inst->dst.reg] && |
||
81 | !inst->is_partial_write()) { |
||
82 | int reg = inst->dst.reg; |
||
83 | if (!BITSET_TEST(bd[b].use, reg)) |
||
84 | BITSET_SET(bd[b].def, reg); |
||
85 | } |
||
86 | |||
87 | ip++; |
||
88 | } |
||
89 | } |
||
90 | } |
||
91 | |||
92 | /** |
||
93 | * The algorithm incrementally sets bits in liveout and livein, |
||
94 | * propagating it through control flow. It will eventually terminate |
||
95 | * because it only ever adds bits, and stops when no bits are added in |
||
96 | * a pass. |
||
97 | */ |
||
98 | void |
||
99 | fs_live_variables::compute_live_variables() |
||
100 | { |
||
101 | bool cont = true; |
||
102 | |||
103 | while (cont) { |
||
104 | cont = false; |
||
105 | |||
106 | for (int b = 0; b < cfg->num_blocks; b++) { |
||
107 | /* Update livein */ |
||
108 | for (int i = 0; i < bitset_words; i++) { |
||
109 | BITSET_WORD new_livein = (bd[b].use[i] | |
||
110 | (bd[b].liveout[i] & ~bd[b].def[i])); |
||
111 | if (new_livein & ~bd[b].livein[i]) { |
||
112 | bd[b].livein[i] |= new_livein; |
||
113 | cont = true; |
||
114 | } |
||
115 | } |
||
116 | |||
117 | /* Update liveout */ |
||
118 | foreach_list(block_node, &cfg->blocks[b]->children) { |
||
119 | bblock_link *link = (bblock_link *)block_node; |
||
120 | bblock_t *block = link->block; |
||
121 | |||
122 | for (int i = 0; i < bitset_words; i++) { |
||
123 | BITSET_WORD new_liveout = (bd[block->block_num].livein[i] & |
||
124 | ~bd[b].liveout[i]); |
||
125 | if (new_liveout) { |
||
126 | bd[b].liveout[i] |= new_liveout; |
||
127 | cont = true; |
||
128 | } |
||
129 | } |
||
130 | } |
||
131 | } |
||
132 | } |
||
133 | } |
||
134 | |||
135 | fs_live_variables::fs_live_variables(fs_visitor *v, cfg_t *cfg) |
||
136 | : v(v), cfg(cfg) |
||
137 | { |
||
138 | mem_ctx = ralloc_context(cfg->mem_ctx); |
||
139 | |||
140 | num_vars = v->virtual_grf_count; |
||
141 | bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); |
||
142 | |||
143 | bitset_words = BITSET_WORDS(v->virtual_grf_count); |
||
144 | for (int i = 0; i < cfg->num_blocks; i++) { |
||
145 | bd[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); |
||
146 | bd[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); |
||
147 | bd[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); |
||
148 | bd[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words); |
||
149 | } |
||
150 | |||
151 | setup_def_use(); |
||
152 | compute_live_variables(); |
||
153 | } |
||
154 | |||
155 | fs_live_variables::~fs_live_variables() |
||
156 | { |
||
157 | ralloc_free(mem_ctx); |
||
158 | } |
||
159 | |||
160 | #define MAX_INSTRUCTION (1 << 30) |
||
161 | |||
162 | void |
||
163 | fs_visitor::calculate_live_intervals() |
||
164 | { |
||
165 | int num_vars = this->virtual_grf_count; |
||
166 | |||
167 | if (this->live_intervals_valid) |
||
168 | return; |
||
169 | |||
170 | int *start = ralloc_array(mem_ctx, int, num_vars); |
||
171 | int *end = ralloc_array(mem_ctx, int, num_vars); |
||
172 | ralloc_free(this->virtual_grf_start); |
||
173 | ralloc_free(this->virtual_grf_end); |
||
174 | this->virtual_grf_start = start; |
||
175 | this->virtual_grf_end = end; |
||
176 | |||
177 | for (int i = 0; i < num_vars; i++) { |
||
178 | start[i] = MAX_INSTRUCTION; |
||
179 | end[i] = -1; |
||
180 | } |
||
181 | |||
182 | /* Start by setting up the intervals with no knowledge of control |
||
183 | * flow. |
||
184 | */ |
||
185 | int ip = 0; |
||
186 | foreach_list(node, &this->instructions) { |
||
187 | fs_inst *inst = (fs_inst *)node; |
||
188 | |||
189 | for (unsigned int i = 0; i < 3; i++) { |
||
190 | if (inst->src[i].file == GRF) { |
||
191 | int reg = inst->src[i].reg; |
||
192 | int end_ip = ip; |
||
193 | |||
194 | /* In most cases, a register can be written over safely by the |
||
195 | * same instruction that is its last use. For a single |
||
196 | * instruction, the sources are dereferenced before writing of the |
||
197 | * destination starts (naturally). This gets more complicated for |
||
198 | * simd16, because the instruction: |
||
199 | * |
||
200 | * mov(16) g4<1>F g4<8,8,1>F g6<8,8,1>F |
||
201 | * |
||
202 | * is actually decoded in hardware as: |
||
203 | * |
||
204 | * mov(8) g4<1>F g4<8,8,1>F g6<8,8,1>F |
||
205 | * mov(8) g5<1>F g5<8,8,1>F g7<8,8,1>F |
||
206 | * |
||
207 | * Which is safe. However, if we have uniform accesses |
||
208 | * happening, we get into trouble: |
||
209 | * |
||
210 | * mov(8) g4<1>F g4<0,1,0>F g6<8,8,1>F |
||
211 | * mov(8) g5<1>F g4<0,1,0>F g7<8,8,1>F |
||
212 | * |
||
213 | * Now our destination for the first instruction overwrote the |
||
214 | * second instruction's src0, and we get garbage for those 8 |
||
215 | * pixels. There's a similar issue for the pre-gen6 |
||
216 | * pixel_x/pixel_y, which are registers of 16-bit values and thus |
||
217 | * would get stomped by the first decode as well. |
||
218 | */ |
||
219 | if (dispatch_width == 16 && (inst->src[i].smear >= 0 || |
||
220 | (this->pixel_x.reg == reg || |
||
221 | this->pixel_y.reg == reg))) { |
||
222 | end_ip++; |
||
223 | } |
||
224 | |||
225 | start[reg] = MIN2(start[reg], ip); |
||
226 | end[reg] = MAX2(end[reg], end_ip); |
||
227 | } |
||
228 | } |
||
229 | |||
230 | if (inst->dst.file == GRF) { |
||
231 | int reg = inst->dst.reg; |
||
232 | |||
233 | start[reg] = MIN2(start[reg], ip); |
||
234 | end[reg] = MAX2(end[reg], ip); |
||
235 | } |
||
236 | |||
237 | ip++; |
||
238 | } |
||
239 | |||
240 | /* Now, extend those intervals using our analysis of control flow. */ |
||
241 | cfg_t cfg(this); |
||
242 | fs_live_variables livevars(this, &cfg); |
||
243 | |||
244 | for (int b = 0; b < cfg.num_blocks; b++) { |
||
245 | for (int i = 0; i < num_vars; i++) { |
||
246 | if (BITSET_TEST(livevars.bd[b].livein, i)) { |
||
247 | start[i] = MIN2(start[i], cfg.blocks[b]->start_ip); |
||
248 | end[i] = MAX2(end[i], cfg.blocks[b]->start_ip); |
||
249 | } |
||
250 | |||
251 | if (BITSET_TEST(livevars.bd[b].liveout, i)) { |
||
252 | start[i] = MIN2(start[i], cfg.blocks[b]->end_ip); |
||
253 | end[i] = MAX2(end[i], cfg.blocks[b]->end_ip); |
||
254 | } |
||
255 | } |
||
256 | } |
||
257 | |||
258 | this->live_intervals_valid = true; |
||
259 | } |
||
260 | |||
261 | bool |
||
262 | fs_visitor::virtual_grf_interferes(int a, int b) |
||
263 | { |
||
264 | return !(virtual_grf_end[a] <= virtual_grf_start[b] || |
||
265 | virtual_grf_end[b] <= virtual_grf_start[a]); |
||
266 | }=>=>>>8,8,1>0,1,0>1>8,8,1>0,1,0>1>8,8,1>8,8,1>1>8,8,1>8,8,1>1>8,8,1>8,8,1>1>>>><>>>>>>> |