Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2014 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include "pipe/p_shader_tokens.h" |
||
30 | #include "util/u_math.h" |
||
31 | |||
32 | #include "freedreno_util.h" |
||
33 | |||
34 | #include "ir3.h" |
||
35 | |||
36 | /* |
||
37 | * Legalize: |
||
38 | * |
||
39 | * We currently require that scheduling ensures that we have enough nop's |
||
40 | * in all the right places. The legalize step mostly handles fixing up |
||
41 | * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nop's |
||
42 | * into fewer nop's w/ rpt flag. |
||
43 | */ |
||
44 | |||
/*
 * Per-invocation state for the legalize pass: one input (the block to
 * process) and two results that the pass accumulates while walking it.
 */
struct ir3_legalize_ctx {
	/* in: the block whose instruction list is walked */
	struct ir3_block *block;
	/* out: set to true once a tex instruction has been emitted */
	bool has_samp;
	/* out: running maximum of the immediate held in regs[1] of every
	 * input instruction seen
	 */
	int max_bary;
};
||
50 | |||
51 | static void legalize(struct ir3_legalize_ctx *ctx) |
||
52 | { |
||
53 | struct ir3_block *block = ctx->block; |
||
54 | struct ir3_instruction *n; |
||
55 | struct ir3 *shader = block->shader; |
||
56 | struct ir3_instruction *end = |
||
57 | ir3_instr_create(block, 0, OPC_END); |
||
58 | struct ir3_instruction *last_input = NULL; |
||
59 | struct ir3_instruction *last_rel = NULL; |
||
60 | regmask_t needs_ss_war; /* write after read */ |
||
61 | regmask_t needs_ss; |
||
62 | regmask_t needs_sy; |
||
63 | |||
64 | regmask_init(&needs_ss_war); |
||
65 | regmask_init(&needs_ss); |
||
66 | regmask_init(&needs_sy); |
||
67 | |||
68 | shader->instrs_count = 0; |
||
69 | |||
70 | for (n = block->head; n; n = n->next) { |
||
71 | struct ir3_register *reg; |
||
72 | unsigned i; |
||
73 | |||
74 | if (is_meta(n)) |
||
75 | continue; |
||
76 | |||
77 | if (is_input(n)) { |
||
78 | struct ir3_register *inloc = n->regs[1]; |
||
79 | assert(inloc->flags & IR3_REG_IMMED); |
||
80 | ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val); |
||
81 | } |
||
82 | |||
83 | /* NOTE: consider dst register too.. it could happen that |
||
84 | * texture sample instruction (for example) writes some |
||
85 | * components which are unused. A subsequent instruction |
||
86 | * that writes the same register can race w/ the sam instr |
||
87 | * resulting in undefined results: |
||
88 | */ |
||
89 | for (i = 0; i < n->regs_count; i++) { |
||
90 | reg = n->regs[i]; |
||
91 | |||
92 | if (reg_gpr(reg)) { |
||
93 | |||
94 | /* TODO: we probably only need (ss) for alu |
||
95 | * instr consuming sfu result.. need to make |
||
96 | * some tests for both this and (sy).. |
||
97 | */ |
||
98 | if (regmask_get(&needs_ss, reg)) { |
||
99 | n->flags |= IR3_INSTR_SS; |
||
100 | regmask_init(&needs_ss); |
||
101 | } |
||
102 | |||
103 | if (regmask_get(&needs_sy, reg)) { |
||
104 | n->flags |= IR3_INSTR_SY; |
||
105 | regmask_init(&needs_sy); |
||
106 | } |
||
107 | } |
||
108 | |||
109 | /* TODO: is it valid to have address reg loaded from a |
||
110 | * relative src (ie. mova a0, c |
||
111 | * last_rel check below should be moved ahead of this: |
||
112 | */ |
||
113 | if (reg->flags & IR3_REG_RELATIV) |
||
114 | last_rel = n; |
||
115 | } |
||
116 | |||
117 | if (n->regs_count > 0) { |
||
118 | reg = n->regs[0]; |
||
119 | if (regmask_get(&needs_ss_war, reg)) { |
||
120 | n->flags |= IR3_INSTR_SS; |
||
121 | regmask_init(&needs_ss_war); // ??? I assume? |
||
122 | } |
||
123 | |||
124 | if (last_rel && (reg->num == regid(REG_A0, 0))) { |
||
125 | last_rel->flags |= IR3_INSTR_UL; |
||
126 | last_rel = NULL; |
||
127 | } |
||
128 | } |
||
129 | |||
130 | /* cat5+ does not have an (ss) bit, if needed we need to |
||
131 | * insert a nop to carry the sync flag. Would be kinda |
||
132 | * clever if we were aware of this during scheduling, but |
||
133 | * this should be a pretty rare case: |
||
134 | */ |
||
135 | if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) { |
||
136 | struct ir3_instruction *nop; |
||
137 | nop = ir3_instr_create(block, 0, OPC_NOP); |
||
138 | nop->flags |= IR3_INSTR_SS; |
||
139 | n->flags &= ~IR3_INSTR_SS; |
||
140 | } |
||
141 | |||
142 | /* need to be able to set (ss) on first instruction: */ |
||
143 | if ((shader->instrs_count == 0) && (n->category >= 5)) |
||
144 | ir3_instr_create(block, 0, OPC_NOP); |
||
145 | |||
146 | if (is_nop(n) && shader->instrs_count) { |
||
147 | struct ir3_instruction *last = |
||
148 | shader->instrs[shader->instrs_count-1]; |
||
149 | if (is_nop(last) && (last->repeat < 5)) { |
||
150 | last->repeat++; |
||
151 | last->flags |= n->flags; |
||
152 | continue; |
||
153 | } |
||
154 | } |
||
155 | |||
156 | shader->instrs[shader->instrs_count++] = n; |
||
157 | |||
158 | if (is_sfu(n)) |
||
159 | regmask_set(&needs_ss, n->regs[0]); |
||
160 | |||
161 | if (is_tex(n)) { |
||
162 | /* this ends up being the # of samp instructions.. but that |
||
163 | * is ok, everything else only cares whether it is zero or |
||
164 | * not. We do this here, rather than when we encounter a |
||
165 | * SAMP decl, because (especially in binning pass shader) |
||
166 | * the samp instruction(s) could get eliminated if the |
||
167 | * result is not used. |
||
168 | */ |
||
169 | ctx->has_samp = true; |
||
170 | regmask_set(&needs_sy, n->regs[0]); |
||
171 | } else if (is_mem(n)) { |
||
172 | regmask_set(&needs_sy, n->regs[0]); |
||
173 | } |
||
174 | |||
175 | /* both tex/sfu appear to not always immediately consume |
||
176 | * their src register(s): |
||
177 | */ |
||
178 | if (is_tex(n) || is_sfu(n) || is_mem(n)) { |
||
179 | foreach_src(reg, n) { |
||
180 | if (reg_gpr(reg)) |
||
181 | regmask_set(&needs_ss_war, reg); |
||
182 | } |
||
183 | } |
||
184 | |||
185 | if (is_input(n)) |
||
186 | last_input = n; |
||
187 | } |
||
188 | |||
189 | if (last_input) { |
||
190 | /* special hack.. if using ldlv to bypass interpolation, |
||
191 | * we need to insert a dummy bary.f on which we can set |
||
192 | * the (ei) flag: |
||
193 | */ |
||
194 | if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) { |
||
195 | int i, cnt; |
||
196 | |||
197 | /* note that ir3_instr_create() inserts into |
||
198 | * shader->instrs[] and increments the count.. |
||
199 | * so we need to bump up the cnt initially (to |
||
200 | * avoid it clobbering the last real instr) and |
||
201 | * restore it after. |
||
202 | */ |
||
203 | cnt = ++shader->instrs_count; |
||
204 | |||
205 | /* inserting instructions would be a bit nicer if list.. */ |
||
206 | for (i = cnt - 2; i >= 0; i--) { |
||
207 | if (shader->instrs[i] == last_input) { |
||
208 | |||
209 | /* (ss)bary.f (ei)r63.x, 0, r0.x */ |
||
210 | last_input = ir3_instr_create(block, 2, OPC_BARY_F); |
||
211 | last_input->flags |= IR3_INSTR_SS; |
||
212 | ir3_reg_create(last_input, regid(63, 0), 0); |
||
213 | ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0; |
||
214 | ir3_reg_create(last_input, regid(0, 0), 0); |
||
215 | |||
216 | shader->instrs[i + 1] = last_input; |
||
217 | |||
218 | break; |
||
219 | } |
||
220 | shader->instrs[i + 1] = shader->instrs[i]; |
||
221 | } |
||
222 | |||
223 | shader->instrs_count = cnt; |
||
224 | } |
||
225 | last_input->regs[0]->flags |= IR3_REG_EI; |
||
226 | } |
||
227 | |||
228 | if (last_rel) |
||
229 | last_rel->flags |= IR3_INSTR_UL; |
||
230 | |||
231 | shader->instrs[shader->instrs_count++] = end; |
||
232 | |||
233 | shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; |
||
234 | } |
||
235 | |||
236 | void ir3_block_legalize(struct ir3_block *block, |
||
237 | bool *has_samp, int *max_bary) |
||
238 | { |
||
239 | struct ir3_legalize_ctx ctx = { |
||
240 | .block = block, |
||
241 | .max_bary = -1, |
||
242 | }; |
||
243 | |||
244 | legalize(&ctx); |
||
245 | |||
246 | *has_samp = ctx.has_samp; |
||
247 | *max_bary = ctx.max_bary; |
||
248 | }>> |