Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2015 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Jason Ekstrand |
||
25 | */ |
||
26 | |||
27 | #include "brw_nir.h" |
||
28 | |||
29 | /* |
||
30 | * This file implements an analysis pass that determines when we have to do |
||
31 | * a boolean resolve on Gen <= 5. Instructions that need a boolean resolve |
||
32 | * will have the booleans portion of the instr->pass_flags field set to |
||
33 | * BRW_NIR_BOOLEAN_NEEDS_RESOLVE. |
||
34 | */ |
||
35 | |||
36 | |||
37 | /** Returns the resolve status for the given source |
||
38 | * |
||
39 | * If the source has a parent instruction then the resolve status is the |
||
40 | * status of the parent instruction. If the source does not have a parent |
||
41 | * instruction then we don't know so we return NON_BOOLEAN. |
||
42 | */ |
||
43 | static uint8_t |
||
44 | get_resolve_status_for_src(nir_src *src) |
||
45 | { |
||
46 | nir_instr *src_instr = nir_src_get_parent_instr(src); |
||
47 | if (src_instr) { |
||
48 | uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; |
||
49 | |||
50 | /* If the source instruction needs resolve, then from the perspective |
||
51 | * of the user, it's a true boolean. |
||
52 | */ |
||
53 | if (resolve_status == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) |
||
54 | resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE; |
||
55 | return resolve_status; |
||
56 | } else { |
||
57 | return BRW_NIR_NON_BOOLEAN; |
||
58 | } |
||
59 | } |
||
60 | |||
61 | /** Marks the given source as needing a resolve |
||
62 | * |
||
63 | * If the given source corresponds to an unresolved boolean it marks it as |
||
64 | * needing a resolve. Otherwise, we leave it alone. |
||
65 | */ |
||
66 | static bool |
||
67 | src_mark_needs_resolve(nir_src *src, void *void_state) |
||
68 | { |
||
69 | nir_instr *src_instr = nir_src_get_parent_instr(src); |
||
70 | if (src_instr) { |
||
71 | uint8_t resolve_status = src_instr->pass_flags & BRW_NIR_BOOLEAN_MASK; |
||
72 | |||
73 | /* If the source instruction is unresolved, then mark it as needing |
||
74 | * to be resolved. |
||
75 | */ |
||
76 | if (resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) { |
||
77 | src_instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK; |
||
78 | src_instr->pass_flags |= BRW_NIR_BOOLEAN_NEEDS_RESOLVE; |
||
79 | } |
||
80 | |||
81 | } |
||
82 | |||
83 | return true; |
||
84 | } |
||
85 | |||
86 | static bool |
||
87 | analyze_boolean_resolves_block(nir_block *block, void *void_state) |
||
88 | { |
||
89 | nir_foreach_instr(block, instr) { |
||
90 | switch (instr->type) { |
||
91 | case nir_instr_type_alu: { |
||
92 | /* For ALU instructions, the resolve status is handled in a |
||
93 | * three-step process. |
||
94 | * |
||
95 | * 1) Look at the instruction type and sources and determine if it |
||
96 | * can be left unresolved. |
||
97 | * |
||
98 | * 2) Look at the destination and see if we have to resolve |
||
99 | * anyway. (This is the case if this instruction is not the |
||
100 | * only instruction writing to a given register.) |
||
101 | * |
||
102 | * 3) If the instruction has a resolve status other than |
||
103 | * BOOL_UNRESOLVED or BOOL_NEEDS_RESOLVE then we walk through |
||
104 | * the sources and ensure that they are also resolved. This |
||
105 | * ensures that we don't end up with any stray unresolved |
||
106 | * booleans going into ADDs or something like that. |
||
107 | */ |
||
108 | |||
109 | uint8_t resolve_status; |
||
110 | nir_alu_instr *alu = nir_instr_as_alu(instr); |
||
111 | switch (alu->op) { |
||
112 | case nir_op_flt: |
||
113 | case nir_op_ilt: |
||
114 | case nir_op_ult: |
||
115 | case nir_op_fge: |
||
116 | case nir_op_ige: |
||
117 | case nir_op_uge: |
||
118 | case nir_op_feq: |
||
119 | case nir_op_ieq: |
||
120 | case nir_op_fne: |
||
121 | case nir_op_ine: |
||
122 | case nir_op_f2b: |
||
123 | case nir_op_i2b: |
||
124 | /* This instruction will turn into a CMP when we actually emit |
||
125 | * so the result will have to be resolved before it can be used. |
||
126 | */ |
||
127 | resolve_status = BRW_NIR_BOOLEAN_UNRESOLVED; |
||
128 | |||
129 | /* Even though the destination is allowed to be left unresolved, |
||
130 | * the sources are treated as regular integers or floats so |
||
131 | * they need to be resolved. |
||
132 | */ |
||
133 | nir_foreach_src(instr, src_mark_needs_resolve, NULL); |
||
134 | break; |
||
135 | |||
136 | case nir_op_imov: |
||
137 | case nir_op_inot: |
||
138 | /* This is a single-source instruction. Just copy the resolve |
||
139 | * status from the source. |
||
140 | */ |
||
141 | resolve_status = get_resolve_status_for_src(&alu->src[0].src); |
||
142 | break; |
||
143 | |||
144 | case nir_op_iand: |
||
145 | case nir_op_ior: |
||
146 | case nir_op_ixor: { |
||
147 | uint8_t src0_status = get_resolve_status_for_src(&alu->src[0].src); |
||
148 | uint8_t src1_status = get_resolve_status_for_src(&alu->src[1].src); |
||
149 | |||
150 | if (src0_status == src1_status) { |
||
151 | resolve_status = src0_status; |
||
152 | } else if (src0_status == BRW_NIR_NON_BOOLEAN || |
||
153 | src1_status == BRW_NIR_NON_BOOLEAN) { |
||
154 | /* If one of the sources is a non-boolean then the whole |
||
155 | * thing is a non-boolean. |
||
156 | */ |
||
157 | resolve_status = BRW_NIR_NON_BOOLEAN; |
||
158 | } else { |
||
159 | /* At this point one of them is a true boolean and one is a |
||
160 | * boolean that needs a resolve. We could either resolve the |
||
161 | * unresolved source or we could resolve here. If we resolve |
||
162 | * the unresolved source then we get two resolves for the price |
||
163 | * of one. Just set this one to BOOLEAN_NO_RESOLVE and we'll |
||
164 | * let the code below force a resolve on the unresolved source. |
||
165 | */ |
||
166 | resolve_status = BRW_NIR_BOOLEAN_NO_RESOLVE; |
||
167 | } |
||
168 | break; |
||
169 | } |
||
170 | |||
171 | default: |
||
172 | resolve_status = BRW_NIR_NON_BOOLEAN; |
||
173 | } |
||
174 | |||
175 | /* If the destination is SSA-like, go ahead allow unresolved booleans. |
||
176 | * If the destination register doesn't have a well-defined parent_instr |
||
177 | * we need to resolve immediately. |
||
178 | */ |
||
179 | if (alu->dest.dest.reg.reg->parent_instr == NULL && |
||
180 | resolve_status == BRW_NIR_BOOLEAN_UNRESOLVED) { |
||
181 | resolve_status = BRW_NIR_BOOLEAN_NEEDS_RESOLVE; |
||
182 | } |
||
183 | |||
184 | instr->pass_flags = (instr->pass_flags & ~BRW_NIR_BOOLEAN_MASK) | |
||
185 | resolve_status; |
||
186 | |||
187 | /* Finally, resolve sources if it's needed */ |
||
188 | switch (resolve_status) { |
||
189 | case BRW_NIR_BOOLEAN_NEEDS_RESOLVE: |
||
190 | case BRW_NIR_BOOLEAN_UNRESOLVED: |
||
191 | /* This instruction is either unresolved or we're doing the |
||
192 | * resolve here; leave the sources alone. |
||
193 | */ |
||
194 | break; |
||
195 | |||
196 | case BRW_NIR_BOOLEAN_NO_RESOLVE: |
||
197 | case BRW_NIR_NON_BOOLEAN: |
||
198 | nir_foreach_src(instr, src_mark_needs_resolve, NULL); |
||
199 | break; |
||
200 | |||
201 | default: |
||
202 | unreachable("Invalid boolean flag"); |
||
203 | } |
||
204 | |||
205 | break; |
||
206 | } |
||
207 | |||
208 | case nir_instr_type_load_const: { |
||
209 | nir_load_const_instr *load = nir_instr_as_load_const(instr); |
||
210 | |||
211 | /* For load_const instructions, it's a boolean exactly when it holds |
||
212 | * one of the values NIR_TRUE or NIR_FALSE. |
||
213 | * |
||
214 | * Since load_const instructions don't have any sources, we don't |
||
215 | * have to worry about resolving them. |
||
216 | */ |
||
217 | instr->pass_flags &= ~BRW_NIR_BOOLEAN_MASK; |
||
218 | if (load->value.u[0] == NIR_TRUE || load->value.u[0] == NIR_FALSE) { |
||
219 | instr->pass_flags |= BRW_NIR_BOOLEAN_NO_RESOLVE; |
||
220 | } else { |
||
221 | instr->pass_flags |= BRW_NIR_NON_BOOLEAN; |
||
222 | } |
||
223 | continue; |
||
224 | } |
||
225 | |||
226 | default: |
||
227 | /* Everything else is an unknown non-boolean value and needs to |
||
228 | * have all sources resolved. |
||
229 | */ |
||
230 | instr->pass_flags = (instr->pass_flags & ~BRW_NIR_BOOLEAN_MASK) | |
||
231 | BRW_NIR_NON_BOOLEAN; |
||
232 | nir_foreach_src(instr, src_mark_needs_resolve, NULL); |
||
233 | continue; |
||
234 | } |
||
235 | } |
||
236 | |||
237 | nir_if *following_if = nir_block_get_following_if(block); |
||
238 | if (following_if) |
||
239 | src_mark_needs_resolve(&following_if->condition, NULL); |
||
240 | |||
241 | return true; |
||
242 | } |
||
243 | |||
244 | static void |
||
245 | analyze_boolean_resolves_impl(nir_function_impl *impl) |
||
246 | { |
||
247 | nir_foreach_block(impl, analyze_boolean_resolves_block, NULL); |
||
248 | } |
||
249 | |||
250 | void |
||
251 | brw_nir_analyze_boolean_resolves(nir_shader *shader) |
||
252 | { |
||
253 | nir_foreach_overload(shader, overload) |
||
254 | if (overload->impl) |
||
255 | analyze_boolean_resolves_impl(overload->impl); |
||
256 | }=> |