Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "brw_context.h" |
||
34 | #include "brw_defines.h" |
||
35 | #include "brw_eu.h" |
||
36 | |||
37 | #include "util/ralloc.h" |
||
38 | |||
39 | /** |
||
40 | * Prior to Sandybridge, the SEND instruction accepted non-MRF source |
||
41 | * registers, implicitly moving the operand to a message register. |
||
42 | * |
||
43 | * On Sandybridge, this is no longer the case. This function performs the |
||
44 | * explicit move; it should be called before emitting a SEND instruction. |
||
45 | */ |
||
46 | void |
||
47 | gen6_resolve_implied_move(struct brw_codegen *p, |
||
48 | struct brw_reg *src, |
||
49 | unsigned msg_reg_nr) |
||
50 | { |
||
51 | const struct brw_device_info *devinfo = p->devinfo; |
||
52 | if (devinfo->gen < 6) |
||
53 | return; |
||
54 | |||
55 | if (src->file == BRW_MESSAGE_REGISTER_FILE) |
||
56 | return; |
||
57 | |||
58 | if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { |
||
59 | brw_push_insn_state(p); |
||
60 | brw_set_default_exec_size(p, BRW_EXECUTE_8); |
||
61 | brw_set_default_mask_control(p, BRW_MASK_DISABLE); |
||
62 | brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); |
||
63 | brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), |
||
64 | retype(*src, BRW_REGISTER_TYPE_UD)); |
||
65 | brw_pop_insn_state(p); |
||
66 | } |
||
67 | *src = brw_message_reg(msg_reg_nr); |
||
68 | } |
||
69 | |||
70 | static void |
||
71 | gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg) |
||
72 | { |
||
73 | /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"): |
||
74 | * "The send with EOT should use register space R112-R127 for |
||
75 | * to enable loading of a new thread into the same slot while the message |
||
76 | * with EOT for current thread is pending dispatch." |
||
77 | * |
||
78 | * Since we're pretending to have 16 MRFs anyway, we may as well use the |
||
79 | * registers required for messages with EOT. |
||
80 | */ |
||
81 | const struct brw_device_info *devinfo = p->devinfo; |
||
82 | if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { |
||
83 | reg->file = BRW_GENERAL_REGISTER_FILE; |
||
84 | reg->nr += GEN7_MRF_HACK_START; |
||
85 | } |
||
86 | } |
||
87 | |||
88 | /** |
||
89 | * Convert a brw_reg_type enumeration value into the hardware representation. |
||
90 | * |
||
91 | * The hardware encoding may depend on whether the value is an immediate. |
||
92 | */ |
||
/**
 * Convert a brw_reg_type enumeration value into the hardware representation.
 *
 * The hardware encoding may depend on whether the value is an immediate:
 * UV/VF/V only exist as immediates, UB/B only as register operands, and the
 * 64-bit and half-float types only exist on newer generations (asserted
 * below).  A -1 table entry marks a combination with no hardware encoding.
 */
unsigned
brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
                        enum brw_reg_type type, unsigned file)
{
   if (file == BRW_IMMEDIATE_VALUE) {
      /* Immediate encodings.  UB/B immediates do not exist. */
      const static int imm_hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UB] = -1,
         [BRW_REGISTER_TYPE_B]  = -1,
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(imm_hw_types));
      assert(imm_hw_types[type] != -1);
      /* DF/HF/UQ/Q immediates require Gen8+. */
      assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
      return imm_hw_types[type];
   } else {
      /* Non-immediate registers.  UV/VF/V do not exist as register types. */
      const static int hw_types[] = {
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
         [BRW_REGISTER_TYPE_UV] = -1,
         [BRW_REGISTER_TYPE_VF] = -1,
         [BRW_REGISTER_TYPE_V]  = -1,
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
      };
      assert(type < ARRAY_SIZE(hw_types));
      assert(hw_types[type] != -1);
      /* DF registers require Gen7+; HF/UQ/Q require Gen8+. */
      assert(devinfo->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
      assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
      return hw_types[type];
   }
}
||
143 | |||
/**
 * Encode \p dest as the destination operand of \p inst.
 *
 * Handles both direct and register-indirect addressing, and both Align1 and
 * Align16 access modes (which pack subregister/addr-imm fields differently).
 * Note \p dest is taken by value: the hstride fixups below modify only the
 * local copy.
 */
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
   const struct brw_device_info *devinfo = p->devinfo;

   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
       dest.file != BRW_MESSAGE_REGISTER_FILE)
      assert(dest.nr < 128);

   /* On Gen7+, MRFs are emulated with the top of the GRF space. */
   gen7_convert_mrf_to_grf(p, &dest);

   brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
   brw_inst_set_dst_reg_type(devinfo, inst,
                             brw_reg_type_to_hw_type(devinfo, dest.type,
                                                     dest.file));
   brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);

      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
         /* A <0> destination stride is not encodable; promote to <1>. */
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
      } else {
         /* Align16 subregister numbers are in units of 16 bytes. */
         brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
         brw_inst_set_da16_writemask(devinfo, inst, dest.dw1.bits.writemask);
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
            assert(dest.dw1.bits.writemask != 0);
         }
         /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
          * Although Dst.HorzStride is a don't care for Align16, HW needs
          * this to be programmed as "01".
          */
         brw_inst_set_dst_hstride(devinfo, inst, 1);
      }
   } else {
      brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);

      /* The address-immediate fields are different sizes in align1 vs
       * align16:
       */
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
                                       dest.dw1.bits.indirect_offset);
         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
            dest.hstride = BRW_HORIZONTAL_STRIDE_1;
         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
      } else {
         brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
                                        dest.dw1.bits.indirect_offset);
         /* even ignored in da16, still need to set as '01' */
         brw_inst_set_dst_hstride(devinfo, inst, 1);
      }
   }

   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
    * small registers, we automatically reduce it to match the register size.
    */
   if (dest.width < BRW_EXECUTE_8)
      brw_inst_set_exec_size(devinfo, inst, dest.width);
}
||
208 | |||
209 | extern int reg_type_size[]; |
||
210 | |||
211 | static void |
||
212 | validate_reg(const struct brw_device_info *devinfo, |
||
213 | brw_inst *inst, struct brw_reg reg) |
||
214 | { |
||
215 | const int hstride_for_reg[] = {0, 1, 2, 4}; |
||
216 | const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32}; |
||
217 | const int width_for_reg[] = {1, 2, 4, 8, 16}; |
||
218 | const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32}; |
||
219 | int width, hstride, vstride, execsize; |
||
220 | |||
221 | if (reg.file == BRW_IMMEDIATE_VALUE) { |
||
222 | /* 3.3.6: Region Parameters. Restriction: Immediate vectors |
||
223 | * mean the destination has to be 128-bit aligned and the |
||
224 | * destination horiz stride has to be a word. |
||
225 | */ |
||
226 | if (reg.type == BRW_REGISTER_TYPE_V) { |
||
227 | assert(hstride_for_reg[brw_inst_dst_hstride(devinfo, inst)] * |
||
228 | reg_type_size[brw_inst_dst_reg_type(devinfo, inst)] == 2); |
||
229 | } |
||
230 | |||
231 | return; |
||
232 | } |
||
233 | |||
234 | if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
235 | reg.file == BRW_ARF_NULL) |
||
236 | return; |
||
237 | |||
238 | assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg)); |
||
239 | hstride = hstride_for_reg[reg.hstride]; |
||
240 | |||
241 | if (reg.vstride == 0xf) { |
||
242 | vstride = -1; |
||
243 | } else { |
||
244 | assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg)); |
||
245 | vstride = vstride_for_reg[reg.vstride]; |
||
246 | } |
||
247 | |||
248 | assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg)); |
||
249 | width = width_for_reg[reg.width]; |
||
250 | |||
251 | assert(brw_inst_exec_size(devinfo, inst) >= 0 && |
||
252 | brw_inst_exec_size(devinfo, inst) < ARRAY_SIZE(execsize_for_reg)); |
||
253 | execsize = execsize_for_reg[brw_inst_exec_size(devinfo, inst)]; |
||
254 | |||
255 | /* Restrictions from 3.3.10: Register Region Restrictions. */ |
||
256 | /* 3. */ |
||
257 | assert(execsize >= width); |
||
258 | |||
259 | /* 4. */ |
||
260 | if (execsize == width && hstride != 0) { |
||
261 | assert(vstride == -1 || vstride == width * hstride); |
||
262 | } |
||
263 | |||
264 | /* 5. */ |
||
265 | if (execsize == width && hstride == 0) { |
||
266 | /* no restriction on vstride. */ |
||
267 | } |
||
268 | |||
269 | /* 6. */ |
||
270 | if (width == 1) { |
||
271 | assert(hstride == 0); |
||
272 | } |
||
273 | |||
274 | /* 7. */ |
||
275 | if (execsize == 1 && width == 1) { |
||
276 | assert(hstride == 0); |
||
277 | assert(vstride == 0); |
||
278 | } |
||
279 | |||
280 | /* 8. */ |
||
281 | if (vstride == 0 && hstride == 0) { |
||
282 | assert(width == 1); |
||
283 | } |
||
284 | |||
285 | /* 10. Check destination issues. */ |
||
286 | } |
||
287 | |||
/**
 * A compacted instruction keeps only the low 12 bits of an immediate and
 * reproduces the remaining 20 bits by replicating bit 11.  Report whether
 * \p imm survives that round trip.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   const unsigned high_bits = imm & ~0xfffu;
   return high_bits == 0u || high_bits == 0xfffff000u;
}
||
297 | |||
/**
 * Encode \p reg as the src0 operand of \p inst.
 *
 * For immediates this also initializes src1's file/type (required by the
 * encoding) and retypes certain immediates so the instruction remains
 * compactable.  For register operands it encodes the direct/indirect
 * addressing fields and the region (or, in Align16, the swizzle).
 */
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct brw_device_info *devinfo = p->devinfo;

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);

   if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
                             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
      /* Any source modifiers or regions will be ignored, since this just
       * identifies the MRF/GRF to start reading the message contents from.
       * Check for some likely failures.
       */
      assert(!reg.negate);
      assert(!reg.abs);
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
   }

   validate_reg(devinfo, inst, reg);

   brw_inst_set_src0_reg_file(devinfo, inst, reg.file);
   brw_inst_set_src0_reg_type(devinfo, inst,
                              brw_reg_type_to_hw_type(devinfo, reg.type, reg.file));
   brw_inst_set_src0_abs(devinfo, inst, reg.abs);
   brw_inst_set_src0_negate(devinfo, inst, reg.negate);
   brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);

      /* The Bspec's section titled "Non-present Operands" claims that if src0
       * is an immediate that src1's type must be the same as that of src0.
       *
       * The SNB+ DataTypeIndex instruction compaction tables contain mappings
       * that do not follow this rule. E.g., from the IVB/HSW table:
       *
       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
       *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
       *
       * And from the SNB table:
       *
       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
       *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
       *
       * Neither of these cause warnings from the simulator when used,
       * compacted or otherwise. In fact, all compaction mappings that have an
       * immediate in src0 use a:ud for src1.
       *
       * The GM45 instruction compaction tables do not contain mapped meanings
       * so it's not clear whether it has the restriction. We'll assume it was
       * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
       */
      brw_inst_set_src1_reg_file(devinfo, inst, BRW_ARCHITECTURE_REGISTER_FILE);
      if (devinfo->gen < 6) {
         brw_inst_set_src1_reg_type(devinfo, inst,
                                    brw_inst_src0_reg_type(devinfo, inst));
      } else {
         brw_inst_set_src1_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
      }

      /* Compacted instructions only have 12-bits (plus 1 for the other 20)
       * for immediate values. Presumably the hardware engineers realized
       * that the only useful floating-point value that could be represented
       * in this format is 0.0, which can also be represented as a VF-typed
       * immediate, so they gave us the previously mentioned mapping on IVB+.
       *
       * Strangely, we do have a mapping for imm:f in src1, so we don't need
       * to do this there.
       *
       * If we see a 0.0:F, change the type to VF so that it can be compacted.
       */
      if (brw_inst_imm_ud(devinfo, inst) == 0x0 &&
          brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_F) {
         brw_inst_set_src0_reg_type(devinfo, inst, BRW_HW_REG_IMM_TYPE_VF);
      }

      /* There are no mappings for dst:d | i:d, so if the immediate is suitable
       * set the types to :UD so the instruction can be compacted.
       */
      if (is_compactable_immediate(brw_inst_imm_ud(devinfo, inst)) &&
          brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE &&
          brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_D &&
          brw_inst_dst_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_D) {
         brw_inst_set_src0_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
         brw_inst_set_dst_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
      }
   } else {
      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
         brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
         } else {
            /* Align16 subregister numbers are in units of 16 bytes. */
            brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
         }
      } else {
         brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);

         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
            brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
         } else {
            /* NOTE(review): this overwrites the subreg_nr field written just
             * above with the indirect offset; an ia16 addr-imm setter (the
             * src0 analogue of brw_inst_set_dst_ia16_addr_imm) would be
             * expected here — confirm against the bspec before relying on
             * Align16 indirect src0.
             */
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset);
         }
      }

      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         /* A <1;1,0>-equivalent scalar region for W1/E1; otherwise encode
          * the caller's region as-is.
          */
         if (reg.width == BRW_WIDTH_1 &&
             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
            brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
            brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
         } else {
            brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
            brw_inst_set_src0_width(devinfo, inst, reg.width);
            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
         }
      } else {
         brw_inst_set_src0_da16_swiz_x(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
         brw_inst_set_src0_da16_swiz_y(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
         brw_inst_set_src0_da16_swiz_z(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
         brw_inst_set_src0_da16_swiz_w(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
         else
            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
      }
   }
}
||
436 | |||
437 | |||
/**
 * Encode \p reg as the src1 operand of \p inst.
 *
 * src1 supports immediates and directly-addressed registers only; indirect
 * addressing and MRFs are not encodable here (asserted below).
 */
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
   const struct brw_device_info *devinfo = p->devinfo;

   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
      assert(reg.nr < 128);

   gen7_convert_mrf_to_grf(p, &reg);
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   validate_reg(devinfo, inst, reg);

   brw_inst_set_src1_reg_file(devinfo, inst, reg.file);
   brw_inst_set_src1_reg_type(devinfo, inst,
                              brw_reg_type_to_hw_type(devinfo, reg.type, reg.file));
   brw_inst_set_src1_abs(devinfo, inst, reg.abs);
   brw_inst_set_src1_negate(devinfo, inst, reg.negate);

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
   } else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */

      brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
      } else {
         /* Align16 subregister numbers are in units of 16 bytes. */
         brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
      }

      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
         /* A <0;1,0> scalar region when width and exec size are both 1;
          * otherwise encode the caller's region as-is.
          */
         if (reg.width == BRW_WIDTH_1 &&
             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
            brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
            brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
         } else {
            brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
            brw_inst_set_src1_width(devinfo, inst, reg.width);
            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
         }
      } else {
         brw_inst_set_src1_da16_swiz_x(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
         brw_inst_set_src1_da16_swiz_y(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
         brw_inst_set_src1_da16_swiz_z(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
         brw_inst_set_src1_da16_swiz_w(devinfo, inst,
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
         else
            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
      }
   }
}
||
508 | |||
509 | /** |
||
510 | * Set the Message Descriptor and Extended Message Descriptor fields |
||
511 | * for SEND messages. |
||
512 | * |
||
513 | * \note This zeroes out the Function Control bits, so it must be called |
||
514 | * \b before filling out any message-specific data. Callers can |
||
515 | * choose not to fill in irrelevant bits; they will be zero. |
||
516 | */ |
||
517 | static void |
||
518 | brw_set_message_descriptor(struct brw_codegen *p, |
||
519 | brw_inst *inst, |
||
520 | enum brw_message_target sfid, |
||
521 | unsigned msg_length, |
||
522 | unsigned response_length, |
||
523 | bool header_present, |
||
524 | bool end_of_thread) |
||
525 | { |
||
526 | const struct brw_device_info *devinfo = p->devinfo; |
||
527 | |||
528 | brw_set_src1(p, inst, brw_imm_d(0)); |
||
529 | |||
530 | /* For indirect sends, `inst` will not be the SEND/SENDC instruction |
||
531 | * itself; instead, it will be a MOV/OR into the address register. |
||
532 | * |
||
533 | * In this case, we avoid setting the extended message descriptor bits, |
||
534 | * since they go on the later SEND/SENDC instead and if set here would |
||
535 | * instead clobber the conditionalmod bits. |
||
536 | */ |
||
537 | unsigned opcode = brw_inst_opcode(devinfo, inst); |
||
538 | if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) { |
||
539 | brw_inst_set_sfid(devinfo, inst, sfid); |
||
540 | } |
||
541 | |||
542 | brw_inst_set_mlen(devinfo, inst, msg_length); |
||
543 | brw_inst_set_rlen(devinfo, inst, response_length); |
||
544 | brw_inst_set_eot(devinfo, inst, end_of_thread); |
||
545 | |||
546 | if (devinfo->gen >= 5) { |
||
547 | brw_inst_set_header_present(devinfo, inst, header_present); |
||
548 | } |
||
549 | } |
||
550 | |||
551 | static void brw_set_math_message( struct brw_codegen *p, |
||
552 | brw_inst *inst, |
||
553 | unsigned function, |
||
554 | unsigned integer_type, |
||
555 | bool low_precision, |
||
556 | unsigned dataType ) |
||
557 | { |
||
558 | const struct brw_device_info *devinfo = p->devinfo; |
||
559 | unsigned msg_length; |
||
560 | unsigned response_length; |
||
561 | |||
562 | /* Infer message length from the function */ |
||
563 | switch (function) { |
||
564 | case BRW_MATH_FUNCTION_POW: |
||
565 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: |
||
566 | case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: |
||
567 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
||
568 | msg_length = 2; |
||
569 | break; |
||
570 | default: |
||
571 | msg_length = 1; |
||
572 | break; |
||
573 | } |
||
574 | |||
575 | /* Infer response length from the function */ |
||
576 | switch (function) { |
||
577 | case BRW_MATH_FUNCTION_SINCOS: |
||
578 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
||
579 | response_length = 2; |
||
580 | break; |
||
581 | default: |
||
582 | response_length = 1; |
||
583 | break; |
||
584 | } |
||
585 | |||
586 | |||
587 | brw_set_message_descriptor(p, inst, BRW_SFID_MATH, |
||
588 | msg_length, response_length, false, false); |
||
589 | brw_inst_set_math_msg_function(devinfo, inst, function); |
||
590 | brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type); |
||
591 | brw_inst_set_math_msg_precision(devinfo, inst, low_precision); |
||
592 | brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst)); |
||
593 | brw_inst_set_math_msg_data_type(devinfo, inst, dataType); |
||
594 | brw_inst_set_saturate(devinfo, inst, 0); |
||
595 | } |
||
596 | |||
597 | |||
598 | static void brw_set_ff_sync_message(struct brw_codegen *p, |
||
599 | brw_inst *insn, |
||
600 | bool allocate, |
||
601 | unsigned response_length, |
||
602 | bool end_of_thread) |
||
603 | { |
||
604 | const struct brw_device_info *devinfo = p->devinfo; |
||
605 | |||
606 | brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
||
607 | 1, response_length, true, end_of_thread); |
||
608 | brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */ |
||
609 | brw_inst_set_urb_allocate(devinfo, insn, allocate); |
||
610 | /* The following fields are not used by FF_SYNC: */ |
||
611 | brw_inst_set_urb_global_offset(devinfo, insn, 0); |
||
612 | brw_inst_set_urb_swizzle_control(devinfo, insn, 0); |
||
613 | brw_inst_set_urb_used(devinfo, insn, 0); |
||
614 | brw_inst_set_urb_complete(devinfo, insn, 0); |
||
615 | } |
||
616 | |||
617 | static void brw_set_urb_message( struct brw_codegen *p, |
||
618 | brw_inst *insn, |
||
619 | enum brw_urb_write_flags flags, |
||
620 | unsigned msg_length, |
||
621 | unsigned response_length, |
||
622 | unsigned offset, |
||
623 | unsigned swizzle_control ) |
||
624 | { |
||
625 | const struct brw_device_info *devinfo = p->devinfo; |
||
626 | |||
627 | assert(devinfo->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); |
||
628 | assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE)); |
||
629 | assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET)); |
||
630 | |||
631 | brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
||
632 | msg_length, response_length, true, |
||
633 | flags & BRW_URB_WRITE_EOT); |
||
634 | |||
635 | if (flags & BRW_URB_WRITE_OWORD) { |
||
636 | assert(msg_length == 2); /* header + one OWORD of data */ |
||
637 | brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD); |
||
638 | } else { |
||
639 | brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD); |
||
640 | } |
||
641 | |||
642 | brw_inst_set_urb_global_offset(devinfo, insn, offset); |
||
643 | brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control); |
||
644 | |||
645 | if (devinfo->gen < 8) { |
||
646 | brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE)); |
||
647 | } |
||
648 | |||
649 | if (devinfo->gen < 7) { |
||
650 | brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE)); |
||
651 | brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED)); |
||
652 | } else { |
||
653 | brw_inst_set_urb_per_slot_offset(devinfo, insn, |
||
654 | !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET)); |
||
655 | } |
||
656 | } |
||
657 | |||
658 | void |
||
659 | brw_set_dp_write_message(struct brw_codegen *p, |
||
660 | brw_inst *insn, |
||
661 | unsigned binding_table_index, |
||
662 | unsigned msg_control, |
||
663 | unsigned msg_type, |
||
664 | unsigned msg_length, |
||
665 | bool header_present, |
||
666 | unsigned last_render_target, |
||
667 | unsigned response_length, |
||
668 | unsigned end_of_thread, |
||
669 | unsigned send_commit_msg) |
||
670 | { |
||
671 | const struct brw_device_info *devinfo = p->devinfo; |
||
672 | unsigned sfid; |
||
673 | |||
674 | if (devinfo->gen >= 7) { |
||
675 | /* Use the Render Cache for RT writes; otherwise use the Data Cache */ |
||
676 | if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) |
||
677 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
678 | else |
||
679 | sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
||
680 | } else if (devinfo->gen == 6) { |
||
681 | /* Use the render cache for all write messages. */ |
||
682 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
683 | } else { |
||
684 | sfid = BRW_SFID_DATAPORT_WRITE; |
||
685 | } |
||
686 | |||
687 | brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, |
||
688 | header_present, end_of_thread); |
||
689 | |||
690 | brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); |
||
691 | brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type); |
||
692 | brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control); |
||
693 | brw_inst_set_rt_last(devinfo, insn, last_render_target); |
||
694 | if (devinfo->gen < 7) { |
||
695 | brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg); |
||
696 | } |
||
697 | } |
||
698 | |||
699 | void |
||
700 | brw_set_dp_read_message(struct brw_codegen *p, |
||
701 | brw_inst *insn, |
||
702 | unsigned binding_table_index, |
||
703 | unsigned msg_control, |
||
704 | unsigned msg_type, |
||
705 | unsigned target_cache, |
||
706 | unsigned msg_length, |
||
707 | bool header_present, |
||
708 | unsigned response_length) |
||
709 | { |
||
710 | const struct brw_device_info *devinfo = p->devinfo; |
||
711 | unsigned sfid; |
||
712 | |||
713 | if (devinfo->gen >= 7) { |
||
714 | sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
||
715 | } else if (devinfo->gen == 6) { |
||
716 | if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) |
||
717 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
718 | else |
||
719 | sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; |
||
720 | } else { |
||
721 | sfid = BRW_SFID_DATAPORT_READ; |
||
722 | } |
||
723 | |||
724 | brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, |
||
725 | header_present, false); |
||
726 | |||
727 | brw_inst_set_binding_table_index(devinfo, insn, binding_table_index); |
||
728 | brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type); |
||
729 | brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control); |
||
730 | if (devinfo->gen < 6) |
||
731 | brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache); |
||
732 | } |
||
733 | |||
/**
 * Set up the message descriptor of \p inst for a sampler (texture) message.
 *
 * Fills in the common SEND descriptor fields (SFID, lengths, header flag)
 * plus the sampler-specific binding table index, sampler index and message
 * type.  The SIMD mode field exists on Gen5+ only; the return format field
 * is only encoded on original Gen4 (not G4x) — on other parts the
 * corresponding parameter is ignored.
 */
void
brw_set_sampler_message(struct brw_codegen *p,
                        brw_inst *inst,
                        unsigned binding_table_index,
                        unsigned sampler,
                        unsigned msg_type,
                        unsigned response_length,
                        unsigned msg_length,
                        unsigned header_present,
                        unsigned simd_mode,
                        unsigned return_format)
{
   const struct brw_device_info *devinfo = p->devinfo;

   /* end_of_thread is always false for sampler messages. */
   brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
                              response_length, header_present, false);

   brw_inst_set_binding_table_index(devinfo, inst, binding_table_index);
   brw_inst_set_sampler(devinfo, inst, sampler);
   brw_inst_set_sampler_msg_type(devinfo, inst, msg_type);
   if (devinfo->gen >= 5) {
      brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode);
   } else if (devinfo->gen == 4 && !devinfo->is_g4x) {
      brw_inst_set_sampler_return_format(devinfo, inst, return_format);
   }
}
||
760 | |||
/**
 * Set up the descriptor for a Gen7+ scratch block read/write message
 * (used for register spills/fills and scratch access).
 *
 * \param write                 true for a scratch write, false for a read
 * \param dword                 true for DWord-scattered access, false for
 *                              OWord block access (see scratch_type field)
 * \param invalidate_after_read evict the scratch lines after reading
 * \param num_regs              number of registers transferred; must be a
 *                              power of two (1/2/4, plus 8 on Gen8+) since
 *                              the block size field holds log2(num_regs)
 * \param addr_offset           scratch offset, in units defined by the HW
 */
static void
gen7_set_dp_scratch_message(struct brw_codegen *p,
                            brw_inst *inst,
                            bool write,
                            bool dword,
                            bool invalidate_after_read,
                            unsigned num_regs,
                            unsigned addr_offset,
                            unsigned mlen,
                            unsigned rlen,
                            bool header_present)
{
   const struct brw_device_info *devinfo = p->devinfo;
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
          (devinfo->gen >= 8 && num_regs == 8));
   brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
                              mlen, rlen, header_present, false);
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
   brw_inst_set_scratch_read_write(devinfo, inst, write);
   brw_inst_set_scratch_type(devinfo, inst, dword);
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
   /* The block size field encodes log2 of the register count; ffs(n) - 1
    * computes that for the power-of-two values asserted above.
    */
   brw_inst_set_scratch_block_size(devinfo, inst, ffs(num_regs) - 1);
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
||
785 | |||
786 | #define next_insn brw_next_insn |
||
787 | brw_inst * |
||
788 | brw_next_insn(struct brw_codegen *p, unsigned opcode) |
||
789 | { |
||
790 | const struct brw_device_info *devinfo = p->devinfo; |
||
791 | brw_inst *insn; |
||
792 | |||
793 | if (p->nr_insn + 1 > p->store_size) { |
||
794 | p->store_size <<= 1; |
||
795 | p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size); |
||
796 | } |
||
797 | |||
798 | p->next_insn_offset += 16; |
||
799 | insn = &p->store[p->nr_insn++]; |
||
800 | memcpy(insn, p->current, sizeof(*insn)); |
||
801 | |||
802 | brw_inst_set_opcode(devinfo, insn, opcode); |
||
803 | return insn; |
||
804 | } |
||
805 | |||
806 | static brw_inst * |
||
807 | brw_alu1(struct brw_codegen *p, unsigned opcode, |
||
808 | struct brw_reg dest, struct brw_reg src) |
||
809 | { |
||
810 | brw_inst *insn = next_insn(p, opcode); |
||
811 | brw_set_dest(p, insn, dest); |
||
812 | brw_set_src0(p, insn, src); |
||
813 | return insn; |
||
814 | } |
||
815 | |||
816 | static brw_inst * |
||
817 | brw_alu2(struct brw_codegen *p, unsigned opcode, |
||
818 | struct brw_reg dest, struct brw_reg src0, struct brw_reg src1) |
||
819 | { |
||
820 | brw_inst *insn = next_insn(p, opcode); |
||
821 | brw_set_dest(p, insn, dest); |
||
822 | brw_set_src0(p, insn, src0); |
||
823 | brw_set_src1(p, insn, src1); |
||
824 | return insn; |
||
825 | } |
||
826 | |||
827 | static int |
||
828 | get_3src_subreg_nr(struct brw_reg reg) |
||
829 | { |
||
830 | if (reg.vstride == BRW_VERTICAL_STRIDE_0) { |
||
831 | assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle)); |
||
832 | return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0); |
||
833 | } else { |
||
834 | return reg.subnr / 4; |
||
835 | } |
||
836 | } |
||
837 | |||
/**
 * Emit a three-source ALU instruction (MAD, LRP, BFE, BFI2).
 *
 * 3-src instructions use a compact encoding that only supports Align16
 * mode, direct-addressed GRF sources, and F/D/UD destination types, all of
 * which is asserted below.
 */
static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *inst = next_insn(p, opcode);

   gen7_convert_mrf_to_grf(p, &dest);

   assert(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          dest.file == BRW_MESSAGE_REGISTER_FILE);
   assert(dest.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(dest.type == BRW_REGISTER_TYPE_F ||
          dest.type == BRW_REGISTER_TYPE_D ||
          dest.type == BRW_REGISTER_TYPE_UD);
   /* Only the Gen6 encoding has an explicit GRF/MRF destination file bit. */
   if (devinfo->gen == 6) {
      brw_inst_set_3src_dst_reg_file(devinfo, inst,
                                     dest.file == BRW_MESSAGE_REGISTER_FILE);
   }
   brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
   /* Destination subregister is encoded in 16-byte units. */
   brw_inst_set_3src_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
   brw_inst_set_3src_dst_writemask(devinfo, inst, dest.dw1.bits.writemask);

   /* rep_ctrl (replicate control) marks a scalar source; the component is
    * selected by the swizzle via get_3src_subreg_nr().
    */
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.nr < 128);
   brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.dw1.bits.swizzle);
   brw_inst_set_3src_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
   brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
   brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
   brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
   brw_inst_set_3src_src0_rep_ctrl(devinfo, inst,
                                   src0.vstride == BRW_VERTICAL_STRIDE_0);

   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.nr < 128);
   brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.dw1.bits.swizzle);
   brw_inst_set_3src_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
   brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
   brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
   brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
   brw_inst_set_3src_src1_rep_ctrl(devinfo, inst,
                                   src1.vstride == BRW_VERTICAL_STRIDE_0);

   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
   assert(src2.nr < 128);
   brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.dw1.bits.swizzle);
   brw_inst_set_3src_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
   brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
   brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
   brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
   brw_inst_set_3src_src2_rep_ctrl(devinfo, inst,
                                   src2.vstride == BRW_VERTICAL_STRIDE_0);

   if (devinfo->gen >= 7) {
      /* Set both the source and destination types based on dest.type,
       * ignoring the source register types.  The MAD and LRP emitters ensure
       * that all four types are float.  The BFE and BFI2 emitters, however,
       * may send us mixed D and UD types and want us to ignore that and use
       * the destination type.
       */
      switch (dest.type) {
      case BRW_REGISTER_TYPE_F:
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_F);
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_F);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_D);
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_D);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD);
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD);
         break;
      }
   }

   return inst;
}
||
922 | |||
923 | |||
924 | /*********************************************************************** |
||
925 | * Convenience routines. |
||
926 | */ |
||
/* Define brw_<OP>() as a thin wrapper around brw_alu1(). */
#define ALU1(OP)                                                \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                   struct brw_reg dest,                         \
                   struct brw_reg src0)                         \
{                                                               \
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);             \
}

/* Define brw_<OP>() as a thin wrapper around brw_alu2(). */
#define ALU2(OP)                                                \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                   struct brw_reg dest,                         \
                   struct brw_reg src0,                         \
                   struct brw_reg src1)                         \
{                                                               \
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
}

/* Define brw_<OP>() as a thin wrapper around brw_alu3(). */
#define ALU3(OP)                                                \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                   struct brw_reg dest,                         \
                   struct brw_reg src0,                         \
                   struct brw_reg src1,                         \
                   struct brw_reg src2)                         \
{                                                               \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Like ALU3, but additionally assert that all operands are float, as
 * required by the 3-src float ops (MAD, LRP).
 */
#define ALU3F(OP)                                               \
brw_inst *brw_##OP(struct brw_codegen *p,                       \
                   struct brw_reg dest,                         \
                   struct brw_reg src0,                         \
                   struct brw_reg src1,                         \
                   struct brw_reg src2)                         \
{                                                               \
   assert(dest.type == BRW_REGISTER_TYPE_F);                    \
   assert(src0.type == BRW_REGISTER_TYPE_F);                    \
   assert(src1.type == BRW_REGISTER_TYPE_F);                    \
   assert(src2.type == BRW_REGISTER_TYPE_F);                    \
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}

/* Rounding operations (other than RNDD) require two instructions - the first
 * stores a rounded value (possibly the wrong way) in the dest register, but
 * also sets a per-channel "increment bit" in the flag register.  A predicated
 * add of 1.0 fixes dest to contain the desired result.
 *
 * Sandybridge and later appear to round correctly without an ADD.
 */
#define ROUND(OP)                                                     \
void brw_##OP(struct brw_codegen *p,                                  \
              struct brw_reg dest,                                    \
              struct brw_reg src)                                     \
{                                                                     \
   const struct brw_device_info *devinfo = p->devinfo;                \
   brw_inst *rnd, *add;                                               \
   rnd = next_insn(p, BRW_OPCODE_##OP);                               \
   brw_set_dest(p, rnd, dest);                                        \
   brw_set_src0(p, rnd, src);                                         \
                                                                      \
   if (devinfo->gen < 6) {                                            \
      /* turn on round-increments */                                  \
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);    \
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                  \
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);  \
   }                                                                  \
}


ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3F(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

ROUND(RNDZ)
ROUND(RNDE)
||
1027 | |||
1028 | |||
1029 | brw_inst * |
||
1030 | brw_ADD(struct brw_codegen *p, struct brw_reg dest, |
||
1031 | struct brw_reg src0, struct brw_reg src1) |
||
1032 | { |
||
1033 | /* 6.2.2: add */ |
||
1034 | if (src0.type == BRW_REGISTER_TYPE_F || |
||
1035 | (src0.file == BRW_IMMEDIATE_VALUE && |
||
1036 | src0.type == BRW_REGISTER_TYPE_VF)) { |
||
1037 | assert(src1.type != BRW_REGISTER_TYPE_UD); |
||
1038 | assert(src1.type != BRW_REGISTER_TYPE_D); |
||
1039 | } |
||
1040 | |||
1041 | if (src1.type == BRW_REGISTER_TYPE_F || |
||
1042 | (src1.file == BRW_IMMEDIATE_VALUE && |
||
1043 | src1.type == BRW_REGISTER_TYPE_VF)) { |
||
1044 | assert(src0.type != BRW_REGISTER_TYPE_UD); |
||
1045 | assert(src0.type != BRW_REGISTER_TYPE_D); |
||
1046 | } |
||
1047 | |||
1048 | return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1); |
||
1049 | } |
||
1050 | |||
1051 | brw_inst * |
||
1052 | brw_AVG(struct brw_codegen *p, struct brw_reg dest, |
||
1053 | struct brw_reg src0, struct brw_reg src1) |
||
1054 | { |
||
1055 | assert(dest.type == src0.type); |
||
1056 | assert(src0.type == src1.type); |
||
1057 | switch (src0.type) { |
||
1058 | case BRW_REGISTER_TYPE_B: |
||
1059 | case BRW_REGISTER_TYPE_UB: |
||
1060 | case BRW_REGISTER_TYPE_W: |
||
1061 | case BRW_REGISTER_TYPE_UW: |
||
1062 | case BRW_REGISTER_TYPE_D: |
||
1063 | case BRW_REGISTER_TYPE_UD: |
||
1064 | break; |
||
1065 | default: |
||
1066 | unreachable("Bad type for brw_AVG"); |
||
1067 | } |
||
1068 | |||
1069 | return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1); |
||
1070 | } |
||
1071 | |||
1072 | brw_inst * |
||
1073 | brw_MUL(struct brw_codegen *p, struct brw_reg dest, |
||
1074 | struct brw_reg src0, struct brw_reg src1) |
||
1075 | { |
||
1076 | /* 6.32.38: mul */ |
||
1077 | if (src0.type == BRW_REGISTER_TYPE_D || |
||
1078 | src0.type == BRW_REGISTER_TYPE_UD || |
||
1079 | src1.type == BRW_REGISTER_TYPE_D || |
||
1080 | src1.type == BRW_REGISTER_TYPE_UD) { |
||
1081 | assert(dest.type != BRW_REGISTER_TYPE_F); |
||
1082 | } |
||
1083 | |||
1084 | if (src0.type == BRW_REGISTER_TYPE_F || |
||
1085 | (src0.file == BRW_IMMEDIATE_VALUE && |
||
1086 | src0.type == BRW_REGISTER_TYPE_VF)) { |
||
1087 | assert(src1.type != BRW_REGISTER_TYPE_UD); |
||
1088 | assert(src1.type != BRW_REGISTER_TYPE_D); |
||
1089 | } |
||
1090 | |||
1091 | if (src1.type == BRW_REGISTER_TYPE_F || |
||
1092 | (src1.file == BRW_IMMEDIATE_VALUE && |
||
1093 | src1.type == BRW_REGISTER_TYPE_VF)) { |
||
1094 | assert(src0.type != BRW_REGISTER_TYPE_UD); |
||
1095 | assert(src0.type != BRW_REGISTER_TYPE_D); |
||
1096 | } |
||
1097 | |||
1098 | assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE || |
||
1099 | src0.nr != BRW_ARF_ACCUMULATOR); |
||
1100 | assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE || |
||
1101 | src1.nr != BRW_ARF_ACCUMULATOR); |
||
1102 | |||
1103 | return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1); |
||
1104 | } |
||
1105 | |||
1106 | brw_inst * |
||
1107 | brw_LINE(struct brw_codegen *p, struct brw_reg dest, |
||
1108 | struct brw_reg src0, struct brw_reg src1) |
||
1109 | { |
||
1110 | src0.vstride = BRW_VERTICAL_STRIDE_0; |
||
1111 | src0.width = BRW_WIDTH_1; |
||
1112 | src0.hstride = BRW_HORIZONTAL_STRIDE_0; |
||
1113 | return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1); |
||
1114 | } |
||
1115 | |||
1116 | brw_inst * |
||
1117 | brw_PLN(struct brw_codegen *p, struct brw_reg dest, |
||
1118 | struct brw_reg src0, struct brw_reg src1) |
||
1119 | { |
||
1120 | src0.vstride = BRW_VERTICAL_STRIDE_0; |
||
1121 | src0.width = BRW_WIDTH_1; |
||
1122 | src0.hstride = BRW_HORIZONTAL_STRIDE_0; |
||
1123 | src1.vstride = BRW_VERTICAL_STRIDE_8; |
||
1124 | src1.width = BRW_WIDTH_8; |
||
1125 | src1.hstride = BRW_HORIZONTAL_STRIDE_1; |
||
1126 | return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1); |
||
1127 | } |
||
1128 | |||
/**
 * Emit a float-to-half conversion: F32TO16 on Gen7, a converting MOV to an
 * HF destination on Gen8+.  Returns the last instruction emitted.
 *
 * When the caller asks for a UD destination, the high 16 bits of each dword
 * must be zeroed explicitly where the hardware doesn't do it (see comment
 * below), which requires a second MOV.
 */
brw_inst *
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
   /* The F32TO16 instruction doesn't support 32-bit destination types in
    * Align1 mode, and neither does the Gen8 implementation in terms of a
    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
    * an undocumented feature.
    */
   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
                                 (!align16 || devinfo->gen >= 8));
   brw_inst *inst;

   if (align16) {
      assert(dst.type == BRW_REGISTER_TYPE_UD);
   } else {
      assert(dst.type == BRW_REGISTER_TYPE_UD ||
             dst.type == BRW_REGISTER_TYPE_W ||
             dst.type == BRW_REGISTER_TYPE_UW ||
             dst.type == BRW_REGISTER_TYPE_HF);
   }

   brw_push_insn_state(p);

   if (needs_zero_fill) {
      /* Rewrite the UD destination as a strided W destination so the
       * conversion writes only the low word of each dword; the zero fill
       * below writes the high word.
       */
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
   }

   if (devinfo->gen >= 8) {
      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
   } else {
      assert(devinfo->gen == 7);
      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
   }

   if (needs_zero_fill) {
      /* The two writes touch disjoint halves of each dword, so suppress
       * the dependency check between them.
       */
      brw_inst_set_no_dd_clear(devinfo, inst, true);
      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
      brw_inst_set_no_dd_check(devinfo, inst, true);
   }

   brw_pop_insn_state(p);
   return inst;
}
||
1175 | |||
/**
 * Emit a half-to-float conversion: F16TO32 on Gen7, a converting MOV from
 * an HF source on Gen8+.
 */
brw_inst *
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
   const struct brw_device_info *devinfo = p->devinfo;
   bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;

   if (align16) {
      assert(src.type == BRW_REGISTER_TYPE_UD);
   } else {
      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
       *
       *   Because this instruction does not have a 16-bit floating-point
       *   type, the source data type must be Word (W). The destination type
       *   must be F (Float).
       */
      if (src.type == BRW_REGISTER_TYPE_UD)
         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);

      assert(src.type == BRW_REGISTER_TYPE_W ||
             src.type == BRW_REGISTER_TYPE_UW ||
             src.type == BRW_REGISTER_TYPE_HF);
   }

   if (devinfo->gen >= 8) {
      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
   } else {
      assert(devinfo->gen == 7);
      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
   }
}
||
1206 | |||
1207 | |||
1208 | void brw_NOP(struct brw_codegen *p) |
||
1209 | { |
||
1210 | brw_inst *insn = next_insn(p, BRW_OPCODE_NOP); |
||
1211 | brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); |
||
1212 | brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); |
||
1213 | brw_set_src1(p, insn, brw_imm_ud(0x0)); |
||
1214 | } |
||
1215 | |||
1216 | |||
1217 | |||
1218 | |||
1219 | |||
1220 | /*********************************************************************** |
||
1221 | * Comparisons, if/else/endif |
||
1222 | */ |
||
1223 | |||
1224 | brw_inst * |
||
1225 | brw_JMPI(struct brw_codegen *p, struct brw_reg index, |
||
1226 | unsigned predicate_control) |
||
1227 | { |
||
1228 | const struct brw_device_info *devinfo = p->devinfo; |
||
1229 | struct brw_reg ip = brw_ip_reg(); |
||
1230 | brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index); |
||
1231 | |||
1232 | brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2); |
||
1233 | brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE); |
||
1234 | brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE); |
||
1235 | brw_inst_set_pred_control(devinfo, inst, predicate_control); |
||
1236 | |||
1237 | return inst; |
||
1238 | } |
||
1239 | |||
/* Push an IF/ELSE instruction onto the if-stack so brw_ENDIF can find and
 * patch it later.  Offsets (not pointers) are stored because p->store may
 * be realloc'd while the block is open.
 */
static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
   p->if_stack[p->if_stack_depth] = inst - p->store;

   p->if_stack_depth++;
   /* Grow after the push so the next push always has a free slot. */
   if (p->if_stack_array_size <= p->if_stack_depth) {
      p->if_stack_array_size *= 2;
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
                             p->if_stack_array_size);
   }
}
||
1252 | |||
1253 | static brw_inst * |
||
1254 | pop_if_stack(struct brw_codegen *p) |
||
1255 | { |
||
1256 | p->if_stack_depth--; |
||
1257 | return &p->store[p->if_stack[p->if_stack_depth]]; |
||
1258 | } |
||
1259 | |||
1260 | static void |
||
1261 | push_loop_stack(struct brw_codegen *p, brw_inst *inst) |
||
1262 | { |
||
1263 | if (p->loop_stack_array_size < p->loop_stack_depth) { |
||
1264 | p->loop_stack_array_size *= 2; |
||
1265 | p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int, |
||
1266 | p->loop_stack_array_size); |
||
1267 | p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int, |
||
1268 | p->loop_stack_array_size); |
||
1269 | } |
||
1270 | |||
1271 | p->loop_stack[p->loop_stack_depth] = inst - p->store; |
||
1272 | p->loop_stack_depth++; |
||
1273 | p->if_depth_in_loop[p->loop_stack_depth] = 0; |
||
1274 | } |
||
1275 | |||
1276 | static brw_inst * |
||
1277 | get_inner_do_insn(struct brw_codegen *p) |
||
1278 | { |
||
1279 | return &p->store[p->loop_stack[p->loop_stack_depth - 1]]; |
||
1280 | } |
||
1281 | |||
1282 | /* EU takes the value from the flag register and pushes it onto some |
||
1283 | * sort of a stack (presumably merging with any flag value already on |
||
1284 | * the stack). Within an if block, the flags at the top of the stack |
||
1285 | * control execution on each channel of the unit, eg. on each of the |
||
1286 | * 16 pixel values in our wm programs. |
||
1287 | * |
||
1288 | * When the matching 'else' instruction is reached (presumably by |
||
1289 | * countdown of the instruction count patched in by our ELSE/ENDIF |
||
1290 | * functions), the relevant flags are inverted. |
||
1291 | * |
||
1292 | * When the matching 'endif' instruction is reached, the flags are |
||
1293 | * popped off. If the stack is now empty, normal execution resumes. |
||
1294 | */ |
||
/**
 * Emit an IF instruction with per-generation operand encoding and push it
 * on the if-stack; the jump targets are left zero here and patched in by
 * brw_ENDIF (via patch_IF_ELSE).
 */
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   /* Override the defaults for this instruction.  Each generation encodes
    * IF differently: pre-Gen6 uses IP as dest/src0, Gen6 uses an immediate
    * jump count, Gen7 uses JIP/UIP with null operands, and Gen8+ drops the
    * second null source.
    */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
      brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_exec_size(devinfo, insn, execute_size);
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   /* Pre-Gen6 flow control needs a thread switch unless we're converting
    * to predicated ADDs in single-program-flow mode.
    */
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
   p->if_depth_in_loop[p->loop_stack_depth]++;
   return insn;
}
||
1338 | |||
1339 | /* This function is only used for gen6-style IF instructions with an |
||
1340 | * embedded comparison (conditional modifier). It is not used on gen7. |
||
1341 | */ |
||
/**
 * Emit a Gen6-style IF with an embedded comparison (conditional modifier)
 * between \p src0 and \p src1, avoiding a separate CMP.  Only used for
 * gen6; the jump count is patched later by brw_ENDIF.
 */
brw_inst *
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
        struct brw_reg src0, struct brw_reg src1)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_IF);

   brw_set_dest(p, insn, brw_imm_w(0));
   brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
                                                       : BRW_EXECUTE_8);
   brw_inst_set_gen6_jump_count(devinfo, insn, 0);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);

   /* An embedded-compare IF cannot itself be compressed or predicated. */
   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
   brw_inst_set_cond_modifier(devinfo, insn, conditional);

   push_if_stack(p, insn);
   return insn;
}
||
1365 | |||
1366 | /** |
||
1367 | * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. |
||
1368 | */ |
||
/**
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
 *
 * The IF becomes an inversely-predicated ADD to IP that skips to the ELSE
 * block (or past where ENDIF would be); the ELSE, if present, becomes an
 * unconditional ADD to IP that skips to the end.  Only valid when the IF
 * was emitted with an execution size of 1 (scalar flow control).
 */
static void
convert_IF_ELSE_to_ADD(struct brw_codegen *p,
                       brw_inst *if_inst, brw_inst *else_inst)
{
   const struct brw_device_info *devinfo = p->devinfo;

   /* The next instruction (where the ENDIF would be, if it existed) */
   brw_inst *next_inst = &p->store[p->nr_insn];

   assert(p->single_program_flow);
   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);

   /* Convert IF to an ADD instruction that moves the instruction pointer
    * to the first instruction of the ELSE block.  If there is no ELSE
    * block, point to where ENDIF would be.  Reverse the predicate.
    *
    * There's no need to execute an ENDIF since we don't need to do any
    * stack operations, and if we're currently executing, we just want to
    * continue normally.
    */
   brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD);
   brw_inst_set_pred_inv(devinfo, if_inst, true);

   if (else_inst != NULL) {
      /* Convert ELSE to an ADD instruction that points where the ENDIF
       * would be.
       */
      brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD);

      /* Jump distances are in bytes; each instruction is 16 bytes. */
      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
   } else {
      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
   }
}
||
1406 | |||
1407 | /** |
||
1408 | * Patch IF and ELSE instructions with appropriate jump targets. |
||
1409 | */ |
||
/**
 * Patch IF and ELSE instructions with appropriate jump targets once the
 * matching ENDIF has been emitted.  Jump fields are in units of
 * brw_jump_scale(devinfo) per instruction, which differs per generation.
 */
static void
patch_IF_ELSE(struct brw_codegen *p,
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
   const struct brw_device_info *devinfo = p->devinfo;

   /* We shouldn't be patching IF and ELSE instructions in single program flow
    * mode when gen < 6, because in single program flow mode on those
    * platforms, we convert flow control instructions to conditional ADDs that
    * operate on IP (see brw_ENDIF).
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions.").  And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs.  So we do patch IF and ELSE
    * instructions in single program flow mode on those platforms.
    */
   if (devinfo->gen < 6)
      assert(!p->single_program_flow);

   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
   assert(endif_inst != NULL);
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);

   /* Bytes per unit of the jump-count fields on this generation. */
   unsigned br = brw_jump_scale(devinfo);

   assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF);
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));

   if (else_inst == NULL) {
      /* Patch IF -> ENDIF */
      if (devinfo->gen < 6) {
         /* Turn it into an IFF, which means no mask stack operations for
          * all-false and jumping past the ENDIF.
          */
         brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (endif_inst - if_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         /* As of gen6, there is no IFF and IF must point to the ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
      } else {
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
      }
   } else {
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));

      /* Patch IF -> ELSE */
      if (devinfo->gen < 6) {
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst));
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
      } else if (devinfo->gen == 6) {
         brw_inst_set_gen6_jump_count(devinfo, if_inst,
                                      br * (else_inst - if_inst + 1));
      }

      /* Patch ELSE -> ENDIF */
      if (devinfo->gen < 6) {
         /* BRW_OPCODE_ELSE pre-gen6 should point just past the
          * matching ENDIF.
          */
         brw_inst_set_gen4_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst + 1));
         brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
      } else if (devinfo->gen == 6) {
         /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
         brw_inst_set_gen6_jump_count(devinfo, else_inst,
                                      br * (endif_inst - else_inst));
      } else {
         /* The IF instruction's JIP should point just past the ELSE */
         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
         /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
         if (devinfo->gen >= 8) {
            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
             * should point to ENDIF.
             */
            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
         }
      }
   }
}
||
1497 | |||
/**
 * Emit an ELSE instruction with per-generation operand encoding and push it
 * on the if-stack.  Jump targets are left zero and patched by brw_ENDIF
 * (via patch_IF_ELSE); the encoding mirrors brw_IF for each generation.
 */
void
brw_ELSE(struct brw_codegen *p)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   insn = next_insn(p, BRW_OPCODE_ELSE);

   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, brw_ip_reg());
      brw_set_src0(p, insn, brw_ip_reg());
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   } else {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, brw_imm_d(0));
      brw_inst_set_jip(devinfo, insn, 0);
      brw_inst_set_uip(devinfo, insn, 0);
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   /* Pre-Gen6 flow control needs a thread switch unless SPF mode converts
    * this to a predicated ADD later.
    */
   if (!p->single_program_flow && devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   push_if_stack(p, insn);
}
||
1535 | |||
void
brw_ENDIF(struct brw_codegen *p)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn = NULL;
   brw_inst *else_inst = NULL;
   brw_inst *if_inst = NULL;
   brw_inst *tmp;
   bool emit_endif = true;

   /* In single program flow mode, we can express IF and ELSE instructions
    * equivalently as ADD instructions that operate on IP. On platforms prior
    * to Gen6, flow control instructions cause an implied thread switch, so
    * this is a significant savings.
    *
    * However, on Gen6, writing to IP doesn't work in single program flow mode
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
    * not be updated by non-flow control instructions."). And on later
    * platforms, there is no significant benefit to converting control flow
    * instructions to conditional ADDs. So we only do this trick on Gen4 and
    * Gen5.
    */
   if (devinfo->gen < 6 && p->single_program_flow)
      emit_endif = false;

   /*
    * A single next_insn() may change the base address of instruction store
    * memory(p->store), so call it first before referencing the instruction
    * store pointer from an index
    */
   if (emit_endif)
      insn = next_insn(p, BRW_OPCODE_ENDIF);

   /* Pop the IF and (optional) ELSE instructions from the stack */
   p->if_depth_in_loop[p->loop_stack_depth]--;
   tmp = pop_if_stack(p);
   if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) {
      else_inst = tmp;
      tmp = pop_if_stack(p);
   }
   if_inst = tmp;

   if (!emit_endif) {
      /* ENDIF is useless; don't bother emitting it. */
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
      return;
   }

   /* Per-generation ENDIF operand encoding; jump fields set below. */
   if (devinfo->gen < 6) {
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
      brw_set_src1(p, insn, brw_imm_d(0x0));
   } else if (devinfo->gen == 6) {
      brw_set_dest(p, insn, brw_imm_w(0));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   } else if (devinfo->gen == 7) {
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      brw_set_src1(p, insn, brw_imm_w(0));
   } else {
      brw_set_src0(p, insn, brw_imm_d(0));
   }

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
   /* Pre-gen6 flow control causes an implied thread switch. */
   if (devinfo->gen < 6)
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);

   /* Also pop item off the stack in the endif instruction: */
   if (devinfo->gen < 6) {
      brw_inst_set_gen4_jump_count(devinfo, insn, 0);
      brw_inst_set_gen4_pop_count(devinfo, insn, 1);
   } else if (devinfo->gen == 6) {
      brw_inst_set_gen6_jump_count(devinfo, insn, 2);
   } else {
      brw_inst_set_jip(devinfo, insn, 2);
   }
   /* Now that ENDIF's location is known, back-patch the IF/ELSE jumps. */
   patch_IF_ELSE(p, if_inst, else_inst, insn);
}
||
1616 | |||
1617 | brw_inst * |
||
1618 | brw_BREAK(struct brw_codegen *p) |
||
1619 | { |
||
1620 | const struct brw_device_info *devinfo = p->devinfo; |
||
1621 | brw_inst *insn; |
||
1622 | |||
1623 | insn = next_insn(p, BRW_OPCODE_BREAK); |
||
1624 | if (devinfo->gen >= 8) { |
||
1625 | brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); |
||
1626 | brw_set_src0(p, insn, brw_imm_d(0x0)); |
||
1627 | } else if (devinfo->gen >= 6) { |
||
1628 | brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); |
||
1629 | brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); |
||
1630 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
1631 | } else { |
||
1632 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1633 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1634 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
1635 | brw_inst_set_gen4_pop_count(devinfo, insn, |
||
1636 | p->if_depth_in_loop[p->loop_stack_depth]); |
||
1637 | } |
||
1638 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
1639 | brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16 |
||
1640 | : BRW_EXECUTE_8); |
||
1641 | |||
1642 | return insn; |
||
1643 | } |
||
1644 | |||
1645 | brw_inst * |
||
1646 | brw_CONT(struct brw_codegen *p) |
||
1647 | { |
||
1648 | const struct brw_device_info *devinfo = p->devinfo; |
||
1649 | brw_inst *insn; |
||
1650 | |||
1651 | insn = next_insn(p, BRW_OPCODE_CONTINUE); |
||
1652 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1653 | if (devinfo->gen >= 8) { |
||
1654 | brw_set_src0(p, insn, brw_imm_d(0x0)); |
||
1655 | } else { |
||
1656 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1657 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
1658 | } |
||
1659 | |||
1660 | if (devinfo->gen < 6) { |
||
1661 | brw_inst_set_gen4_pop_count(devinfo, insn, |
||
1662 | p->if_depth_in_loop[p->loop_stack_depth]); |
||
1663 | } |
||
1664 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
1665 | brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16 |
||
1666 | : BRW_EXECUTE_8); |
||
1667 | return insn; |
||
1668 | } |
||
1669 | |||
1670 | brw_inst * |
||
1671 | gen6_HALT(struct brw_codegen *p) |
||
1672 | { |
||
1673 | const struct brw_device_info *devinfo = p->devinfo; |
||
1674 | brw_inst *insn; |
||
1675 | |||
1676 | insn = next_insn(p, BRW_OPCODE_HALT); |
||
1677 | brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); |
||
1678 | if (devinfo->gen >= 8) { |
||
1679 | brw_set_src0(p, insn, brw_imm_d(0x0)); |
||
1680 | } else { |
||
1681 | brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); |
||
1682 | brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */ |
||
1683 | } |
||
1684 | |||
1685 | if (p->compressed) { |
||
1686 | brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_16); |
||
1687 | } else { |
||
1688 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
1689 | brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_8); |
||
1690 | } |
||
1691 | return insn; |
||
1692 | } |
||
1693 | |||
1694 | /* DO/WHILE loop: |
||
1695 | * |
||
1696 | * The DO/WHILE is just an unterminated loop -- break or continue are |
||
1697 | * used for control within the loop. We have a few ways they can be |
||
1698 | * done. |
||
1699 | * |
||
1700 | * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, |
||
1701 | * jip and no DO instruction. |
||
1702 | * |
||
1703 | * For non-uniform control flow pre-gen6, there's a DO instruction to |
||
1704 | * push the mask, and a WHILE to jump back, and BREAK to get out and |
||
1705 | * pop the mask. |
||
1706 | * |
||
1707 | * For gen6, there's no more mask stack, so no need for DO. WHILE |
||
1708 | * just points back to the first instruction of the loop. |
||
1709 | */ |
||
1710 | brw_inst * |
||
1711 | brw_DO(struct brw_codegen *p, unsigned execute_size) |
||
1712 | { |
||
1713 | const struct brw_device_info *devinfo = p->devinfo; |
||
1714 | |||
1715 | if (devinfo->gen >= 6 || p->single_program_flow) { |
||
1716 | push_loop_stack(p, &p->store[p->nr_insn]); |
||
1717 | return &p->store[p->nr_insn]; |
||
1718 | } else { |
||
1719 | brw_inst *insn = next_insn(p, BRW_OPCODE_DO); |
||
1720 | |||
1721 | push_loop_stack(p, insn); |
||
1722 | |||
1723 | /* Override the defaults for this instruction: |
||
1724 | */ |
||
1725 | brw_set_dest(p, insn, brw_null_reg()); |
||
1726 | brw_set_src0(p, insn, brw_null_reg()); |
||
1727 | brw_set_src1(p, insn, brw_null_reg()); |
||
1728 | |||
1729 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
1730 | brw_inst_set_exec_size(devinfo, insn, execute_size); |
||
1731 | brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); |
||
1732 | |||
1733 | return insn; |
||
1734 | } |
||
1735 | } |
||
1736 | |||
1737 | /** |
||
1738 | * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE |
||
1739 | * instruction here. |
||
1740 | * |
||
1741 | * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop |
||
1742 | * nesting, since it can always just point to the end of the block/current loop. |
||
1743 | */ |
||
static void
brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *do_inst = get_inner_do_insn(p);
   brw_inst *inst;
   unsigned br = brw_jump_scale(devinfo);

   /* Only pre-gen6 needs this back-patching pass (gen6+ uses
    * brw_set_uip_jip() instead; see comment above).
    */
   assert(devinfo->gen < 6);

   /* Walk backwards from the WHILE to the matching DO, pointing every
    * unpatched BREAK just past the WHILE and every unpatched CONTINUE at
    * the WHILE itself.
    */
   for (inst = while_inst - 1; inst != do_inst; inst--) {
      /* If the jump count is != 0, that means that this instruction has already
       * been patched because it's part of a loop inside of the one we're
       * patching.
       */
      if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
          brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
      } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
                 brw_inst_gen4_jump_count(devinfo, inst) == 0) {
         brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
      }
   }
}
||
1768 | |||
/**
 * Emit the WHILE that closes the innermost DO block and pop the loop stack.
 *
 * Gen6+ encodes a backward jump to the loop top in the WHILE itself (the
 * field layout varies per gen).  Pre-gen6, single-program-flow mode uses a
 * plain ADD to IP instead of a WHILE; otherwise a real WHILE is emitted and
 * the loop's BREAK/CONTINUE instructions are back-patched.
 */
brw_inst *
brw_WHILE(struct brw_codegen *p)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn, *do_insn;
   unsigned br = brw_jump_scale(devinfo);

   if (devinfo->gen >= 6) {
      insn = next_insn(p, BRW_OPCODE_WHILE);
      do_insn = get_inner_do_insn(p);

      if (devinfo->gen >= 8) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, brw_imm_d(0));
         /* Negative JIP: jump back to the loop top. */
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else if (devinfo->gen == 7) {
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, brw_imm_w(0));
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
      } else {
         brw_set_dest(p, insn, brw_imm_w(0));
         brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
      }

      brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
                                                          : BRW_EXECUTE_8);
   } else {
      if (p->single_program_flow) {
         /* SPF mode: express the backward branch as ADD ip, ip, imm. */
         insn = next_insn(p, BRW_OPCODE_ADD);
         do_insn = get_inner_do_insn(p);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
      } else {
         insn = next_insn(p, BRW_OPCODE_WHILE);
         do_insn = get_inner_do_insn(p);

         assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO);

         brw_set_dest(p, insn, brw_ip_reg());
         brw_set_src0(p, insn, brw_ip_reg());
         brw_set_src1(p, insn, brw_imm_d(0));

         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
         /* Jump back to just past the matching DO. */
         brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
         brw_inst_set_gen4_pop_count(devinfo, insn, 0);

         /* Now the WHILE's location is known; patch BREAK/CONT targets. */
         brw_patch_break_cont(p, insn);
      }
   }
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   p->loop_stack_depth--;

   return insn;
}
||
1830 | |||
1831 | /* FORWARD JUMPS: |
||
1832 | */ |
||
1833 | void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx) |
||
1834 | { |
||
1835 | const struct brw_device_info *devinfo = p->devinfo; |
||
1836 | brw_inst *jmp_insn = &p->store[jmp_insn_idx]; |
||
1837 | unsigned jmpi = 1; |
||
1838 | |||
1839 | if (devinfo->gen >= 5) |
||
1840 | jmpi = 2; |
||
1841 | |||
1842 | assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI); |
||
1843 | assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE); |
||
1844 | |||
1845 | brw_inst_set_gen4_jump_count(devinfo, jmp_insn, |
||
1846 | jmpi * (p->nr_insn - jmp_insn_idx - 1)); |
||
1847 | } |
||
1848 | |||
1849 | /* To integrate with the above, it makes sense that the comparison |
||
1850 | * instruction should populate the flag register. It might be simpler |
||
1851 | * just to use the flag reg for most WM tasks? |
||
1852 | */ |
||
1853 | void brw_CMP(struct brw_codegen *p, |
||
1854 | struct brw_reg dest, |
||
1855 | unsigned conditional, |
||
1856 | struct brw_reg src0, |
||
1857 | struct brw_reg src1) |
||
1858 | { |
||
1859 | const struct brw_device_info *devinfo = p->devinfo; |
||
1860 | brw_inst *insn = next_insn(p, BRW_OPCODE_CMP); |
||
1861 | |||
1862 | brw_inst_set_cond_modifier(devinfo, insn, conditional); |
||
1863 | brw_set_dest(p, insn, dest); |
||
1864 | brw_set_src0(p, insn, src0); |
||
1865 | brw_set_src1(p, insn, src1); |
||
1866 | |||
1867 | /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds |
||
1868 | * page says: |
||
1869 | * "Any CMP instruction with a null destination must use a {switch}." |
||
1870 | * |
||
1871 | * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't |
||
1872 | * mentioned on their work-arounds pages. |
||
1873 | */ |
||
1874 | if (devinfo->gen == 7) { |
||
1875 | if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
1876 | dest.nr == BRW_ARF_NULL) { |
||
1877 | brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH); |
||
1878 | } |
||
1879 | } |
||
1880 | } |
||
1881 | |||
1882 | /*********************************************************************** |
||
1883 | * Helpers for the various SEND message types: |
||
1884 | */ |
||
1885 | |||
1886 | /** Extended math function, float[8]. |
||
1887 | */ |
||
1888 | void gen4_math(struct brw_codegen *p, |
||
1889 | struct brw_reg dest, |
||
1890 | unsigned function, |
||
1891 | unsigned msg_reg_nr, |
||
1892 | struct brw_reg src, |
||
1893 | unsigned precision ) |
||
1894 | { |
||
1895 | const struct brw_device_info *devinfo = p->devinfo; |
||
1896 | brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); |
||
1897 | unsigned data_type; |
||
1898 | if (has_scalar_region(src)) { |
||
1899 | data_type = BRW_MATH_DATA_SCALAR; |
||
1900 | } else { |
||
1901 | data_type = BRW_MATH_DATA_VECTOR; |
||
1902 | } |
||
1903 | |||
1904 | assert(devinfo->gen < 6); |
||
1905 | |||
1906 | /* Example code doesn't set predicate_control for send |
||
1907 | * instructions. |
||
1908 | */ |
||
1909 | brw_inst_set_pred_control(devinfo, insn, 0); |
||
1910 | brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); |
||
1911 | |||
1912 | brw_set_dest(p, insn, dest); |
||
1913 | brw_set_src0(p, insn, src); |
||
1914 | brw_set_math_message(p, |
||
1915 | insn, |
||
1916 | function, |
||
1917 | src.type == BRW_REGISTER_TYPE_D, |
||
1918 | precision, |
||
1919 | data_type); |
||
1920 | } |
||
1921 | |||
/**
 * Gen6+ extended math: a native MATH instruction rather than a SEND.
 *
 * The asserts below encode the hardware restrictions: operand register
 * files and strides per gen, integer types for INT_DIV variants, float
 * types (and a null/GRF src1) otherwise, and no source modifiers on gen6.
 */
void gen6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);

   assert(devinfo->gen >= 6);

   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
          (devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
   assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
          (devinfo->gen >= 8 && src0.file == BRW_IMMEDIATE_VALUE));

   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
   if (devinfo->gen == 6) {
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
   }

   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
      /* Integer division: float operands are not allowed. */
      assert(src0.type != BRW_REGISTER_TYPE_F);
      assert(src1.type != BRW_REGISTER_TYPE_F);
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
             (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
   } else {
      assert(src0.type == BRW_REGISTER_TYPE_F);
      assert(src1.type == BRW_REGISTER_TYPE_F);
      if (function == BRW_MATH_FUNCTION_POW) {
         assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
                (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
      } else {
         /* Unary functions must pass null for src1. */
         assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
                src1.nr == BRW_ARF_NULL);
      }
   }

   /* Source modifiers are ignored for extended math instructions on Gen6. */
   if (devinfo->gen == 6) {
      assert(!src0.negate);
      assert(!src0.abs);
      assert(!src1.negate);
      assert(!src1.abs);
   }

   brw_inst_set_math_function(devinfo, insn, function);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, src1);
}
||
1977 | |||
1978 | |||
1979 | /** |
||
1980 | * Write a block of OWORDs (half a GRF each) from the scratch buffer, |
||
1981 | * using a constant offset per channel. |
||
1982 | * |
||
1983 | * The offset must be aligned to oword size (16 bytes). Used for |
||
1984 | * register spilling. |
||
1985 | */ |
||
/**
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
 * using a constant offset per channel.
 *
 * The offset must be aligned to oword size (16 bytes).  Used for
 * register spilling.
 */
void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset)
{
   const struct brw_device_info *devinfo = p->devinfo;
   uint32_t msg_control, msg_type;
   int mlen;

   /* Gen6+ expresses the offset in owords rather than bytes. */
   if (devinfo->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* One GRF is two owords; message length includes the header register. */
   if (num_regs == 1) {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
      mlen = 2;
   } else {
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
      mlen = 3;
   }

   /* Set up the message header.  This is g0, with g0.2 filled with
    * the offset.  We don't want to leave our offset around in g0 or
    * it'll screw up texture samples, so set it up inside the message
    * reg.
    */
   {
      brw_push_insn_state(p);
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);

      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

      /* set message header global offset field (reg 0, element 2) */
      brw_MOV(p,
              retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                                  mrf.nr,
                                  2), BRW_REGISTER_TYPE_UD),
              brw_imm_ud(offset));

      brw_pop_insn_state(p);
   }

   {
      struct brw_reg dest;
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
      int send_commit_msg;
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW);

      /* SEND may not be compressed; drop to uncompressed and widen the
       * header region instead.
       */
      if (brw_inst_qtr_control(devinfo, insn) != BRW_COMPRESSION_NONE) {
         brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
         src_header = vec16(src_header);
      }
      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
      if (devinfo->gen < 6)
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);

      /* Until gen6, writes followed by reads from the same location
       * are not guaranteed to be ordered unless write_commit is set.
       * If set, then a no-op write is issued to the destination
       * register to set a dependency, and a read from the destination
       * can be used to ensure the ordering.
       *
       * For gen6, only writes between different threads need ordering
       * protection.  Our use of DP writes is all about register
       * spilling within a thread.
       */
      if (devinfo->gen >= 6) {
         dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
         send_commit_msg = 0;
      } else {
         dest = src_header;
         send_commit_msg = 1;
      }

      brw_set_dest(p, insn, dest);
      if (devinfo->gen >= 6) {
         brw_set_src0(p, insn, mrf);
      } else {
         brw_set_src0(p, insn, brw_null_reg());
      }

      if (devinfo->gen >= 6)
         msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
      else
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;

      brw_set_dp_write_message(p,
                               insn,
                               255, /* binding table index (255=stateless) */
                               msg_control,
                               msg_type,
                               mlen,
                               true, /* header_present */
                               0, /* not a render target */
                               send_commit_msg, /* response_length */
                               0, /* eot */
                               send_commit_msg);
   }
}
||
2089 | |||
2090 | |||
2091 | /** |
||
2092 | * Read a block of owords (half a GRF each) from the scratch buffer |
||
2093 | * using a constant index per channel. |
||
2094 | * |
||
2095 | * Offset must be aligned to oword size (16 bytes). Used for register |
||
2096 | * spilling. |
||
2097 | */ |
||
2098 | void |
||
2099 | brw_oword_block_read_scratch(struct brw_codegen *p, |
||
2100 | struct brw_reg dest, |
||
2101 | struct brw_reg mrf, |
||
2102 | int num_regs, |
||
2103 | unsigned offset) |
||
2104 | { |
||
2105 | const struct brw_device_info *devinfo = p->devinfo; |
||
2106 | uint32_t msg_control; |
||
2107 | int rlen; |
||
2108 | |||
2109 | if (devinfo->gen >= 6) |
||
2110 | offset /= 16; |
||
2111 | |||
2112 | if (p->devinfo->gen >= 7) { |
||
2113 | /* On gen 7 and above, we no longer have message registers and we can |
||
2114 | * send from any register we want. By using the destination register |
||
2115 | * for the message, we guarantee that the implied message write won't |
||
2116 | * accidentally overwrite anything. This has been a problem because |
||
2117 | * the MRF registers and source for the final FB write are both fixed |
||
2118 | * and may overlap. |
||
2119 | */ |
||
2120 | mrf = retype(dest, BRW_REGISTER_TYPE_UD); |
||
2121 | } else { |
||
2122 | mrf = retype(mrf, BRW_REGISTER_TYPE_UD); |
||
2123 | } |
||
2124 | dest = retype(dest, BRW_REGISTER_TYPE_UW); |
||
2125 | |||
2126 | if (num_regs == 1) { |
||
2127 | msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; |
||
2128 | rlen = 1; |
||
2129 | } else { |
||
2130 | msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; |
||
2131 | rlen = 2; |
||
2132 | } |
||
2133 | |||
2134 | { |
||
2135 | brw_push_insn_state(p); |
||
2136 | brw_set_default_exec_size(p, BRW_EXECUTE_8); |
||
2137 | brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); |
||
2138 | brw_set_default_mask_control(p, BRW_MASK_DISABLE); |
||
2139 | |||
2140 | brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); |
||
2141 | |||
2142 | /* set message header global offset field (reg 0, element 2) */ |
||
2143 | brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset)); |
||
2144 | |||
2145 | brw_pop_insn_state(p); |
||
2146 | } |
||
2147 | |||
2148 | { |
||
2149 | brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); |
||
2150 | |||
2151 | assert(brw_inst_pred_control(devinfo, insn) == 0); |
||
2152 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
2153 | |||
2154 | brw_set_dest(p, insn, dest); /* UW? */ |
||
2155 | if (devinfo->gen >= 6) { |
||
2156 | brw_set_src0(p, insn, mrf); |
||
2157 | } else { |
||
2158 | brw_set_src0(p, insn, brw_null_reg()); |
||
2159 | brw_inst_set_base_mrf(devinfo, insn, mrf.nr); |
||
2160 | } |
||
2161 | |||
2162 | brw_set_dp_read_message(p, |
||
2163 | insn, |
||
2164 | 255, /* binding table index (255=stateless) */ |
||
2165 | msg_control, |
||
2166 | BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ |
||
2167 | BRW_DATAPORT_READ_TARGET_RENDER_CACHE, |
||
2168 | 1, /* msg_length */ |
||
2169 | true, /* header_present */ |
||
2170 | rlen); |
||
2171 | } |
||
2172 | } |
||
2173 | |||
/**
 * Gen7+ scratch-block read using the dedicated DP scratch message,
 * which takes the offset in the message descriptor (no offset needs to
 * be written into the header payload; only g0 is sent for the header).
 */
void
gen7_block_read_scratch(struct brw_codegen *p,
                        struct brw_reg dest,
                        int num_regs,
                        unsigned offset)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);

   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));

   /* The HW requires that the header is present; this is to get the g0.5
    * scratch offset.
    */
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));

   /* According to the docs, offset is "A 12-bit HWord offset into the memory
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
    * is 32 bytes, which happens to be the size of a register.
    */
   offset /= REG_SIZE;
   assert(offset < (1 << 12));

   gen7_set_dp_scratch_message(p, insn,
                               false, /* scratch read */
                               false, /* OWords */
                               false, /* invalidate after read */
                               num_regs,
                               offset,
                               1,        /* mlen: just g0 */
                               num_regs, /* rlen */
                               true);    /* header present */
}
||
2209 | |||
2210 | /** |
||
2211 | * Read a float[4] vector from the data port Data Cache (const buffer). |
||
2212 | * Location (in buffer) should be a multiple of 16. |
||
2213 | * Used for fetching shader constants. |
||
2214 | */ |
||
/**
 * Read a float[4] vector from the data port Data Cache (const buffer).
 * Location (in buffer) should be a multiple of 16.
 * Used for fetching shader constants.
 */
void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index)
{
   const struct brw_device_info *devinfo = p->devinfo;

   /* On newer hardware, offset is in units of owords. */
   if (devinfo->gen >= 6)
      offset /= 16;

   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);

   /* Header setup must not be predicated, compressed, or masked. */
   brw_push_insn_state(p);
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);

   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));

   /* set message header global offset field (reg 0, element 2) */
   brw_MOV(p,
           retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
                               mrf.nr,
                               2), BRW_REGISTER_TYPE_UD),
           brw_imm_ud(offset));

   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

   /* cast dest to a uword[8] vector */
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);

   brw_set_dest(p, insn, dest);
   if (devinfo->gen >= 6) {
      brw_set_src0(p, insn, mrf);
   } else {
      brw_set_src0(p, insn, brw_null_reg());
      brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
   }

   brw_set_dp_read_message(p,
                           insn,
                           bind_table_index,
                           BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
                           BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
                           BRW_DATAPORT_READ_TARGET_DATA_CACHE,
                           1, /* msg_length */
                           true, /* header_present */
                           1); /* response_length (1 reg, 2 owords!) */

   brw_pop_insn_state(p);
}
||
2269 | |||
2270 | |||
2271 | void brw_fb_WRITE(struct brw_codegen *p, |
||
2272 | int dispatch_width, |
||
2273 | struct brw_reg payload, |
||
2274 | struct brw_reg implied_header, |
||
2275 | unsigned msg_control, |
||
2276 | unsigned binding_table_index, |
||
2277 | unsigned msg_length, |
||
2278 | unsigned response_length, |
||
2279 | bool eot, |
||
2280 | bool last_render_target, |
||
2281 | bool header_present) |
||
2282 | { |
||
2283 | const struct brw_device_info *devinfo = p->devinfo; |
||
2284 | brw_inst *insn; |
||
2285 | unsigned msg_type; |
||
2286 | struct brw_reg dest, src0; |
||
2287 | |||
2288 | if (dispatch_width == 16) |
||
2289 | dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); |
||
2290 | else |
||
2291 | dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); |
||
2292 | |||
2293 | if (devinfo->gen >= 6) { |
||
2294 | insn = next_insn(p, BRW_OPCODE_SENDC); |
||
2295 | } else { |
||
2296 | insn = next_insn(p, BRW_OPCODE_SEND); |
||
2297 | } |
||
2298 | brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE); |
||
2299 | |||
2300 | if (devinfo->gen >= 6) { |
||
2301 | /* headerless version, just submit color payload */ |
||
2302 | src0 = payload; |
||
2303 | |||
2304 | msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
||
2305 | } else { |
||
2306 | assert(payload.file == BRW_MESSAGE_REGISTER_FILE); |
||
2307 | brw_inst_set_base_mrf(devinfo, insn, payload.nr); |
||
2308 | src0 = implied_header; |
||
2309 | |||
2310 | msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
||
2311 | } |
||
2312 | |||
2313 | brw_set_dest(p, insn, dest); |
||
2314 | brw_set_src0(p, insn, src0); |
||
2315 | brw_set_dp_write_message(p, |
||
2316 | insn, |
||
2317 | binding_table_index, |
||
2318 | msg_control, |
||
2319 | msg_type, |
||
2320 | msg_length, |
||
2321 | header_present, |
||
2322 | last_render_target, |
||
2323 | response_length, |
||
2324 | eot, |
||
2325 | |||
2326 | } |
||
2327 | |||
2328 | |||
/**
 * Texture sample instruction.
 *
 * Note: the msg_type plus msg_length values determine exactly what kind
 * of sampling operation is performed.  See volume 4, page 161 of docs.
 *
 * \param msg_reg_nr  message register for the payload; (unsigned)-1 skips
 *                    the gen6 implied-move resolution below.
 */
void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* (unsigned)-1 is a sentinel meaning "payload already in place". */
   if (msg_reg_nr != -1)
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */

   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
    *
    *    "Instruction compression is not allowed for this instruction (that
    *     is, send). The hardware behavior is undefined if this instruction is
    *     set as compressed. However, compress control can be set to "SecHalf"
    *     to affect the EMask generation."
    *
    * No similar wording is found in later PRMs, but there are examples
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
    */
   if (brw_inst_qtr_control(devinfo, insn) != BRW_COMPRESSION_2NDHALF)
      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);

   /* Pre-gen6, SEND encodes the message register in the base MRF field. */
   if (devinfo->gen < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_sampler_message(p, insn,
                           binding_table_index,
                           sampler,
                           msg_type,
                           response_length,
                           msg_length,
                           header_present,
                           simd_mode,
                           return_format);
}
||
2386 | |||
/* Adjust the message header's sampler state pointer to
 * select the correct group of 16 samplers.
 */
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index)
{
   /* The "Sampler Index" field can only store values between 0 and 15.
    * However, we can add an offset to the "Sampler State Pointer"
    * field, effectively selecting a different set of 16 samplers.
    *
    * The "Sampler State Pointer" needs to be aligned to a 32-byte
    * offset, and each sampler state is only 16-bytes, so we can't
    * exclusively use the offset - we have to use both.
    */

   const struct brw_device_info *devinfo = p->devinfo;

   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
      const int sampler_state_size = 16; /* 16 bytes */
      uint32_t sampler = sampler_index.dw1.ud;

      if (sampler >= 16) {
         /* Only Haswell and later support more than 16 samplers here. */
         assert(devinfo->is_haswell || devinfo->gen >= 8);
         /* Advance the pointer by whole groups of 16 samplers
          * (16 samplers * 16 bytes of state each = 256 bytes per group),
          * added to the base pointer taken from the r0 header (dword 3).
          */
         brw_ADD(p,
                 get_element_ud(header, 3),
                 get_element_ud(brw_vec8_grf(0, 0), 3),
                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
      }
   } else {
      /* Non-const sampler array indexing case */
      if (devinfo->gen < 8 && !devinfo->is_haswell) {
         return;
      }

      struct brw_reg temp = get_element_ud(header, 3);

      /* Isolate bits 4..7 of the sampler index (which group of 16)... */
      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
      /* ...and scale by 16 so each group advances the pointer by
       * 256 bytes (16 samplers * 16 bytes of state).
       */
      brw_SHL(p, temp, temp, brw_imm_ud(4));
      brw_ADD(p,
              get_element_ud(header, 3),
              get_element_ud(brw_vec8_grf(0, 0), 3),
              temp);
   }
}
||
2432 | |||
/* All these variables are pretty confusing - we might be better off
 * using bitmasks and macros for this, in the old style.  Or perhaps
 * just having the caller instantiate the fields in dword3 itself.
 */
void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *insn;

   /* Emulate the pre-gen6 implied move of the payload into the MRF. */
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);

   if (devinfo->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
                       BRW_REGISTER_TYPE_UD),
             retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
             brw_imm_ud(0xff00));
      brw_pop_insn_state(p);
   }

   insn = next_insn(p, BRW_OPCODE_SEND);

   assert(msg_length < BRW_MAX_MRF);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, src0);
   brw_set_src1(p, insn, brw_imm_d(0));

   /* Pre-gen6, SEND encodes the message register in the base MRF field. */
   if (devinfo->gen < 6)
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);

   brw_set_urb_message(p,
                       insn,
                       flags,
                       msg_length,
                       response_length,
                       offset,
                       swizzle);
}
||
2483 | |||
/**
 * Emit a SEND whose message descriptor is either an immediate or comes
 * from a register (loaded into address register a0.0).
 *
 * Returns the "setup" instruction — the SEND itself when the descriptor
 * is immediate, or the OR that loads a0.0 when it is indirect — so the
 * caller can fill in additional descriptor bits on it with the usual
 * brw_set_*_message() helpers.
 */
struct brw_inst *
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc)
{
   const struct brw_device_info *devinfo = p->devinfo;
   struct brw_inst *send, *setup;

   assert(desc.type == BRW_REGISTER_TYPE_UD);

   if (desc.file == BRW_IMMEDIATE_VALUE) {
      setup = send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src1(p, send, desc);

   } else {
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      /* The descriptor load must run unpredicated with masking disabled. */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

      /* Load the indirect descriptor to an address register using OR so the
       * caller can specify additional descriptor bits with the usual
       * brw_set_*_message() helper functions.
       */
      setup = brw_OR(p, addr, desc, brw_imm_ud(0));

      brw_pop_insn_state(p);

      send = next_insn(p, BRW_OPCODE_SEND);
      brw_set_src1(p, send, addr);
   }

   brw_set_dest(p, send, dst);
   brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
   brw_inst_set_sfid(devinfo, send, sfid);

   return setup;
}
||
2526 | |||
/**
 * Emit a surface-access SEND whose surface (binding table) index may be
 * either an immediate or a register.
 *
 * A register surface index is masked to 8 bits (to avoid hangs on
 * out-of-bounds indices) and routed through the address register as an
 * indirect descriptor.  Returns the instruction carrying the message
 * descriptor (see brw_send_indirect_message()) with mlen/rlen/header
 * already filled in; the caller sets the data-port type/control bits.
 */
static struct brw_inst *
brw_send_indirect_surface_message(struct brw_codegen *p,
                                  unsigned sfid,
                                  struct brw_reg dst,
                                  struct brw_reg payload,
                                  struct brw_reg surface,
                                  unsigned message_len,
                                  unsigned response_len,
                                  bool header_present)
{
   const struct brw_device_info *devinfo = p->devinfo;
   struct brw_inst *insn;

   if (surface.file != BRW_IMMEDIATE_VALUE) {
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);

      /* The descriptor computation must run unpredicated and unmasked. */
      brw_push_insn_state(p);
      brw_set_default_access_mode(p, BRW_ALIGN_1);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

      /* Mask out invalid bits from the surface index to avoid hangs e.g. when
       * some surface array is accessed out of bounds.
       */
      insn = brw_AND(p, addr,
                     suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
                               BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)),
                     brw_imm_ud(0xff));

      brw_pop_insn_state(p);

      surface = addr;
   }

   insn = brw_send_indirect_message(p, sfid, dst, payload, surface);
   brw_inst_set_mlen(devinfo, insn, message_len);
   brw_inst_set_rlen(devinfo, insn, response_len);
   brw_inst_set_header_present(devinfo, insn, header_present);

   return insn;
}
||
2568 | |||
2569 | static int |
||
2570 | brw_find_next_block_end(struct brw_codegen *p, int start_offset) |
||
2571 | { |
||
2572 | int offset; |
||
2573 | void *store = p->store; |
||
2574 | const struct brw_device_info *devinfo = p->devinfo; |
||
2575 | |||
2576 | for (offset = next_offset(devinfo, store, start_offset); |
||
2577 | offset < p->next_insn_offset; |
||
2578 | offset = next_offset(devinfo, store, offset)) { |
||
2579 | brw_inst *insn = store + offset; |
||
2580 | |||
2581 | switch (brw_inst_opcode(devinfo, insn)) { |
||
2582 | case BRW_OPCODE_ENDIF: |
||
2583 | case BRW_OPCODE_ELSE: |
||
2584 | case BRW_OPCODE_WHILE: |
||
2585 | case BRW_OPCODE_HALT: |
||
2586 | return offset; |
||
2587 | } |
||
2588 | } |
||
2589 | |||
2590 | return 0; |
||
2591 | } |
||
2592 | |||
/* There is no DO instruction on gen6, so to find the end of the loop
 * we have to see if the loop is jumping back before our start
 * instruction.
 */
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
   const struct brw_device_info *devinfo = p->devinfo;
   int offset;
   /* Converts jump-field units into bytes of program store. */
   int scale = 16 / brw_jump_scale(devinfo);
   void *store = p->store;

   assert(devinfo->gen >= 6);

   /* Always start after the instruction (such as a WHILE) we're trying to fix
    * up.
    */
   for (offset = next_offset(devinfo, store, start_offset);
        offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE) {
         /* Gen6 stores the backward jump in the jump-count field;
          * gen7+ uses JIP.
          */
         int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
                                     : brw_inst_jip(devinfo, insn);
         /* A WHILE jumping back to (or before) start_offset encloses it. */
         if (offset + jip * scale <= start_offset)
            return offset;
      }
   }
   assert(!"not reached");
   return start_offset;
}
||
2625 | |||
/* After program generation, go back and update the UIP and JIP of
 * BREAK, CONT, and HALT instructions to their correct locations.
 */
void
brw_set_uip_jip(struct brw_codegen *p)
{
   const struct brw_device_info *devinfo = p->devinfo;
   int offset;
   int br = brw_jump_scale(devinfo);
   int scale = 16 / br;
   void *store = p->store;

   /* Gen4-5 flow control is patched through different mechanisms. */
   if (devinfo->gen < 6)
      return;

   for (offset = 0; offset < p->next_insn_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_inst *insn = store + offset;

      if (brw_inst_cmpt_control(devinfo, insn)) {
         /* Fixups for compacted BREAK/CONTINUE not supported yet. */
         assert(brw_inst_opcode(devinfo, insn) != BRW_OPCODE_BREAK &&
                brw_inst_opcode(devinfo, insn) != BRW_OPCODE_CONTINUE &&
                brw_inst_opcode(devinfo, insn) != BRW_OPCODE_HALT);
         continue;
      }

      int block_end_offset = brw_find_next_block_end(p, offset);
      switch (brw_inst_opcode(devinfo, insn)) {
      case BRW_OPCODE_BREAK:
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         /* Gen7 UIP points to WHILE; Gen6 points just after it */
         brw_inst_set_uip(devinfo, insn,
                          (brw_find_loop_end(p, offset) - offset +
                           (devinfo->gen == 6 ? 16 : 0)) / scale);
         break;
      case BRW_OPCODE_CONTINUE:
         assert(block_end_offset != 0);
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         brw_inst_set_uip(devinfo, insn,
                          (brw_find_loop_end(p, offset) - offset) / scale);

         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;

      case BRW_OPCODE_ENDIF: {
         /* With no following block terminator, jump one instruction
          * forward (1 * br jump units); presumably the instruction
          * after the ENDIF — confirm against the PRM.
          */
         int32_t jump = (block_end_offset == 0) ?
                        1 * br : (block_end_offset - offset) / scale;
         if (devinfo->gen >= 7)
            brw_inst_set_jip(devinfo, insn, jump);
         else
            brw_inst_set_gen6_jump_count(devinfo, insn, jump);
         break;
      }

      case BRW_OPCODE_HALT:
         /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
          *
          *    "In case of the halt instruction not inside any conditional
          *     code block, the value of <JIP> and <UIP> should be the
          *     same. In case of the halt instruction inside conditional code
          *     block, the <UIP> should be the end of the program, and the
          *     <JIP> should be end of the most inner conditional code block."
          *
          * (NOTE(review): this quotation was garbled in this copy of the
          * file; restored from the cited PRM section.)
          *
          * The uip will have already been set by whoever set up the
          * instruction.
          */
         if (block_end_offset == 0) {
            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
         } else {
            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
         }
         assert(brw_inst_uip(devinfo, insn) != 0);
         assert(brw_inst_jip(devinfo, insn) != 0);
         break;
      }
   }
}
||
2706 | |||
2707 | void brw_ff_sync(struct brw_codegen *p, |
||
2708 | struct brw_reg dest, |
||
2709 | unsigned msg_reg_nr, |
||
2710 | struct brw_reg src0, |
||
2711 | bool allocate, |
||
2712 | unsigned response_length, |
||
2713 | bool eot) |
||
2714 | { |
||
2715 | const struct brw_device_info *devinfo = p->devinfo; |
||
2716 | brw_inst *insn; |
||
2717 | |||
2718 | gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
||
2719 | |||
2720 | insn = next_insn(p, BRW_OPCODE_SEND); |
||
2721 | brw_set_dest(p, insn, dest); |
||
2722 | brw_set_src0(p, insn, src0); |
||
2723 | brw_set_src1(p, insn, brw_imm_d(0)); |
||
2724 | |||
2725 | if (devinfo->gen < 6) |
||
2726 | brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr); |
||
2727 | |||
2728 | brw_set_ff_sync_message(p, |
||
2729 | insn, |
||
2730 | allocate, |
||
2731 | response_length, |
||
2732 | eot); |
||
2733 | } |
||
2734 | |||
2735 | /** |
||
2736 | * Emit the SEND instruction necessary to generate stream output data on Gen6 |
||
2737 | * (for transform feedback). |
||
2738 | * |
||
2739 | * If send_commit_msg is true, this is the last piece of stream output data |
||
2740 | * from this thread, so send the data as a committed write. According to the |
||
2741 | * Sandy Bridge PRM (volume 2 part 1, section 4.5.1): |
||
2742 | * |
||
2743 | * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all |
||
2744 | * writes are complete by sending the final write as a committed write." |
||
2745 | */ |
||
2746 | void |
||
2747 | brw_svb_write(struct brw_codegen *p, |
||
2748 | struct brw_reg dest, |
||
2749 | unsigned msg_reg_nr, |
||
2750 | struct brw_reg src0, |
||
2751 | unsigned binding_table_index, |
||
2752 | bool send_commit_msg) |
||
2753 | { |
||
2754 | brw_inst *insn; |
||
2755 | |||
2756 | gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
||
2757 | |||
2758 | insn = next_insn(p, BRW_OPCODE_SEND); |
||
2759 | brw_set_dest(p, insn, dest); |
||
2760 | brw_set_src0(p, insn, src0); |
||
2761 | brw_set_src1(p, insn, brw_imm_d(0)); |
||
2762 | brw_set_dp_write_message(p, insn, |
||
2763 | binding_table_index, |
||
2764 | 0, /* msg_control: ignored */ |
||
2765 | GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, |
||
2766 | 1, /* msg_length */ |
||
2767 | true, /* header_present */ |
||
2768 | 0, /* last_render_target: ignored */ |
||
2769 | send_commit_msg, /* response_length */ |
||
2770 | 0, /* end_of_thread */ |
||
2771 | send_commit_msg); /* send_commit_msg */ |
||
2772 | } |
||
2773 | |||
2774 | static unsigned |
||
2775 | brw_surface_payload_size(struct brw_codegen *p, |
||
2776 | unsigned num_channels, |
||
2777 | bool has_simd4x2, |
||
2778 | bool has_simd16) |
||
2779 | { |
||
2780 | if (has_simd4x2 && brw_inst_access_mode(p->devinfo, p->current) == BRW_ALIGN_16) |
||
2781 | return 1; |
||
2782 | else if (has_simd16 && p->compressed) |
||
2783 | return 2 * num_channels; |
||
2784 | else |
||
2785 | return num_channels; |
||
2786 | } |
||
2787 | |||
/**
 * Fill in the data-port message descriptor bits for an untyped atomic
 * operation.
 *
 * \param atomic_op          BRW_AOP_* atomic operation type.
 * \param response_expected  whether the previous value should be returned.
 */
static void
brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
                                  brw_inst *insn,
                                  unsigned atomic_op,
                                  bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   unsigned msg_control =
      atomic_op |   /* Atomic Operation Type: BRW_AOP_* */
      (response_expected ? 1 << 5 : 0); /* Return data expected */

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (!p->compressed)
            msg_control |= 1 << 4; /* SIMD8 mode */

         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP);
      } else {
         /* Align16: use the SIMD4x2 variant of the message. */
         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2);
      }
   } else {
      /* Pre-Haswell gen7: no SIMD4x2 variant (see the Align16 caveat in
       * brw_untyped_atomic()).
       */
      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);

      if (!p->compressed)
         msg_control |= 1 << 4; /* SIMD8 mode */
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
2820 | |||
/**
 * Emit an untyped atomic operation SEND.
 *
 * \param atomic_op          BRW_AOP_* operation.
 * \param response_expected  whether the previous value is returned; also
 *                           sizes the response (0 or 1 channels) below.
 */
void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* HSW+ route untyped atomics through data cache port 1. */
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN7_SFID_DATAPORT_DATA_CACHE);
   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   /* Mask out unused components -- This is especially important in Align16
    * mode on generations that don't have native support for SIMD4x2 atomics,
    * because unused but enabled components will cause the dataport to perform
    * additional atomic operations on the addresses that happen to be in the
    * uninitialized Y, Z and W coordinates of the payload.
    */
   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
   /* header_present == align1: Align1 messages carry a header. */
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
      brw_surface_payload_size(p, response_expected,
                               devinfo->gen >= 8 || devinfo->is_haswell, true),
      align1);

   brw_set_dp_untyped_atomic_message(
      p, insn, atomic_op, response_expected);
}
||
2851 | |||
/**
 * Fill in the data-port message descriptor bits for an untyped surface
 * read returning num_channels 32-bit components per slot.
 */
static void
brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
                                        struct brw_inst *insn,
                                        unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of 32-bit channels to drop. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   /* In Align1 the execution size picks the SIMD mode encoding. */
   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
      if (p->compressed)
         msg_control |= 1 << 4; /* SIMD16 mode */
      else
         msg_control |= 2 << 4; /* SIMD8 mode */
   }

   brw_inst_set_dp_msg_type(devinfo, insn,
                            (devinfo->gen >= 8 || devinfo->is_haswell ?
                             HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ :
                             GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ));
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
2874 | |||
2875 | void |
||
2876 | brw_untyped_surface_read(struct brw_codegen *p, |
||
2877 | struct brw_reg dst, |
||
2878 | struct brw_reg payload, |
||
2879 | struct brw_reg surface, |
||
2880 | unsigned msg_length, |
||
2881 | unsigned num_channels) |
||
2882 | { |
||
2883 | const struct brw_device_info *devinfo = p->devinfo; |
||
2884 | const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? |
||
2885 | HSW_SFID_DATAPORT_DATA_CACHE_1 : |
||
2886 | GEN7_SFID_DATAPORT_DATA_CACHE); |
||
2887 | const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); |
||
2888 | struct brw_inst *insn = brw_send_indirect_surface_message( |
||
2889 | p, sfid, dst, payload, surface, msg_length, |
||
2890 | brw_surface_payload_size(p, num_channels, true, true), |
||
2891 | align1); |
||
2892 | |||
2893 | brw_set_dp_untyped_surface_read_message( |
||
2894 | p, insn, num_channels); |
||
2895 | } |
||
2896 | |||
/**
 * Fill in the data-port message descriptor bits for an untyped surface
 * write of num_channels 32-bit components per slot.
 */
static void
brw_set_dp_untyped_surface_write_message(struct brw_codegen *p,
                                         struct brw_inst *insn,
                                         unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of 32-bit channels to drop. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
      if (p->compressed)
         msg_control |= 1 << 4; /* SIMD16 mode */
      else
         msg_control |= 2 << 4; /* SIMD8 mode */
   } else {
      /* Align16: SIMD4x2 only exists on HSW+; earlier gens fall back
       * to the SIMD8 encoding.
       */
      if (devinfo->gen >= 8 || devinfo->is_haswell)
         msg_control |= 0 << 4; /* SIMD4x2 mode */
      else
         msg_control |= 2 << 4; /* SIMD8 mode */
   }

   brw_inst_set_dp_msg_type(devinfo, insn,
                            devinfo->gen >= 8 || devinfo->is_haswell ?
                            HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
                            GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
2924 | |||
2925 | void |
||
2926 | brw_untyped_surface_write(struct brw_codegen *p, |
||
2927 | struct brw_reg payload, |
||
2928 | struct brw_reg surface, |
||
2929 | unsigned msg_length, |
||
2930 | unsigned num_channels) |
||
2931 | { |
||
2932 | const struct brw_device_info *devinfo = p->devinfo; |
||
2933 | const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? |
||
2934 | HSW_SFID_DATAPORT_DATA_CACHE_1 : |
||
2935 | GEN7_SFID_DATAPORT_DATA_CACHE); |
||
2936 | const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1; |
||
2937 | /* Mask out unused components -- See comment in brw_untyped_atomic(). */ |
||
2938 | const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? |
||
2939 | WRITEMASK_X : WRITEMASK_XYZW; |
||
2940 | struct brw_inst *insn = brw_send_indirect_surface_message( |
||
2941 | p, sfid, brw_writemask(brw_null_reg(), mask), |
||
2942 | payload, surface, msg_length, 0, align1); |
||
2943 | |||
2944 | brw_set_dp_untyped_surface_write_message( |
||
2945 | p, insn, num_channels); |
||
2946 | } |
||
2947 | |||
/**
 * Fill in the data-port message descriptor bits for a typed atomic
 * operation.  In Align1 SIMD mode, the quarter control of the current
 * instruction state selects which half of the sample mask is used.
 */
static void
brw_set_dp_typed_atomic_message(struct brw_codegen *p,
                                struct brw_inst *insn,
                                unsigned atomic_op,
                                bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   unsigned msg_control =
      atomic_op |   /* Atomic Operation Type: BRW_AOP_* */
      (response_expected ? 1 << 5 : 0); /* Return data expected */

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */

         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
      } else {
         /* Align16: SIMD4x2 variant of the message. */
         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
      }

   } else {
      /* Pre-Haswell gen7: typed atomics go through the render cache
       * data port (matches the SFID choice in brw_typed_atomic()).
       */
      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);

      if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
2981 | |||
/**
 * Emit a typed atomic operation SEND.
 *
 * Unlike the untyped variant, the message header is always present
 * (final 'true' argument to brw_send_indirect_surface_message()).
 */
void
brw_typed_atomic(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg payload,
                 struct brw_reg surface,
                 unsigned atomic_op,
                 unsigned msg_length,
                 bool response_expected) {
   const struct brw_device_info *devinfo = p->devinfo;
   /* HSW+ use data cache port 1; earlier gen7 use the render cache. */
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN6_SFID_DATAPORT_RENDER_CACHE);
   const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
   /* Response is 0 or 1 channels wide depending on response_expected. */
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
      brw_surface_payload_size(p, response_expected,
                               devinfo->gen >= 8 || devinfo->is_haswell, false),
      true);

   brw_set_dp_typed_atomic_message(
      p, insn, atomic_op, response_expected);
}
||
3006 | |||
/**
 * Fill in the data-port message descriptor bits for a typed surface
 * read of num_channels components.  In Align1 mode the quarter control
 * selects which half of the sample mask is used; the bit encodings
 * differ between the HSW+ and IVB message layouts below.
 */
static void
brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
                                      struct brw_inst *insn,
                                      unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of unused channels. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
         else
            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
   } else {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_SURFACE_READ);
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
3038 | |||
3039 | void |
||
3040 | brw_typed_surface_read(struct brw_codegen *p, |
||
3041 | struct brw_reg dst, |
||
3042 | struct brw_reg payload, |
||
3043 | struct brw_reg surface, |
||
3044 | unsigned msg_length, |
||
3045 | unsigned num_channels) |
||
3046 | { |
||
3047 | const struct brw_device_info *devinfo = p->devinfo; |
||
3048 | const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? |
||
3049 | HSW_SFID_DATAPORT_DATA_CACHE_1 : |
||
3050 | GEN6_SFID_DATAPORT_RENDER_CACHE); |
||
3051 | struct brw_inst *insn = brw_send_indirect_surface_message( |
||
3052 | p, sfid, dst, payload, surface, msg_length, |
||
3053 | brw_surface_payload_size(p, num_channels, |
||
3054 | devinfo->gen >= 8 || devinfo->is_haswell, false), |
||
3055 | true); |
||
3056 | |||
3057 | brw_set_dp_typed_surface_read_message( |
||
3058 | p, insn, num_channels); |
||
3059 | } |
||
3060 | |||
/**
 * Fill in the data-port message descriptor bits for a typed surface
 * write of num_channels components.  Mirrors the read variant above,
 * including the differing HSW+/IVB sample-mask slot encodings.
 */
static void
brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
                                       struct brw_inst *insn,
                                       unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of unused channels. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
         else
            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE);

   } else {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE);
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
||
3093 | |||
3094 | void |
||
3095 | brw_typed_surface_write(struct brw_codegen *p, |
||
3096 | struct brw_reg payload, |
||
3097 | struct brw_reg surface, |
||
3098 | unsigned msg_length, |
||
3099 | unsigned num_channels) |
||
3100 | { |
||
3101 | const struct brw_device_info *devinfo = p->devinfo; |
||
3102 | const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? |
||
3103 | HSW_SFID_DATAPORT_DATA_CACHE_1 : |
||
3104 | GEN6_SFID_DATAPORT_RENDER_CACHE); |
||
3105 | const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); |
||
3106 | /* Mask out unused components -- See comment in brw_untyped_atomic(). */ |
||
3107 | const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ? |
||
3108 | WRITEMASK_X : WRITEMASK_XYZW); |
||
3109 | struct brw_inst *insn = brw_send_indirect_surface_message( |
||
3110 | p, sfid, brw_writemask(brw_null_reg(), mask), |
||
3111 | payload, surface, msg_length, 0, true); |
||
3112 | |||
3113 | brw_set_dp_typed_surface_write_message( |
||
3114 | p, insn, num_channels); |
||
3115 | } |
||
3116 | |||
3117 | static void |
||
3118 | brw_set_memory_fence_message(struct brw_codegen *p, |
||
3119 | struct brw_inst *insn, |
||
3120 | enum brw_message_target sfid, |
||
3121 | bool commit_enable) |
||
3122 | { |
||
3123 | const struct brw_device_info *devinfo = p->devinfo; |
||
3124 | |||
3125 | brw_set_message_descriptor(p, insn, sfid, |
||
3126 | 1 /* message length */, |
||
3127 | (commit_enable ? 1 : 0) /* response length */, |
||
3128 | true /* header present */, |
||
3129 | false); |
||
3130 | |||
3131 | switch (sfid) { |
||
3132 | case GEN6_SFID_DATAPORT_RENDER_CACHE: |
||
3133 | brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE); |
||
3134 | break; |
||
3135 | case GEN7_SFID_DATAPORT_DATA_CACHE: |
||
3136 | brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE); |
||
3137 | break; |
||
3138 | default: |
||
3139 | unreachable("Not reached"); |
||
3140 | } |
||
3141 | |||
3142 | if (commit_enable) |
||
3143 | brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5); |
||
3144 | } |
||
3145 | |||
/**
 * Emit a memory fence ordering outstanding data-port cache messages.
 *
 * \p dst serves as destination and source purely for dependency
 * tracking (and as scratch for the fence responses) -- the MEMORY_FENCE
 * message doesn't write a useful result back.
 *
 * commit_enable is only set on Ivybridge (Gen7, non-Haswell); the
 * resulting response registers are what the stall MOV below reads to
 * serialize on completion of both flushes.
 */
void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
   struct brw_inst *insn;

   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
    * message doesn't write anything back.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, dst);
   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                commit_enable);

   if (devinfo->gen == 7 && !devinfo->is_haswell) {
      /* IVB does typed surface access through the render cache, so we need to
       * flush it too.  Use a different register so both flushes can be
       * pipelined by the hardware.
       */
      insn = next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn, offset(dst, 1));
      brw_set_src0(p, insn, offset(dst, 1));
      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
                                   commit_enable);

      /* Now write the response of the second message into the response of the
       * first to trigger a pipeline stall -- This way future render and data
       * cache messages will be properly ordered with respect to past data and
       * render cache messages.
       */
      brw_push_insn_state(p);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, dst, offset(dst, 1));
      brw_pop_insn_state(p);
   }
}
||
3186 | |||
3187 | void |
||
3188 | brw_pixel_interpolator_query(struct brw_codegen *p, |
||
3189 | struct brw_reg dest, |
||
3190 | struct brw_reg mrf, |
||
3191 | bool noperspective, |
||
3192 | unsigned mode, |
||
3193 | unsigned data, |
||
3194 | unsigned msg_length, |
||
3195 | unsigned response_length) |
||
3196 | { |
||
3197 | const struct brw_device_info *devinfo = p->devinfo; |
||
3198 | struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); |
||
3199 | |||
3200 | brw_set_dest(p, insn, dest); |
||
3201 | brw_set_src0(p, insn, mrf); |
||
3202 | brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR, |
||
3203 | msg_length, response_length, |
||
3204 | false /* header is never present for PI */, |
||
3205 | false); |
||
3206 | |||
3207 | brw_inst_set_pi_simd_mode( |
||
3208 | devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16); |
||
3209 | brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */ |
||
3210 | brw_inst_set_pi_nopersp(devinfo, insn, noperspective); |
||
3211 | brw_inst_set_pi_message_type(devinfo, insn, mode); |
||
3212 | brw_inst_set_pi_message_data(devinfo, insn, data); |
||
3213 | } |
||
3214 | |||
/**
 * Emit code that stores the index of the first active execution channel
 * into (the first component of) \p dst.
 *
 * Gen7+ only.  The sequence depends on the current access mode (Align1
 * vs. Align16/SIMD4x2) and on the hardware generation, because only
 * Gen8+ can usefully read the mask register with execution masking
 * disabled (see comments below).
 */
void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *inst;

   assert(devinfo->gen >= 7);

   brw_push_insn_state(p);

   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
      /* Run the whole sequence without execution masking so the result is
       * written even for inactive channels.
       */
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      if (devinfo->gen >= 8) {
         /* Getting the first active channel index is easy on Gen8: Just find
          * the first bit set in the mask register.  The same register exists
          * on HSW already but it reads back as all ones when the current
          * instruction has execution masking disabled, so it's kind of
          * useless.
          */
         inst = brw_FBL(p, vec1(dst),
                        retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));

         /* Quarter control has the effect of magically shifting the value of
          * this register.  Make sure it's set to zero.
          */
         brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q);
      } else {
         /* Pre-Gen8: materialize the execution mask in flag register f1.0
          * instead of reading mask0 directly.
          */
         const struct brw_reg flag = retype(brw_flag_reg(1, 0),
                                            BRW_REGISTER_TYPE_UD);

         /* Clear the flag first so stale bits don't leak into the result. */
         brw_MOV(p, flag, brw_imm_ud(0));

         /* Run a 16-wide instruction returning zero with execution masking
          * and a conditional modifier enabled in order to get the current
          * execution mask in f1.0.
          */
         inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
         brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16);
         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         /* Find-first-bit-set-low on the captured mask gives the index. */
         brw_FBL(p, vec1(dst), flag);
      }
   } else {
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      if (devinfo->gen >= 8) {
         /* In SIMD4x2 mode the first active channel index is just the
          * negation of the first bit of the mask register.
          */
         inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
                        negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
                        brw_imm_ud(1));

      } else {
         /* Overwrite the destination without and with execution masking to
          * find out which of the channels is active.
          */
         brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                 brw_imm_ud(1));

         /* Only the active channel overwrites the 1 with 0, so dst.x ends
          * up holding the index (0 or 1) of the first live channel.
          */
         inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                        brw_imm_ud(0));
         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
      }
   }

   brw_pop_insn_state(p);
}
||
3286 | |||
/**
 * Emit code that copies the component of \p src selected by the
 * (possibly dynamic) index \p idx into \p dst.
 *
 * \p src must be a directly-addressed GRF.  In Align1 mode a
 * register-valued index is handled with indirect addressing through a0;
 * in Align16 (SIMD4x2) mode the index can only be 0 or 1 and is handled
 * with a flag-predicated SEL between the two halves of the register.
 */
void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   brw_inst *inst;

   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
          src.address_mode == BRW_ADDRESS_DIRECT);

   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
       idx.file == BRW_IMMEDIATE_VALUE) {
      /* Trivial, the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0;
      brw_MOV(p, dst,
              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
               stride(suboffset(src, 4 * i), 0, 4, 1)));
   } else {
      if (align1) {
         const struct brw_reg addr =
            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
         /* Byte offset of src from the start of the register file. */
         const unsigned offset = src.nr * REG_SIZE + src.subnr;
         /* Limit in bytes of the signed indirect addressing immediate. */
         const unsigned limit = 512;

         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

         /* Take into account the component size and horizontal stride. */
         assert(src.vstride == src.hstride + src.width);
         brw_SHL(p, addr, vec1(idx),
                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
                            src.hstride - 1));

         /* We can only address up to limit bytes using the indirect
          * addressing immediate, account for the difference if the source
          * register is above this limit.
          */
         if (offset >= limit)
            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));

         brw_pop_insn_state(p);

         /* Use indirect addressing to fetch the specified component. */
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset % limit),
                        src.type));
      } else {
         /* In SIMD4x2 mode the index can be either zero or one, replicate it
          * to all bits of a flag register,
          */
         inst = brw_MOV(p,
                        brw_null_reg(),
                        stride(brw_swizzle1(idx, 0), 0, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         /* and use predicated SEL to pick the right channel. */
         inst = brw_SEL(p, dst,
                        stride(suboffset(src, 4), 0, 4, 1),
                        stride(src, 0, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
      }
   }
}
||
3361 | |||
3362 | /** |
||
3363 | * This instruction is generated as a single-channel align1 instruction by |
||
3364 | * both the VS and FS stages when using INTEL_DEBUG=shader_time. |
||
3365 | * |
||
3366 | * We can't use the typed atomic op in the FS because that has the execution |
||
3367 | * mask ANDed with the pixel mask, but we just want to write the one dword for |
||
3368 | * all the pixels. |
||
3369 | * |
||
 * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
||
3371 | * one u32. So we use the same untyped atomic write message as the pixel |
||
3372 | * shader. |
||
3373 | * |
||
3374 | * The untyped atomic operation requires a BUFFER surface type with RAW |
||
3375 | * format, and is only accessible through the legacy DATA_CACHE dataport |
||
3376 | * messages. |
||
3377 | */ |
||
void brw_shader_time_add(struct brw_codegen *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   /* Haswell and later route untyped atomics through data cache 1;
    * Ivybridge only has the legacy data cache dataport.
    */
   const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN7_SFID_DATAPORT_DATA_CACHE);
   assert(p->devinfo->gen >= 7);

   /* Force a single-channel Align1 SEND regardless of the surrounding
    * default state; restored by brw_pop_insn_state() below.
    */
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   brw_set_src1(p, send, brw_imm_ud(0));
   /* Two payload registers, no response -- the atomic's return value is
    * not needed (note the null destination above).
    */
   brw_set_message_descriptor(p, send, sfid, 2, 0, false, false);
   brw_inst_set_binding_table_index(p->devinfo, send, surf_index);
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false);

   brw_pop_insn_state(p);
}