Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3254 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
#include "brw_eu.h"

#include <string.h>
#include <stdlib.h>
||
36 | |||
37 | /*********************************************************************** |
||
38 | * Internal helper for constructing instructions |
||
39 | */ |
||
40 | |||
41 | static void guess_execution_size(struct brw_compile *p, |
||
42 | struct brw_instruction *insn, |
||
43 | struct brw_reg reg) |
||
44 | { |
||
45 | if (reg.width == BRW_WIDTH_8 && p->compressed) |
||
46 | insn->header.execution_size = BRW_EXECUTE_16; |
||
47 | else |
||
48 | insn->header.execution_size = reg.width; |
||
49 | } |
||
50 | |||
51 | |||
52 | /** |
||
53 | * Prior to Sandybridge, the SEND instruction accepted non-MRF source |
||
54 | * registers, implicitly moving the operand to a message register. |
||
55 | * |
||
56 | * On Sandybridge, this is no longer the case. This function performs the |
||
57 | * explicit move; it should be called before emitting a SEND instruction. |
||
58 | */ |
||
59 | void |
||
60 | gen6_resolve_implied_move(struct brw_compile *p, |
||
61 | struct brw_reg *src, |
||
62 | unsigned msg_reg_nr) |
||
63 | { |
||
64 | if (p->gen < 060) |
||
65 | return; |
||
66 | |||
67 | if (src->file == BRW_MESSAGE_REGISTER_FILE) |
||
68 | return; |
||
69 | |||
70 | if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { |
||
71 | brw_push_insn_state(p); |
||
72 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
73 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
74 | brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src)); |
||
75 | brw_pop_insn_state(p); |
||
76 | } |
||
77 | *src = brw_message_reg(msg_reg_nr); |
||
78 | } |
||
79 | |||
80 | static void |
||
81 | gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) |
||
82 | { |
||
83 | /* From the BSpec / ISA Reference / send - [DevIVB+]: |
||
84 | * "The send with EOT should use register space R112-R127 for |
||
85 | * to enable loading of a new thread into the same slot while the message |
||
86 | * with EOT for current thread is pending dispatch." |
||
87 | * |
||
88 | * Since we're pretending to have 16 MRFs anyway, we may as well use the |
||
89 | * registers required for messages with EOT. |
||
90 | */ |
||
91 | if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) { |
||
92 | reg->file = BRW_GENERAL_REGISTER_FILE; |
||
93 | reg->nr += 111; |
||
94 | } |
||
95 | } |
||
96 | |||
97 | void |
||
98 | brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, |
||
99 | struct brw_reg dest) |
||
100 | { |
||
101 | if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && |
||
102 | dest.file != BRW_MESSAGE_REGISTER_FILE) |
||
103 | assert(dest.nr < 128); |
||
104 | |||
105 | gen7_convert_mrf_to_grf(p, &dest); |
||
106 | |||
107 | insn->bits1.da1.dest_reg_file = dest.file; |
||
108 | insn->bits1.da1.dest_reg_type = dest.type; |
||
109 | insn->bits1.da1.dest_address_mode = dest.address_mode; |
||
110 | |||
111 | if (dest.address_mode == BRW_ADDRESS_DIRECT) { |
||
112 | insn->bits1.da1.dest_reg_nr = dest.nr; |
||
113 | |||
114 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
115 | insn->bits1.da1.dest_subreg_nr = dest.subnr; |
||
116 | if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) |
||
117 | dest.hstride = BRW_HORIZONTAL_STRIDE_1; |
||
118 | insn->bits1.da1.dest_horiz_stride = dest.hstride; |
||
119 | } else { |
||
120 | insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; |
||
121 | insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; |
||
122 | /* even ignored in da16, still need to set as '01' */ |
||
123 | insn->bits1.da16.dest_horiz_stride = 1; |
||
124 | } |
||
125 | } else { |
||
126 | insn->bits1.ia1.dest_subreg_nr = dest.subnr; |
||
127 | |||
128 | /* These are different sizes in align1 vs align16: |
||
129 | */ |
||
130 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
131 | insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; |
||
132 | if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) |
||
133 | dest.hstride = BRW_HORIZONTAL_STRIDE_1; |
||
134 | insn->bits1.ia1.dest_horiz_stride = dest.hstride; |
||
135 | } |
||
136 | else { |
||
137 | insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; |
||
138 | /* even ignored in da16, still need to set as '01' */ |
||
139 | insn->bits1.ia16.dest_horiz_stride = 1; |
||
140 | } |
||
141 | } |
||
142 | |||
143 | guess_execution_size(p, insn, dest); |
||
144 | } |
||
145 | |||
/*
 * Element size for each register-type encoding, indexed by the encoding
 * value.  Encoding 6 has no entry of its own and therefore reads as 0.
 */
static const int reg_type_size[8] = {
    4, 4,   /* encodings 0, 1 */
    2, 2,   /* encodings 2, 3 */
    1, 1,   /* encodings 4, 5 */
    0,      /* encoding 6: not populated */
    4       /* encoding 7 */
};
||
155 | |||
156 | static void |
||
157 | validate_reg(struct brw_instruction *insn, struct brw_reg reg) |
||
158 | { |
||
159 | int hstride_for_reg[] = {0, 1, 2, 4}; |
||
160 | int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256}; |
||
161 | int width_for_reg[] = {1, 2, 4, 8, 16}; |
||
162 | int execsize_for_reg[] = {1, 2, 4, 8, 16}; |
||
163 | int width, hstride, vstride, execsize; |
||
164 | |||
165 | if (reg.file == BRW_IMMEDIATE_VALUE) { |
||
166 | /* 3.3.6: Region Parameters. Restriction: Immediate vectors |
||
167 | * mean the destination has to be 128-bit aligned and the |
||
168 | * destination horiz stride has to be a word. |
||
169 | */ |
||
170 | if (reg.type == BRW_REGISTER_TYPE_V) { |
||
171 | assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] * |
||
172 | reg_type_size[insn->bits1.da1.dest_reg_type] == 2); |
||
173 | } |
||
174 | |||
175 | return; |
||
176 | } |
||
177 | |||
178 | if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
179 | reg.file == BRW_ARF_NULL) |
||
180 | return; |
||
181 | |||
182 | hstride = hstride_for_reg[reg.hstride]; |
||
183 | |||
184 | if (reg.vstride == 0xf) { |
||
185 | vstride = -1; |
||
186 | } else { |
||
187 | vstride = vstride_for_reg[reg.vstride]; |
||
188 | } |
||
189 | |||
190 | width = width_for_reg[reg.width]; |
||
191 | |||
192 | execsize = execsize_for_reg[insn->header.execution_size]; |
||
193 | |||
194 | /* Restrictions from 3.3.10: Register Region Restrictions. */ |
||
195 | /* 3. */ |
||
196 | assert(execsize >= width); |
||
197 | |||
198 | /* 4. */ |
||
199 | if (execsize == width && hstride != 0) { |
||
200 | assert(vstride == -1 || vstride == width * hstride); |
||
201 | } |
||
202 | |||
203 | /* 5. */ |
||
204 | if (execsize == width && hstride == 0) { |
||
205 | /* no restriction on vstride. */ |
||
206 | } |
||
207 | |||
208 | /* 6. */ |
||
209 | if (width == 1) { |
||
210 | assert(hstride == 0); |
||
211 | } |
||
212 | |||
213 | /* 7. */ |
||
214 | if (execsize == 1 && width == 1) { |
||
215 | assert(hstride == 0); |
||
216 | assert(vstride == 0); |
||
217 | } |
||
218 | |||
219 | /* 8. */ |
||
220 | if (vstride == 0 && hstride == 0) { |
||
221 | assert(width == 1); |
||
222 | } |
||
223 | |||
224 | /* 10. Check destination issues. */ |
||
225 | } |
||
226 | |||
227 | void |
||
228 | brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, |
||
229 | struct brw_reg reg) |
||
230 | { |
||
231 | if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) |
||
232 | assert(reg.nr < 128); |
||
233 | |||
234 | gen7_convert_mrf_to_grf(p, ®); |
||
235 | |||
236 | validate_reg(insn, reg); |
||
237 | |||
238 | insn->bits1.da1.src0_reg_file = reg.file; |
||
239 | insn->bits1.da1.src0_reg_type = reg.type; |
||
240 | insn->bits2.da1.src0_abs = reg.abs; |
||
241 | insn->bits2.da1.src0_negate = reg.negate; |
||
242 | insn->bits2.da1.src0_address_mode = reg.address_mode; |
||
243 | |||
244 | if (reg.file == BRW_IMMEDIATE_VALUE) { |
||
245 | insn->bits3.ud = reg.dw1.ud; |
||
246 | |||
247 | /* Required to set some fields in src1 as well: |
||
248 | */ |
||
249 | insn->bits1.da1.src1_reg_file = 0; /* arf */ |
||
250 | insn->bits1.da1.src1_reg_type = reg.type; |
||
251 | } else { |
||
252 | if (reg.address_mode == BRW_ADDRESS_DIRECT) { |
||
253 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
254 | insn->bits2.da1.src0_subreg_nr = reg.subnr; |
||
255 | insn->bits2.da1.src0_reg_nr = reg.nr; |
||
256 | } else { |
||
257 | insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; |
||
258 | insn->bits2.da16.src0_reg_nr = reg.nr; |
||
259 | } |
||
260 | } else { |
||
261 | insn->bits2.ia1.src0_subreg_nr = reg.subnr; |
||
262 | |||
263 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
264 | insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; |
||
265 | } else { |
||
266 | insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; |
||
267 | } |
||
268 | } |
||
269 | |||
270 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
271 | if (reg.width == BRW_WIDTH_1 && |
||
272 | insn->header.execution_size == BRW_EXECUTE_1) { |
||
273 | insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
274 | insn->bits2.da1.src0_width = BRW_WIDTH_1; |
||
275 | insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; |
||
276 | } else { |
||
277 | insn->bits2.da1.src0_horiz_stride = reg.hstride; |
||
278 | insn->bits2.da1.src0_width = reg.width; |
||
279 | insn->bits2.da1.src0_vert_stride = reg.vstride; |
||
280 | } |
||
281 | } else { |
||
282 | insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); |
||
283 | insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); |
||
284 | insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); |
||
285 | insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); |
||
286 | |||
287 | /* This is an oddity of the fact we're using the same |
||
288 | * descriptions for registers in align_16 as align_1: |
||
289 | */ |
||
290 | if (reg.vstride == BRW_VERTICAL_STRIDE_8) |
||
291 | insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; |
||
292 | else |
||
293 | insn->bits2.da16.src0_vert_stride = reg.vstride; |
||
294 | } |
||
295 | } |
||
296 | } |
||
297 | |||
298 | void brw_set_src1(struct brw_compile *p, |
||
299 | struct brw_instruction *insn, |
||
300 | struct brw_reg reg) |
||
301 | { |
||
302 | assert(reg.file != BRW_MESSAGE_REGISTER_FILE); |
||
303 | assert(reg.nr < 128); |
||
304 | |||
305 | gen7_convert_mrf_to_grf(p, ®); |
||
306 | |||
307 | validate_reg(insn, reg); |
||
308 | |||
309 | insn->bits1.da1.src1_reg_file = reg.file; |
||
310 | insn->bits1.da1.src1_reg_type = reg.type; |
||
311 | insn->bits3.da1.src1_abs = reg.abs; |
||
312 | insn->bits3.da1.src1_negate = reg.negate; |
||
313 | |||
314 | /* Only src1 can be immediate in two-argument instructions. */ |
||
315 | assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); |
||
316 | |||
317 | if (reg.file == BRW_IMMEDIATE_VALUE) { |
||
318 | insn->bits3.ud = reg.dw1.ud; |
||
319 | } else { |
||
320 | /* This is a hardware restriction, which may or may not be lifted |
||
321 | * in the future: |
||
322 | */ |
||
323 | assert (reg.address_mode == BRW_ADDRESS_DIRECT); |
||
324 | /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ |
||
325 | |||
326 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
327 | insn->bits3.da1.src1_subreg_nr = reg.subnr; |
||
328 | insn->bits3.da1.src1_reg_nr = reg.nr; |
||
329 | } else { |
||
330 | insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; |
||
331 | insn->bits3.da16.src1_reg_nr = reg.nr; |
||
332 | } |
||
333 | |||
334 | if (insn->header.access_mode == BRW_ALIGN_1) { |
||
335 | if (reg.width == BRW_WIDTH_1 && |
||
336 | insn->header.execution_size == BRW_EXECUTE_1) { |
||
337 | insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; |
||
338 | insn->bits3.da1.src1_width = BRW_WIDTH_1; |
||
339 | insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; |
||
340 | } else { |
||
341 | insn->bits3.da1.src1_horiz_stride = reg.hstride; |
||
342 | insn->bits3.da1.src1_width = reg.width; |
||
343 | insn->bits3.da1.src1_vert_stride = reg.vstride; |
||
344 | } |
||
345 | } else { |
||
346 | insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); |
||
347 | insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); |
||
348 | insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); |
||
349 | insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); |
||
350 | |||
351 | /* This is an oddity of the fact we're using the same |
||
352 | * descriptions for registers in align_16 as align_1: |
||
353 | */ |
||
354 | if (reg.vstride == BRW_VERTICAL_STRIDE_8) |
||
355 | insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; |
||
356 | else |
||
357 | insn->bits3.da16.src1_vert_stride = reg.vstride; |
||
358 | } |
||
359 | } |
||
360 | } |
||
361 | |||
362 | /** |
||
363 | * Set the Message Descriptor and Extended Message Descriptor fields |
||
364 | * for SEND messages. |
||
365 | * |
||
366 | * \note This zeroes out the Function Control bits, so it must be called |
||
367 | * \b before filling out any message-specific data. Callers can |
||
368 | * choose not to fill in irrelevant bits; they will be zero. |
||
369 | */ |
||
370 | static void |
||
371 | brw_set_message_descriptor(struct brw_compile *p, |
||
372 | struct brw_instruction *inst, |
||
373 | enum brw_message_target sfid, |
||
374 | unsigned msg_length, |
||
375 | unsigned response_length, |
||
376 | bool header_present, |
||
377 | bool end_of_thread) |
||
378 | { |
||
379 | brw_set_src1(p, inst, brw_imm_d(0)); |
||
380 | |||
381 | if (p->gen >= 050) { |
||
382 | inst->bits3.generic_gen5.header_present = header_present; |
||
383 | inst->bits3.generic_gen5.response_length = response_length; |
||
384 | inst->bits3.generic_gen5.msg_length = msg_length; |
||
385 | inst->bits3.generic_gen5.end_of_thread = end_of_thread; |
||
386 | |||
387 | if (p->gen >= 060) { |
||
388 | /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */ |
||
389 | inst->header.destreg__conditionalmod = sfid; |
||
390 | } else { |
||
391 | /* Set Extended Message Descriptor (ex_desc) */ |
||
392 | inst->bits2.send_gen5.sfid = sfid; |
||
393 | inst->bits2.send_gen5.end_of_thread = end_of_thread; |
||
394 | } |
||
395 | } else { |
||
396 | inst->bits3.generic.response_length = response_length; |
||
397 | inst->bits3.generic.msg_length = msg_length; |
||
398 | inst->bits3.generic.msg_target = sfid; |
||
399 | inst->bits3.generic.end_of_thread = end_of_thread; |
||
400 | } |
||
401 | } |
||
402 | |||
403 | |||
404 | static void brw_set_math_message(struct brw_compile *p, |
||
405 | struct brw_instruction *insn, |
||
406 | unsigned function, |
||
407 | unsigned integer_type, |
||
408 | bool low_precision, |
||
409 | bool saturate, |
||
410 | unsigned dataType) |
||
411 | { |
||
412 | unsigned msg_length; |
||
413 | unsigned response_length; |
||
414 | |||
415 | /* Infer message length from the function */ |
||
416 | switch (function) { |
||
417 | case BRW_MATH_FUNCTION_POW: |
||
418 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: |
||
419 | case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: |
||
420 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
||
421 | msg_length = 2; |
||
422 | break; |
||
423 | default: |
||
424 | msg_length = 1; |
||
425 | break; |
||
426 | } |
||
427 | |||
428 | /* Infer response length from the function */ |
||
429 | switch (function) { |
||
430 | case BRW_MATH_FUNCTION_SINCOS: |
||
431 | case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: |
||
432 | response_length = 2; |
||
433 | break; |
||
434 | default: |
||
435 | response_length = 1; |
||
436 | break; |
||
437 | } |
||
438 | |||
439 | brw_set_message_descriptor(p, insn, BRW_SFID_MATH, |
||
440 | msg_length, response_length, |
||
441 | false, false); |
||
442 | if (p->gen == 050) { |
||
443 | insn->bits3.math_gen5.function = function; |
||
444 | insn->bits3.math_gen5.int_type = integer_type; |
||
445 | insn->bits3.math_gen5.precision = low_precision; |
||
446 | insn->bits3.math_gen5.saturate = saturate; |
||
447 | insn->bits3.math_gen5.data_type = dataType; |
||
448 | insn->bits3.math_gen5.snapshot = 0; |
||
449 | } else { |
||
450 | insn->bits3.math.function = function; |
||
451 | insn->bits3.math.int_type = integer_type; |
||
452 | insn->bits3.math.precision = low_precision; |
||
453 | insn->bits3.math.saturate = saturate; |
||
454 | insn->bits3.math.data_type = dataType; |
||
455 | } |
||
456 | } |
||
457 | |||
458 | static void brw_set_ff_sync_message(struct brw_compile *p, |
||
459 | struct brw_instruction *insn, |
||
460 | bool allocate, |
||
461 | unsigned response_length, |
||
462 | bool end_of_thread) |
||
463 | { |
||
464 | brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
||
465 | 1, response_length, |
||
466 | true, end_of_thread); |
||
467 | insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ |
||
468 | insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ |
||
469 | insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */ |
||
470 | insn->bits3.urb_gen5.allocate = allocate; |
||
471 | insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */ |
||
472 | insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */ |
||
473 | } |
||
474 | |||
475 | static void brw_set_urb_message(struct brw_compile *p, |
||
476 | struct brw_instruction *insn, |
||
477 | bool allocate, |
||
478 | bool used, |
||
479 | unsigned msg_length, |
||
480 | unsigned response_length, |
||
481 | bool end_of_thread, |
||
482 | bool complete, |
||
483 | unsigned offset, |
||
484 | unsigned swizzle_control) |
||
485 | { |
||
486 | brw_set_message_descriptor(p, insn, BRW_SFID_URB, |
||
487 | msg_length, response_length, true, end_of_thread); |
||
488 | if (p->gen >= 070) { |
||
489 | insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ |
||
490 | insn->bits3.urb_gen7.offset = offset; |
||
491 | assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); |
||
492 | insn->bits3.urb_gen7.swizzle_control = swizzle_control; |
||
493 | /* per_slot_offset = 0 makes it ignore offsets in message header */ |
||
494 | insn->bits3.urb_gen7.per_slot_offset = 0; |
||
495 | insn->bits3.urb_gen7.complete = complete; |
||
496 | } else if (p->gen >= 050) { |
||
497 | insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ |
||
498 | insn->bits3.urb_gen5.offset = offset; |
||
499 | insn->bits3.urb_gen5.swizzle_control = swizzle_control; |
||
500 | insn->bits3.urb_gen5.allocate = allocate; |
||
501 | insn->bits3.urb_gen5.used = used; /* ? */ |
||
502 | insn->bits3.urb_gen5.complete = complete; |
||
503 | } else { |
||
504 | insn->bits3.urb.opcode = 0; /* ? */ |
||
505 | insn->bits3.urb.offset = offset; |
||
506 | insn->bits3.urb.swizzle_control = swizzle_control; |
||
507 | insn->bits3.urb.allocate = allocate; |
||
508 | insn->bits3.urb.used = used; /* ? */ |
||
509 | insn->bits3.urb.complete = complete; |
||
510 | } |
||
511 | } |
||
512 | |||
513 | void |
||
514 | brw_set_dp_write_message(struct brw_compile *p, |
||
515 | struct brw_instruction *insn, |
||
516 | unsigned binding_table_index, |
||
517 | unsigned msg_control, |
||
518 | unsigned msg_type, |
||
519 | unsigned msg_length, |
||
520 | bool header_present, |
||
521 | bool last_render_target, |
||
522 | unsigned response_length, |
||
523 | bool end_of_thread, |
||
524 | bool send_commit_msg) |
||
525 | { |
||
526 | unsigned sfid; |
||
527 | |||
528 | if (p->gen >= 070) { |
||
529 | /* Use the Render Cache for RT writes; otherwise use the Data Cache */ |
||
530 | if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) |
||
531 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
532 | else |
||
533 | sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
||
534 | } else if (p->gen >= 060) { |
||
535 | /* Use the render cache for all write messages. */ |
||
536 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
537 | } else { |
||
538 | sfid = BRW_SFID_DATAPORT_WRITE; |
||
539 | } |
||
540 | |||
541 | brw_set_message_descriptor(p, insn, sfid, |
||
542 | msg_length, response_length, |
||
543 | header_present, end_of_thread); |
||
544 | |||
545 | if (p->gen >= 070) { |
||
546 | insn->bits3.gen7_dp.binding_table_index = binding_table_index; |
||
547 | insn->bits3.gen7_dp.msg_control = msg_control; |
||
548 | insn->bits3.gen7_dp.last_render_target = last_render_target; |
||
549 | insn->bits3.gen7_dp.msg_type = msg_type; |
||
550 | } else if (p->gen >= 060) { |
||
551 | insn->bits3.gen6_dp.binding_table_index = binding_table_index; |
||
552 | insn->bits3.gen6_dp.msg_control = msg_control; |
||
553 | insn->bits3.gen6_dp.last_render_target = last_render_target; |
||
554 | insn->bits3.gen6_dp.msg_type = msg_type; |
||
555 | insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; |
||
556 | } else if (p->gen >= 050) { |
||
557 | insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; |
||
558 | insn->bits3.dp_write_gen5.msg_control = msg_control; |
||
559 | insn->bits3.dp_write_gen5.last_render_target = last_render_target; |
||
560 | insn->bits3.dp_write_gen5.msg_type = msg_type; |
||
561 | insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; |
||
562 | } else { |
||
563 | insn->bits3.dp_write.binding_table_index = binding_table_index; |
||
564 | insn->bits3.dp_write.msg_control = msg_control; |
||
565 | insn->bits3.dp_write.last_render_target = last_render_target; |
||
566 | insn->bits3.dp_write.msg_type = msg_type; |
||
567 | insn->bits3.dp_write.send_commit_msg = send_commit_msg; |
||
568 | } |
||
569 | } |
||
570 | |||
571 | void |
||
572 | brw_set_dp_read_message(struct brw_compile *p, |
||
573 | struct brw_instruction *insn, |
||
574 | unsigned binding_table_index, |
||
575 | unsigned msg_control, |
||
576 | unsigned msg_type, |
||
577 | unsigned target_cache, |
||
578 | unsigned msg_length, |
||
579 | unsigned response_length) |
||
580 | { |
||
581 | unsigned sfid; |
||
582 | |||
583 | if (p->gen >= 070) { |
||
584 | sfid = GEN7_SFID_DATAPORT_DATA_CACHE; |
||
585 | } else if (p->gen >= 060) { |
||
586 | if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) |
||
587 | sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; |
||
588 | else |
||
589 | sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; |
||
590 | } else { |
||
591 | sfid = BRW_SFID_DATAPORT_READ; |
||
592 | } |
||
593 | |||
594 | brw_set_message_descriptor(p, insn, sfid, |
||
595 | msg_length, response_length, |
||
596 | true, false); |
||
597 | |||
598 | if (p->gen >= 070) { |
||
599 | insn->bits3.gen7_dp.binding_table_index = binding_table_index; |
||
600 | insn->bits3.gen7_dp.msg_control = msg_control; |
||
601 | insn->bits3.gen7_dp.last_render_target = 0; |
||
602 | insn->bits3.gen7_dp.msg_type = msg_type; |
||
603 | } else if (p->gen >= 060) { |
||
604 | insn->bits3.gen6_dp.binding_table_index = binding_table_index; |
||
605 | insn->bits3.gen6_dp.msg_control = msg_control; |
||
606 | insn->bits3.gen6_dp.last_render_target = 0; |
||
607 | insn->bits3.gen6_dp.msg_type = msg_type; |
||
608 | insn->bits3.gen6_dp.send_commit_msg = 0; |
||
609 | } else if (p->gen >= 050) { |
||
610 | insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; |
||
611 | insn->bits3.dp_read_gen5.msg_control = msg_control; |
||
612 | insn->bits3.dp_read_gen5.msg_type = msg_type; |
||
613 | insn->bits3.dp_read_gen5.target_cache = target_cache; |
||
614 | } else if (p->gen >= 045) { |
||
615 | insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ |
||
616 | insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ |
||
617 | insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ |
||
618 | insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ |
||
619 | } else { |
||
620 | insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ |
||
621 | insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ |
||
622 | insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ |
||
623 | insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ |
||
624 | } |
||
625 | } |
||
626 | |||
627 | static void brw_set_sampler_message(struct brw_compile *p, |
||
628 | struct brw_instruction *insn, |
||
629 | unsigned binding_table_index, |
||
630 | unsigned sampler, |
||
631 | unsigned msg_type, |
||
632 | unsigned response_length, |
||
633 | unsigned msg_length, |
||
634 | bool header_present, |
||
635 | unsigned simd_mode) |
||
636 | { |
||
637 | brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, |
||
638 | msg_length, response_length, |
||
639 | header_present, false); |
||
640 | |||
641 | if (p->gen >= 070) { |
||
642 | insn->bits3.sampler_gen7.binding_table_index = binding_table_index; |
||
643 | insn->bits3.sampler_gen7.sampler = sampler; |
||
644 | insn->bits3.sampler_gen7.msg_type = msg_type; |
||
645 | insn->bits3.sampler_gen7.simd_mode = simd_mode; |
||
646 | } else if (p->gen >= 050) { |
||
647 | insn->bits3.sampler_gen5.binding_table_index = binding_table_index; |
||
648 | insn->bits3.sampler_gen5.sampler = sampler; |
||
649 | insn->bits3.sampler_gen5.msg_type = msg_type; |
||
650 | insn->bits3.sampler_gen5.simd_mode = simd_mode; |
||
651 | } else if (p->gen >= 045) { |
||
652 | insn->bits3.sampler_g4x.binding_table_index = binding_table_index; |
||
653 | insn->bits3.sampler_g4x.sampler = sampler; |
||
654 | insn->bits3.sampler_g4x.msg_type = msg_type; |
||
655 | } else { |
||
656 | insn->bits3.sampler.binding_table_index = binding_table_index; |
||
657 | insn->bits3.sampler.sampler = sampler; |
||
658 | insn->bits3.sampler.msg_type = msg_type; |
||
659 | insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; |
||
660 | } |
||
661 | } |
||
662 | |||
663 | |||
664 | void brw_NOP(struct brw_compile *p) |
||
665 | { |
||
666 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP); |
||
667 | brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
||
668 | brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
||
669 | brw_set_src1(p, insn, brw_imm_ud(0x0)); |
||
670 | } |
||
671 | |||
672 | /*********************************************************************** |
||
673 | * Comparisons, if/else/endif |
||
674 | */ |
||
675 | |||
676 | static void |
||
677 | push_if_stack(struct brw_compile *p, struct brw_instruction *inst) |
||
678 | { |
||
679 | p->if_stack[p->if_stack_depth] = inst; |
||
680 | |||
681 | p->if_stack_depth++; |
||
682 | if (p->if_stack_array_size <= p->if_stack_depth) { |
||
683 | p->if_stack_array_size *= 2; |
||
684 | p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size); |
||
685 | } |
||
686 | } |
||
687 | |||
688 | /* EU takes the value from the flag register and pushes it onto some |
||
689 | * sort of a stack (presumably merging with any flag value already on |
||
690 | * the stack). Within an if block, the flags at the top of the stack |
||
691 | * control execution on each channel of the unit, eg. on each of the |
||
692 | * 16 pixel values in our wm programs. |
||
693 | * |
||
694 | * When the matching 'else' instruction is reached (presumably by |
||
695 | * countdown of the instruction count patched in by our ELSE/ENDIF |
||
696 | * functions), the relevent flags are inverted. |
||
697 | * |
||
698 | * When the matching 'endif' instruction is reached, the flags are |
||
699 | * popped off. If the stack is now empty, normal execution resumes. |
||
700 | */ |
||
701 | struct brw_instruction * |
||
702 | brw_IF(struct brw_compile *p, unsigned execute_size) |
||
703 | { |
||
704 | struct brw_instruction *insn; |
||
705 | |||
706 | insn = brw_next_insn(p, BRW_OPCODE_IF); |
||
707 | |||
708 | /* Override the defaults for this instruction: */ |
||
709 | if (p->gen < 060) { |
||
710 | brw_set_dest(p, insn, brw_ip_reg()); |
||
711 | brw_set_src0(p, insn, brw_ip_reg()); |
||
712 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
713 | } else if (p->gen < 070) { |
||
714 | brw_set_dest(p, insn, brw_imm_w(0)); |
||
715 | insn->bits1.branch_gen6.jump_count = 0; |
||
716 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
717 | brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
||
718 | } else { |
||
719 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
720 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
721 | brw_set_src1(p, insn, brw_imm_ud(0)); |
||
722 | insn->bits3.break_cont.jip = 0; |
||
723 | insn->bits3.break_cont.uip = 0; |
||
724 | } |
||
725 | |||
726 | insn->header.execution_size = execute_size; |
||
727 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
728 | insn->header.predicate_control = BRW_PREDICATE_NORMAL; |
||
729 | insn->header.mask_control = BRW_MASK_ENABLE; |
||
730 | if (!p->single_program_flow) |
||
731 | insn->header.thread_control = BRW_THREAD_SWITCH; |
||
732 | |||
733 | p->current->header.predicate_control = BRW_PREDICATE_NONE; |
||
734 | |||
735 | push_if_stack(p, insn); |
||
736 | return insn; |
||
737 | } |
||
738 | |||
739 | /* This function is only used for gen6-style IF instructions with an |
||
740 | * embedded comparison (conditional modifier). It is not used on gen7. |
||
741 | */ |
||
742 | struct brw_instruction * |
||
743 | gen6_IF(struct brw_compile *p, uint32_t conditional, |
||
744 | struct brw_reg src0, struct brw_reg src1) |
||
745 | { |
||
746 | struct brw_instruction *insn; |
||
747 | |||
748 | insn = brw_next_insn(p, BRW_OPCODE_IF); |
||
749 | |||
750 | brw_set_dest(p, insn, brw_imm_w(0)); |
||
751 | if (p->compressed) { |
||
752 | insn->header.execution_size = BRW_EXECUTE_16; |
||
753 | } else { |
||
754 | insn->header.execution_size = BRW_EXECUTE_8; |
||
755 | } |
||
756 | insn->bits1.branch_gen6.jump_count = 0; |
||
757 | brw_set_src0(p, insn, src0); |
||
758 | brw_set_src1(p, insn, src1); |
||
759 | |||
760 | assert(insn->header.compression_control == BRW_COMPRESSION_NONE); |
||
761 | assert(insn->header.predicate_control == BRW_PREDICATE_NONE); |
||
762 | insn->header.destreg__conditionalmod = conditional; |
||
763 | |||
764 | if (!p->single_program_flow) |
||
765 | insn->header.thread_control = BRW_THREAD_SWITCH; |
||
766 | |||
767 | push_if_stack(p, insn); |
||
768 | return insn; |
||
769 | } |
||
770 | |||
771 | /** |
||
772 | * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. |
||
773 | */ |
||
774 | static void |
||
775 | convert_IF_ELSE_to_ADD(struct brw_compile *p, |
||
776 | struct brw_instruction *if_inst, |
||
777 | struct brw_instruction *else_inst) |
||
778 | { |
||
779 | /* The next instruction (where the ENDIF would be, if it existed) */ |
||
780 | struct brw_instruction *next_inst = &p->store[p->nr_insn]; |
||
781 | |||
782 | assert(p->single_program_flow); |
||
783 | assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); |
||
784 | assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); |
||
785 | assert(if_inst->header.execution_size == BRW_EXECUTE_1); |
||
786 | |||
787 | /* Convert IF to an ADD instruction that moves the instruction pointer |
||
788 | * to the first instruction of the ELSE block. If there is no ELSE |
||
789 | * block, point to where ENDIF would be. Reverse the predicate. |
||
790 | * |
||
791 | * There's no need to execute an ENDIF since we don't need to do any |
||
792 | * stack operations, and if we're currently executing, we just want to |
||
793 | * continue normally. |
||
794 | */ |
||
795 | if_inst->header.opcode = BRW_OPCODE_ADD; |
||
796 | if_inst->header.predicate_inverse = 1; |
||
797 | |||
798 | if (else_inst != NULL) { |
||
799 | /* Convert ELSE to an ADD instruction that points where the ENDIF |
||
800 | * would be. |
||
801 | */ |
||
802 | else_inst->header.opcode = BRW_OPCODE_ADD; |
||
803 | |||
804 | if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; |
||
805 | else_inst->bits3.ud = (next_inst - else_inst) * 16; |
||
806 | } else { |
||
807 | if_inst->bits3.ud = (next_inst - if_inst) * 16; |
||
808 | } |
||
809 | } |
||
810 | |||
811 | /** |
||
812 | * Patch IF and ELSE instructions with appropriate jump targets. |
||
813 | */ |
||
814 | static void |
||
815 | patch_IF_ELSE(struct brw_compile *p, |
||
816 | struct brw_instruction *if_inst, |
||
817 | struct brw_instruction *else_inst, |
||
818 | struct brw_instruction *endif_inst) |
||
819 | { |
||
820 | unsigned br = 1; |
||
821 | |||
822 | assert(!p->single_program_flow); |
||
823 | assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); |
||
824 | assert(endif_inst != NULL); |
||
825 | assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); |
||
826 | |||
827 | /* Jump count is for 64bit data chunk each, so one 128bit instruction |
||
828 | * requires 2 chunks. |
||
829 | */ |
||
830 | if (p->gen >= 050) |
||
831 | br = 2; |
||
832 | |||
833 | assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); |
||
834 | endif_inst->header.execution_size = if_inst->header.execution_size; |
||
835 | |||
836 | if (else_inst == NULL) { |
||
837 | /* Patch IF -> ENDIF */ |
||
838 | if (p->gen < 060) { |
||
839 | /* Turn it into an IFF, which means no mask stack operations for |
||
840 | * all-false and jumping past the ENDIF. |
||
841 | */ |
||
842 | if_inst->header.opcode = BRW_OPCODE_IFF; |
||
843 | if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); |
||
844 | if_inst->bits3.if_else.pop_count = 0; |
||
845 | if_inst->bits3.if_else.pad0 = 0; |
||
846 | } else if (p->gen < 070) { |
||
847 | /* As of gen6, there is no IFF and IF must point to the ENDIF. */ |
||
848 | if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); |
||
849 | } else { |
||
850 | if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); |
||
851 | if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); |
||
852 | } |
||
853 | } else { |
||
854 | else_inst->header.execution_size = if_inst->header.execution_size; |
||
855 | |||
856 | /* Patch IF -> ELSE */ |
||
857 | if (p->gen < 060) { |
||
858 | if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); |
||
859 | if_inst->bits3.if_else.pop_count = 0; |
||
860 | if_inst->bits3.if_else.pad0 = 0; |
||
861 | } else if (p->gen <= 070) { |
||
862 | if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); |
||
863 | } |
||
864 | |||
865 | /* Patch ELSE -> ENDIF */ |
||
866 | if (p->gen < 060) { |
||
867 | /* BRW_OPCODE_ELSE pre-gen6 should point just past the |
||
868 | * matching ENDIF. |
||
869 | */ |
||
870 | else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); |
||
871 | else_inst->bits3.if_else.pop_count = 1; |
||
872 | else_inst->bits3.if_else.pad0 = 0; |
||
873 | } else if (p->gen < 070) { |
||
874 | /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ |
||
875 | else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); |
||
876 | } else { |
||
877 | /* The IF instruction's JIP should point just past the ELSE */ |
||
878 | if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); |
||
879 | /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ |
||
880 | if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); |
||
881 | else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); |
||
882 | } |
||
883 | } |
||
884 | } |
||
885 | |||
886 | void |
||
887 | brw_ELSE(struct brw_compile *p) |
||
888 | { |
||
889 | struct brw_instruction *insn; |
||
890 | |||
891 | insn = brw_next_insn(p, BRW_OPCODE_ELSE); |
||
892 | |||
893 | if (p->gen < 060) { |
||
894 | brw_set_dest(p, insn, brw_ip_reg()); |
||
895 | brw_set_src0(p, insn, brw_ip_reg()); |
||
896 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
897 | } else if (p->gen < 070) { |
||
898 | brw_set_dest(p, insn, brw_imm_w(0)); |
||
899 | insn->bits1.branch_gen6.jump_count = 0; |
||
900 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
901 | brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
||
902 | } else { |
||
903 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
904 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
905 | brw_set_src1(p, insn, brw_imm_ud(0)); |
||
906 | insn->bits3.break_cont.jip = 0; |
||
907 | insn->bits3.break_cont.uip = 0; |
||
908 | } |
||
909 | |||
910 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
911 | insn->header.mask_control = BRW_MASK_ENABLE; |
||
912 | if (!p->single_program_flow) |
||
913 | insn->header.thread_control = BRW_THREAD_SWITCH; |
||
914 | |||
915 | push_if_stack(p, insn); |
||
916 | } |
||
917 | |||
918 | void |
||
919 | brw_ENDIF(struct brw_compile *p) |
||
920 | { |
||
921 | struct brw_instruction *insn; |
||
922 | struct brw_instruction *else_inst = NULL; |
||
923 | struct brw_instruction *if_inst = NULL; |
||
924 | |||
925 | /* Pop the IF and (optional) ELSE instructions from the stack */ |
||
926 | p->if_stack_depth--; |
||
927 | if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { |
||
928 | else_inst = p->if_stack[p->if_stack_depth]; |
||
929 | p->if_stack_depth--; |
||
930 | } |
||
931 | if_inst = p->if_stack[p->if_stack_depth]; |
||
932 | |||
933 | if (p->single_program_flow) { |
||
934 | /* ENDIF is useless; don't bother emitting it. */ |
||
935 | convert_IF_ELSE_to_ADD(p, if_inst, else_inst); |
||
936 | return; |
||
937 | } |
||
938 | |||
939 | insn = brw_next_insn(p, BRW_OPCODE_ENDIF); |
||
940 | |||
941 | if (p->gen < 060) { |
||
942 | brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
||
943 | brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0))); |
||
944 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
945 | } else if (p->gen < 070) { |
||
946 | brw_set_dest(p, insn, brw_imm_w(0)); |
||
947 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
948 | brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
||
949 | } else { |
||
950 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
951 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
952 | brw_set_src1(p, insn, brw_imm_ud(0)); |
||
953 | } |
||
954 | |||
955 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
956 | insn->header.mask_control = BRW_MASK_ENABLE; |
||
957 | insn->header.thread_control = BRW_THREAD_SWITCH; |
||
958 | |||
959 | /* Also pop item off the stack in the endif instruction: */ |
||
960 | if (p->gen < 060) { |
||
961 | insn->bits3.if_else.jump_count = 0; |
||
962 | insn->bits3.if_else.pop_count = 1; |
||
963 | insn->bits3.if_else.pad0 = 0; |
||
964 | } else if (p->gen < 070) { |
||
965 | insn->bits1.branch_gen6.jump_count = 2; |
||
966 | } else { |
||
967 | insn->bits3.break_cont.jip = 2; |
||
968 | } |
||
969 | patch_IF_ELSE(p, if_inst, else_inst, insn); |
||
970 | } |
||
971 | |||
972 | struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) |
||
973 | { |
||
974 | struct brw_instruction *insn; |
||
975 | |||
976 | insn = brw_next_insn(p, BRW_OPCODE_BREAK); |
||
977 | if (p->gen >= 060) { |
||
978 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
979 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
980 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
981 | } else { |
||
982 | brw_set_dest(p, insn, brw_ip_reg()); |
||
983 | brw_set_src0(p, insn, brw_ip_reg()); |
||
984 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
985 | insn->bits3.if_else.pad0 = 0; |
||
986 | insn->bits3.if_else.pop_count = pop_count; |
||
987 | } |
||
988 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
989 | insn->header.execution_size = BRW_EXECUTE_8; |
||
990 | |||
991 | return insn; |
||
992 | } |
||
993 | |||
994 | struct brw_instruction *gen6_CONT(struct brw_compile *p, |
||
995 | struct brw_instruction *do_insn) |
||
996 | { |
||
997 | struct brw_instruction *insn; |
||
998 | |||
999 | insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); |
||
1000 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
1001 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
1002 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1003 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1004 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
1005 | |||
1006 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1007 | insn->header.execution_size = BRW_EXECUTE_8; |
||
1008 | return insn; |
||
1009 | } |
||
1010 | |||
1011 | struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) |
||
1012 | { |
||
1013 | struct brw_instruction *insn; |
||
1014 | insn = brw_next_insn(p, BRW_OPCODE_CONTINUE); |
||
1015 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1016 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1017 | brw_set_src1(p, insn, brw_imm_d(0x0)); |
||
1018 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1019 | insn->header.execution_size = BRW_EXECUTE_8; |
||
1020 | /* insn->header.mask_control = BRW_MASK_DISABLE; */ |
||
1021 | insn->bits3.if_else.pad0 = 0; |
||
1022 | insn->bits3.if_else.pop_count = pop_count; |
||
1023 | return insn; |
||
1024 | } |
||
1025 | |||
1026 | /* DO/WHILE loop: |
||
1027 | * |
||
1028 | * The DO/WHILE is just an unterminated loop -- break or continue are |
||
1029 | * used for control within the loop. We have a few ways they can be |
||
1030 | * done. |
||
1031 | * |
||
1032 | * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, |
||
1033 | * jip and no DO instruction. |
||
1034 | * |
||
1035 | * For non-uniform control flow pre-gen6, there's a DO instruction to |
||
1036 | * push the mask, and a WHILE to jump back, and BREAK to get out and |
||
1037 | * pop the mask. |
||
1038 | * |
||
1039 | * For gen6, there's no more mask stack, so no need for DO. WHILE |
||
1040 | * just points back to the first instruction of the loop. |
||
1041 | */ |
||
1042 | struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) |
||
1043 | { |
||
1044 | if (p->gen >= 060 || p->single_program_flow) { |
||
1045 | return &p->store[p->nr_insn]; |
||
1046 | } else { |
||
1047 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO); |
||
1048 | |||
1049 | /* Override the defaults for this instruction: |
||
1050 | */ |
||
1051 | brw_set_dest(p, insn, brw_null_reg()); |
||
1052 | brw_set_src0(p, insn, brw_null_reg()); |
||
1053 | brw_set_src1(p, insn, brw_null_reg()); |
||
1054 | |||
1055 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1056 | insn->header.execution_size = execute_size; |
||
1057 | insn->header.predicate_control = BRW_PREDICATE_NONE; |
||
1058 | /* insn->header.mask_control = BRW_MASK_ENABLE; */ |
||
1059 | /* insn->header.mask_control = BRW_MASK_DISABLE; */ |
||
1060 | |||
1061 | return insn; |
||
1062 | } |
||
1063 | } |
||
1064 | |||
1065 | struct brw_instruction *brw_WHILE(struct brw_compile *p, |
||
1066 | struct brw_instruction *do_insn) |
||
1067 | { |
||
1068 | struct brw_instruction *insn; |
||
1069 | unsigned br = 1; |
||
1070 | |||
1071 | if (p->gen >= 050) |
||
1072 | br = 2; |
||
1073 | |||
1074 | if (p->gen >= 070) { |
||
1075 | insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
||
1076 | |||
1077 | brw_set_dest(p, insn, __retype_d(brw_null_reg())); |
||
1078 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
1079 | brw_set_src1(p, insn, brw_imm_ud(0)); |
||
1080 | insn->bits3.break_cont.jip = br * (do_insn - insn); |
||
1081 | |||
1082 | insn->header.execution_size = BRW_EXECUTE_8; |
||
1083 | } else if (p->gen >= 060) { |
||
1084 | insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
||
1085 | |||
1086 | brw_set_dest(p, insn, brw_imm_w(0)); |
||
1087 | insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); |
||
1088 | brw_set_src0(p, insn, __retype_d(brw_null_reg())); |
||
1089 | brw_set_src1(p, insn, __retype_d(brw_null_reg())); |
||
1090 | |||
1091 | insn->header.execution_size = BRW_EXECUTE_8; |
||
1092 | } else { |
||
1093 | if (p->single_program_flow) { |
||
1094 | insn = brw_next_insn(p, BRW_OPCODE_ADD); |
||
1095 | |||
1096 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1097 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1098 | brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); |
||
1099 | insn->header.execution_size = BRW_EXECUTE_1; |
||
1100 | } else { |
||
1101 | insn = brw_next_insn(p, BRW_OPCODE_WHILE); |
||
1102 | |||
1103 | assert(do_insn->header.opcode == BRW_OPCODE_DO); |
||
1104 | |||
1105 | brw_set_dest(p, insn, brw_ip_reg()); |
||
1106 | brw_set_src0(p, insn, brw_ip_reg()); |
||
1107 | brw_set_src1(p, insn, brw_imm_d(0)); |
||
1108 | |||
1109 | insn->header.execution_size = do_insn->header.execution_size; |
||
1110 | insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); |
||
1111 | insn->bits3.if_else.pop_count = 0; |
||
1112 | insn->bits3.if_else.pad0 = 0; |
||
1113 | } |
||
1114 | } |
||
1115 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1116 | p->current->header.predicate_control = BRW_PREDICATE_NONE; |
||
1117 | |||
1118 | return insn; |
||
1119 | } |
||
1120 | |||
1121 | /* FORWARD JUMPS: |
||
1122 | */ |
||
1123 | void brw_land_fwd_jump(struct brw_compile *p, |
||
1124 | struct brw_instruction *jmp_insn) |
||
1125 | { |
||
1126 | struct brw_instruction *landing = &p->store[p->nr_insn]; |
||
1127 | unsigned jmpi = 1; |
||
1128 | |||
1129 | if (p->gen >= 050) |
||
1130 | jmpi = 2; |
||
1131 | |||
1132 | assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); |
||
1133 | assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE); |
||
1134 | |||
1135 | jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); |
||
1136 | } |
||
1137 | |||
1138 | |||
1139 | |||
1140 | /* To integrate with the above, it makes sense that the comparison |
||
1141 | * instruction should populate the flag register. It might be simpler |
||
1142 | * just to use the flag reg for most WM tasks? |
||
1143 | */ |
||
1144 | void brw_CMP(struct brw_compile *p, |
||
1145 | struct brw_reg dest, |
||
1146 | unsigned conditional, |
||
1147 | struct brw_reg src0, |
||
1148 | struct brw_reg src1) |
||
1149 | { |
||
1150 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP); |
||
1151 | |||
1152 | insn->header.destreg__conditionalmod = conditional; |
||
1153 | brw_set_dest(p, insn, dest); |
||
1154 | brw_set_src0(p, insn, src0); |
||
1155 | brw_set_src1(p, insn, src1); |
||
1156 | |||
1157 | /* Make it so that future instructions will use the computed flag |
||
1158 | * value until brw_set_predicate_control_flag_value() is called |
||
1159 | * again. |
||
1160 | */ |
||
1161 | if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && |
||
1162 | dest.nr == 0) { |
||
1163 | p->current->header.predicate_control = BRW_PREDICATE_NORMAL; |
||
1164 | p->flag_value = 0xff; |
||
1165 | } |
||
1166 | } |
||
1167 | |||
1168 | /* Issue 'wait' instruction for n1, host could program MMIO |
||
1169 | to wake up thread. */ |
||
1170 | void brw_WAIT(struct brw_compile *p) |
||
1171 | { |
||
1172 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT); |
||
1173 | struct brw_reg src = brw_notification_1_reg(); |
||
1174 | |||
1175 | brw_set_dest(p, insn, src); |
||
1176 | brw_set_src0(p, insn, src); |
||
1177 | brw_set_src1(p, insn, brw_null_reg()); |
||
1178 | insn->header.execution_size = 0; /* must */ |
||
1179 | insn->header.predicate_control = 0; |
||
1180 | insn->header.compression_control = 0; |
||
1181 | } |
||
1182 | |||
1183 | /*********************************************************************** |
||
1184 | * Helpers for the various SEND message types: |
||
1185 | */ |
||
1186 | |||
1187 | /** Extended math function, float[8]. |
||
1188 | */ |
||
1189 | void brw_math(struct brw_compile *p, |
||
1190 | struct brw_reg dest, |
||
1191 | unsigned function, |
||
1192 | unsigned saturate, |
||
1193 | unsigned msg_reg_nr, |
||
1194 | struct brw_reg src, |
||
1195 | unsigned data_type, |
||
1196 | unsigned precision) |
||
1197 | { |
||
1198 | if (p->gen >= 060) { |
||
1199 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); |
||
1200 | |||
1201 | assert(dest.file == BRW_GENERAL_REGISTER_FILE); |
||
1202 | assert(src.file == BRW_GENERAL_REGISTER_FILE); |
||
1203 | |||
1204 | assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); |
||
1205 | assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); |
||
1206 | |||
1207 | /* Source modifiers are ignored for extended math instructions. */ |
||
1208 | assert(!src.negate); |
||
1209 | assert(!src.abs); |
||
1210 | |||
1211 | if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && |
||
1212 | function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { |
||
1213 | assert(src.type == BRW_REGISTER_TYPE_F); |
||
1214 | } |
||
1215 | |||
1216 | /* Math is the same ISA format as other opcodes, except that CondModifier |
||
1217 | * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
||
1218 | */ |
||
1219 | insn->header.destreg__conditionalmod = function; |
||
1220 | insn->header.saturate = saturate; |
||
1221 | |||
1222 | brw_set_dest(p, insn, dest); |
||
1223 | brw_set_src0(p, insn, src); |
||
1224 | brw_set_src1(p, insn, brw_null_reg()); |
||
1225 | } else { |
||
1226 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1227 | /* Example code doesn't set predicate_control for send |
||
1228 | * instructions. |
||
1229 | */ |
||
1230 | insn->header.predicate_control = 0; |
||
1231 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1232 | |||
1233 | brw_set_dest(p, insn, dest); |
||
1234 | brw_set_src0(p, insn, src); |
||
1235 | brw_set_math_message(p, insn, function, |
||
1236 | src.type == BRW_REGISTER_TYPE_D, |
||
1237 | precision, |
||
1238 | saturate, |
||
1239 | data_type); |
||
1240 | } |
||
1241 | } |
||
1242 | |||
1243 | /** Extended math function, float[8]. |
||
1244 | */ |
||
1245 | void brw_math2(struct brw_compile *p, |
||
1246 | struct brw_reg dest, |
||
1247 | unsigned function, |
||
1248 | struct brw_reg src0, |
||
1249 | struct brw_reg src1) |
||
1250 | { |
||
1251 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH); |
||
1252 | |||
1253 | assert(dest.file == BRW_GENERAL_REGISTER_FILE); |
||
1254 | assert(src0.file == BRW_GENERAL_REGISTER_FILE); |
||
1255 | assert(src1.file == BRW_GENERAL_REGISTER_FILE); |
||
1256 | |||
1257 | assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); |
||
1258 | assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); |
||
1259 | assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); |
||
1260 | |||
1261 | if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && |
||
1262 | function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { |
||
1263 | assert(src0.type == BRW_REGISTER_TYPE_F); |
||
1264 | assert(src1.type == BRW_REGISTER_TYPE_F); |
||
1265 | } |
||
1266 | |||
1267 | /* Source modifiers are ignored for extended math instructions. */ |
||
1268 | assert(!src0.negate); |
||
1269 | assert(!src0.abs); |
||
1270 | assert(!src1.negate); |
||
1271 | assert(!src1.abs); |
||
1272 | |||
1273 | /* Math is the same ISA format as other opcodes, except that CondModifier |
||
1274 | * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
||
1275 | */ |
||
1276 | insn->header.destreg__conditionalmod = function; |
||
1277 | |||
1278 | brw_set_dest(p, insn, dest); |
||
1279 | brw_set_src0(p, insn, src0); |
||
1280 | brw_set_src1(p, insn, src1); |
||
1281 | } |
||
1282 | |||
1283 | /** |
||
1284 | * Extended math function, float[16]. |
||
1285 | * Use 2 send instructions. |
||
1286 | */ |
||
1287 | void brw_math_16(struct brw_compile *p, |
||
1288 | struct brw_reg dest, |
||
1289 | unsigned function, |
||
1290 | unsigned saturate, |
||
1291 | unsigned msg_reg_nr, |
||
1292 | struct brw_reg src, |
||
1293 | unsigned precision) |
||
1294 | { |
||
1295 | struct brw_instruction *insn; |
||
1296 | |||
1297 | if (p->gen >= 060) { |
||
1298 | insn = brw_next_insn(p, BRW_OPCODE_MATH); |
||
1299 | |||
1300 | /* Math is the same ISA format as other opcodes, except that CondModifier |
||
1301 | * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. |
||
1302 | */ |
||
1303 | insn->header.destreg__conditionalmod = function; |
||
1304 | insn->header.saturate = saturate; |
||
1305 | |||
1306 | /* Source modifiers are ignored for extended math instructions. */ |
||
1307 | assert(!src.negate); |
||
1308 | assert(!src.abs); |
||
1309 | |||
1310 | brw_set_dest(p, insn, dest); |
||
1311 | brw_set_src0(p, insn, src); |
||
1312 | brw_set_src1(p, insn, brw_null_reg()); |
||
1313 | return; |
||
1314 | } |
||
1315 | |||
1316 | /* First instruction: |
||
1317 | */ |
||
1318 | brw_push_insn_state(p); |
||
1319 | brw_set_predicate_control_flag_value(p, 0xff); |
||
1320 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1321 | |||
1322 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1323 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1324 | |||
1325 | brw_set_dest(p, insn, dest); |
||
1326 | brw_set_src0(p, insn, src); |
||
1327 | brw_set_math_message(p, insn, function, |
||
1328 | BRW_MATH_INTEGER_UNSIGNED, |
||
1329 | precision, |
||
1330 | saturate, |
||
1331 | BRW_MATH_DATA_VECTOR); |
||
1332 | |||
1333 | /* Second instruction: |
||
1334 | */ |
||
1335 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1336 | insn->header.compression_control = BRW_COMPRESSION_2NDHALF; |
||
1337 | insn->header.destreg__conditionalmod = msg_reg_nr+1; |
||
1338 | |||
1339 | brw_set_dest(p, insn, __offset(dest,1)); |
||
1340 | brw_set_src0(p, insn, src); |
||
1341 | brw_set_math_message(p, insn, function, |
||
1342 | BRW_MATH_INTEGER_UNSIGNED, |
||
1343 | precision, |
||
1344 | saturate, |
||
1345 | BRW_MATH_DATA_VECTOR); |
||
1346 | |||
1347 | brw_pop_insn_state(p); |
||
1348 | } |
||
1349 | |||
1350 | /** |
||
1351 | * Write a block of OWORDs (half a GRF each) from the scratch buffer, |
||
1352 | * using a constant offset per channel. |
||
1353 | * |
||
1354 | * The offset must be aligned to oword size (16 bytes). Used for |
||
1355 | * register spilling. |
||
1356 | */ |
||
1357 | void brw_oword_block_write_scratch(struct brw_compile *p, |
||
1358 | struct brw_reg mrf, |
||
1359 | int num_regs, |
||
1360 | unsigned offset) |
||
1361 | { |
||
1362 | uint32_t msg_control, msg_type; |
||
1363 | int mlen; |
||
1364 | |||
1365 | if (p->gen >= 060) |
||
1366 | offset /= 16; |
||
1367 | |||
1368 | mrf = __retype_ud(mrf); |
||
1369 | |||
1370 | if (num_regs == 1) { |
||
1371 | msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; |
||
1372 | mlen = 2; |
||
1373 | } else { |
||
1374 | msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; |
||
1375 | mlen = 3; |
||
1376 | } |
||
1377 | |||
1378 | /* Set up the message header. This is g0, with g0.2 filled with |
||
1379 | * the offset. We don't want to leave our offset around in g0 or |
||
1380 | * it'll screw up texture samples, so set it up inside the message |
||
1381 | * reg. |
||
1382 | */ |
||
1383 | { |
||
1384 | brw_push_insn_state(p); |
||
1385 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1386 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1387 | |||
1388 | brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
||
1389 | |||
1390 | /* set message header global offset field (reg 0, element 2) */ |
||
1391 | brw_MOV(p, |
||
1392 | __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
||
1393 | brw_imm_ud(offset)); |
||
1394 | |||
1395 | brw_pop_insn_state(p); |
||
1396 | } |
||
1397 | |||
1398 | { |
||
1399 | struct brw_reg dest; |
||
1400 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1401 | int send_commit_msg; |
||
1402 | struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0)); |
||
1403 | |||
1404 | if (insn->header.compression_control != BRW_COMPRESSION_NONE) { |
||
1405 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1406 | src_header = vec16(src_header); |
||
1407 | } |
||
1408 | assert(insn->header.predicate_control == BRW_PREDICATE_NONE); |
||
1409 | insn->header.destreg__conditionalmod = mrf.nr; |
||
1410 | |||
1411 | /* Until gen6, writes followed by reads from the same location |
||
1412 | * are not guaranteed to be ordered unless write_commit is set. |
||
1413 | * If set, then a no-op write is issued to the destination |
||
1414 | * register to set a dependency, and a read from the destination |
||
1415 | * can be used to ensure the ordering. |
||
1416 | * |
||
1417 | * For gen6, only writes between different threads need ordering |
||
1418 | * protection. Our use of DP writes is all about register |
||
1419 | * spilling within a thread. |
||
1420 | */ |
||
1421 | if (p->gen >= 060) { |
||
1422 | dest = __retype_uw(vec16(brw_null_reg())); |
||
1423 | send_commit_msg = 0; |
||
1424 | } else { |
||
1425 | dest = src_header; |
||
1426 | send_commit_msg = 1; |
||
1427 | } |
||
1428 | |||
1429 | brw_set_dest(p, insn, dest); |
||
1430 | if (p->gen >= 060) { |
||
1431 | brw_set_src0(p, insn, mrf); |
||
1432 | } else { |
||
1433 | brw_set_src0(p, insn, brw_null_reg()); |
||
1434 | } |
||
1435 | |||
1436 | if (p->gen >= 060) |
||
1437 | msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; |
||
1438 | else |
||
1439 | msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; |
||
1440 | |||
1441 | brw_set_dp_write_message(p, |
||
1442 | insn, |
||
1443 | 255, /* binding table index (255=stateless) */ |
||
1444 | msg_control, |
||
1445 | msg_type, |
||
1446 | mlen, |
||
1447 | true, /* header_present */ |
||
1448 | 0, /* pixel scoreboard */ |
||
1449 | send_commit_msg, /* response_length */ |
||
1450 | 0, /* eot */ |
||
1451 | send_commit_msg); |
||
1452 | } |
||
1453 | } |
||
1454 | |||
1455 | |||
1456 | /** |
||
1457 | * Read a block of owords (half a GRF each) from the scratch buffer |
||
1458 | * using a constant index per channel. |
||
1459 | * |
||
1460 | * Offset must be aligned to oword size (16 bytes). Used for register |
||
1461 | * spilling. |
||
1462 | */ |
||
1463 | void |
||
1464 | brw_oword_block_read_scratch(struct brw_compile *p, |
||
1465 | struct brw_reg dest, |
||
1466 | struct brw_reg mrf, |
||
1467 | int num_regs, |
||
1468 | unsigned offset) |
||
1469 | { |
||
1470 | uint32_t msg_control; |
||
1471 | int rlen; |
||
1472 | |||
1473 | if (p->gen >= 060) |
||
1474 | offset /= 16; |
||
1475 | |||
1476 | mrf = __retype_ud(mrf); |
||
1477 | dest = __retype_uw(dest); |
||
1478 | |||
1479 | if (num_regs == 1) { |
||
1480 | msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS; |
||
1481 | rlen = 1; |
||
1482 | } else { |
||
1483 | msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS; |
||
1484 | rlen = 2; |
||
1485 | } |
||
1486 | |||
1487 | { |
||
1488 | brw_push_insn_state(p); |
||
1489 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1490 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1491 | |||
1492 | brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
||
1493 | |||
1494 | /* set message header global offset field (reg 0, element 2) */ |
||
1495 | brw_MOV(p, |
||
1496 | __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
||
1497 | brw_imm_ud(offset)); |
||
1498 | |||
1499 | brw_pop_insn_state(p); |
||
1500 | } |
||
1501 | |||
1502 | { |
||
1503 | struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1504 | |||
1505 | assert(insn->header.predicate_control == 0); |
||
1506 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1507 | insn->header.destreg__conditionalmod = mrf.nr; |
||
1508 | |||
1509 | brw_set_dest(p, insn, dest); /* UW? */ |
||
1510 | if (p->gen >= 060) { |
||
1511 | brw_set_src0(p, insn, mrf); |
||
1512 | } else { |
||
1513 | brw_set_src0(p, insn, brw_null_reg()); |
||
1514 | } |
||
1515 | |||
1516 | brw_set_dp_read_message(p, |
||
1517 | insn, |
||
1518 | 255, /* binding table index (255=stateless) */ |
||
1519 | msg_control, |
||
1520 | BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ |
||
1521 | BRW_DATAPORT_READ_TARGET_RENDER_CACHE, |
||
1522 | 1, /* msg_length */ |
||
1523 | rlen); |
||
1524 | } |
||
1525 | } |
||
1526 | |||
1527 | /** |
||
1528 | * Read a float[4] vector from the data port Data Cache (const buffer). |
||
1529 | * Location (in buffer) should be a multiple of 16. |
||
1530 | * Used for fetching shader constants. |
||
1531 | */ |
||
1532 | void brw_oword_block_read(struct brw_compile *p, |
||
1533 | struct brw_reg dest, |
||
1534 | struct brw_reg mrf, |
||
1535 | uint32_t offset, |
||
1536 | uint32_t bind_table_index) |
||
1537 | { |
||
1538 | struct brw_instruction *insn; |
||
1539 | |||
1540 | /* On newer hardware, offset is in units of owords. */ |
||
1541 | if (p->gen >= 060) |
||
1542 | offset /= 16; |
||
1543 | |||
1544 | mrf = __retype_ud(mrf); |
||
1545 | |||
1546 | brw_push_insn_state(p); |
||
1547 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1548 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1549 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1550 | |||
1551 | brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
||
1552 | |||
1553 | /* set message header global offset field (reg 0, element 2) */ |
||
1554 | brw_MOV(p, |
||
1555 | __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)), |
||
1556 | brw_imm_ud(offset)); |
||
1557 | |||
1558 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1559 | insn->header.destreg__conditionalmod = mrf.nr; |
||
1560 | |||
1561 | /* cast dest to a uword[8] vector */ |
||
1562 | dest = __retype_uw(vec8(dest)); |
||
1563 | |||
1564 | brw_set_dest(p, insn, dest); |
||
1565 | if (p->gen >= 060) { |
||
1566 | brw_set_src0(p, insn, mrf); |
||
1567 | } else { |
||
1568 | brw_set_src0(p, insn, brw_null_reg()); |
||
1569 | } |
||
1570 | |||
1571 | brw_set_dp_read_message(p, |
||
1572 | insn, |
||
1573 | bind_table_index, |
||
1574 | BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, |
||
1575 | BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, |
||
1576 | BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
||
1577 | 1, /* msg_length */ |
||
1578 | 1); /* response_length (1 reg, 2 owords!) */ |
||
1579 | |||
1580 | brw_pop_insn_state(p); |
||
1581 | } |
||
1582 | |||
1583 | /** |
||
1584 | * Read a set of dwords from the data port Data Cache (const buffer). |
||
1585 | * |
||
1586 | * Location (in buffer) appears as UD offsets in the register after |
||
1587 | * the provided mrf header reg. |
||
1588 | */ |
||
1589 | void brw_dword_scattered_read(struct brw_compile *p, |
||
1590 | struct brw_reg dest, |
||
1591 | struct brw_reg mrf, |
||
1592 | uint32_t bind_table_index) |
||
1593 | { |
||
1594 | struct brw_instruction *insn; |
||
1595 | |||
1596 | mrf = __retype_ud(mrf); |
||
1597 | |||
1598 | brw_push_insn_state(p); |
||
1599 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1600 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1601 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1602 | brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0))); |
||
1603 | brw_pop_insn_state(p); |
||
1604 | |||
1605 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1606 | insn->header.destreg__conditionalmod = mrf.nr; |
||
1607 | |||
1608 | /* cast dest to a uword[8] vector */ |
||
1609 | dest = __retype_uw(vec8(dest)); |
||
1610 | |||
1611 | brw_set_dest(p, insn, dest); |
||
1612 | brw_set_src0(p, insn, brw_null_reg()); |
||
1613 | |||
1614 | brw_set_dp_read_message(p, |
||
1615 | insn, |
||
1616 | bind_table_index, |
||
1617 | BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, |
||
1618 | BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, |
||
1619 | BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
||
1620 | 2, /* msg_length */ |
||
1621 | 1); /* response_length */ |
||
1622 | } |
||
1623 | |||
1624 | /** |
||
1625 | * Read float[4] constant(s) from VS constant buffer. |
||
1626 | * For relative addressing, two float[4] constants will be read into 'dest'. |
||
1627 | * Otherwise, one float[4] constant will be read into the lower half of 'dest'. |
||
1628 | */ |
||
1629 | void brw_dp_READ_4_vs(struct brw_compile *p, |
||
1630 | struct brw_reg dest, |
||
1631 | unsigned location, |
||
1632 | unsigned bind_table_index) |
||
1633 | { |
||
1634 | struct brw_instruction *insn; |
||
1635 | unsigned msg_reg_nr = 1; |
||
1636 | |||
1637 | if (p->gen >= 060) |
||
1638 | location /= 16; |
||
1639 | |||
1640 | /* Setup MRF[1] with location/offset into const buffer */ |
||
1641 | brw_push_insn_state(p); |
||
1642 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1643 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1644 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1645 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1646 | brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)), |
||
1647 | brw_imm_ud(location)); |
||
1648 | brw_pop_insn_state(p); |
||
1649 | |||
1650 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1651 | |||
1652 | insn->header.predicate_control = BRW_PREDICATE_NONE; |
||
1653 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1654 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1655 | insn->header.mask_control = BRW_MASK_DISABLE; |
||
1656 | |||
1657 | brw_set_dest(p, insn, dest); |
||
1658 | if (p->gen >= 060) { |
||
1659 | brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); |
||
1660 | } else { |
||
1661 | brw_set_src0(p, insn, brw_null_reg()); |
||
1662 | } |
||
1663 | |||
1664 | brw_set_dp_read_message(p, |
||
1665 | insn, |
||
1666 | bind_table_index, |
||
1667 | 0, |
||
1668 | BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ |
||
1669 | BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
||
1670 | 1, /* msg_length */ |
||
1671 | 1); /* response_length (1 Oword) */ |
||
1672 | } |
||
1673 | |||
1674 | /** |
||
1675 | * Read a float[4] constant per vertex from VS constant buffer, with |
||
1676 | * relative addressing. |
||
1677 | */ |
||
1678 | void brw_dp_READ_4_vs_relative(struct brw_compile *p, |
||
1679 | struct brw_reg dest, |
||
1680 | struct brw_reg addr_reg, |
||
1681 | unsigned offset, |
||
1682 | unsigned bind_table_index) |
||
1683 | { |
||
1684 | struct brw_reg src = brw_vec8_grf(0, 0); |
||
1685 | struct brw_instruction *insn; |
||
1686 | int msg_type; |
||
1687 | |||
1688 | /* Setup MRF[1] with offset into const buffer */ |
||
1689 | brw_push_insn_state(p); |
||
1690 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1691 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1692 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1693 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1694 | |||
1695 | /* M1.0 is block offset 0, M1.4 is block offset 1, all other |
||
1696 | * fields ignored. |
||
1697 | */ |
||
1698 | brw_ADD(p, __retype_d(brw_message_reg(1)), |
||
1699 | addr_reg, brw_imm_d(offset)); |
||
1700 | brw_pop_insn_state(p); |
||
1701 | |||
1702 | gen6_resolve_implied_move(p, &src, 0); |
||
1703 | |||
1704 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1705 | insn->header.predicate_control = BRW_PREDICATE_NONE; |
||
1706 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1707 | insn->header.destreg__conditionalmod = 0; |
||
1708 | insn->header.mask_control = BRW_MASK_DISABLE; |
||
1709 | |||
1710 | brw_set_dest(p, insn, dest); |
||
1711 | brw_set_src0(p, insn, src); |
||
1712 | |||
1713 | if (p->gen >= 060) |
||
1714 | msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
||
1715 | else if (p->gen >= 045) |
||
1716 | msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
||
1717 | else |
||
1718 | msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; |
||
1719 | |||
1720 | brw_set_dp_read_message(p, |
||
1721 | insn, |
||
1722 | bind_table_index, |
||
1723 | BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, |
||
1724 | msg_type, |
||
1725 | BRW_DATAPORT_READ_TARGET_DATA_CACHE, |
||
1726 | 2, /* msg_length */ |
||
1727 | 1); /* response_length */ |
||
1728 | } |
||
1729 | |||
1730 | void brw_fb_WRITE(struct brw_compile *p, |
||
1731 | int dispatch_width, |
||
1732 | unsigned msg_reg_nr, |
||
1733 | struct brw_reg src0, |
||
1734 | unsigned msg_control, |
||
1735 | unsigned binding_table_index, |
||
1736 | unsigned msg_length, |
||
1737 | unsigned response_length, |
||
1738 | bool eot, |
||
1739 | bool header_present) |
||
1740 | { |
||
1741 | struct brw_instruction *insn; |
||
1742 | unsigned msg_type; |
||
1743 | struct brw_reg dest; |
||
1744 | |||
1745 | if (dispatch_width == 16) |
||
1746 | dest = __retype_uw(vec16(brw_null_reg())); |
||
1747 | else |
||
1748 | dest = __retype_uw(vec8(brw_null_reg())); |
||
1749 | |||
1750 | if (p->gen >= 060 && binding_table_index == 0) { |
||
1751 | insn = brw_next_insn(p, BRW_OPCODE_SENDC); |
||
1752 | } else { |
||
1753 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1754 | } |
||
1755 | /* The execution mask is ignored for render target writes. */ |
||
1756 | insn->header.predicate_control = 0; |
||
1757 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1758 | |||
1759 | if (p->gen >= 060) { |
||
1760 | /* headerless version, just submit color payload */ |
||
1761 | src0 = brw_message_reg(msg_reg_nr); |
||
1762 | |||
1763 | msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
||
1764 | } else { |
||
1765 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1766 | |||
1767 | msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; |
||
1768 | } |
||
1769 | |||
1770 | brw_set_dest(p, insn, dest); |
||
1771 | brw_set_src0(p, insn, src0); |
||
1772 | brw_set_dp_write_message(p, |
||
1773 | insn, |
||
1774 | binding_table_index, |
||
1775 | msg_control, |
||
1776 | msg_type, |
||
1777 | msg_length, |
||
1778 | header_present, |
||
1779 | eot, |
||
1780 | response_length, |
||
1781 | eot, |
||
1782 | |||
1783 | } |
||
1784 | |||
1785 | /** |
||
1786 | * Texture sample instruction. |
||
1787 | * Note: the msg_type plus msg_length values determine exactly what kind |
||
1788 | * of sampling operation is performed. See volume 4, page 161 of docs. |
||
1789 | */ |
||
1790 | void brw_SAMPLE(struct brw_compile *p, |
||
1791 | struct brw_reg dest, |
||
1792 | unsigned msg_reg_nr, |
||
1793 | struct brw_reg src0, |
||
1794 | unsigned binding_table_index, |
||
1795 | unsigned sampler, |
||
1796 | unsigned writemask, |
||
1797 | unsigned msg_type, |
||
1798 | unsigned response_length, |
||
1799 | unsigned msg_length, |
||
1800 | bool header_present, |
||
1801 | unsigned simd_mode) |
||
1802 | { |
||
1803 | assert(writemask); |
||
1804 | |||
1805 | if (p->gen < 050 || writemask != WRITEMASK_XYZW) { |
||
1806 | struct brw_reg m1 = brw_message_reg(msg_reg_nr); |
||
1807 | |||
1808 | writemask = ~writemask & WRITEMASK_XYZW; |
||
1809 | |||
1810 | brw_push_insn_state(p); |
||
1811 | |||
1812 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1813 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
1814 | |||
1815 | brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0))); |
||
1816 | brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12)); |
||
1817 | |||
1818 | brw_pop_insn_state(p); |
||
1819 | |||
1820 | src0 = __retype_uw(brw_null_reg()); |
||
1821 | } |
||
1822 | |||
1823 | { |
||
1824 | struct brw_instruction *insn; |
||
1825 | |||
1826 | gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
||
1827 | |||
1828 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1829 | insn->header.predicate_control = 0; /* XXX */ |
||
1830 | insn->header.compression_control = BRW_COMPRESSION_NONE; |
||
1831 | if (p->gen < 060) |
||
1832 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1833 | |||
1834 | brw_set_dest(p, insn, dest); |
||
1835 | brw_set_src0(p, insn, src0); |
||
1836 | brw_set_sampler_message(p, insn, |
||
1837 | binding_table_index, |
||
1838 | sampler, |
||
1839 | msg_type, |
||
1840 | response_length, |
||
1841 | msg_length, |
||
1842 | header_present, |
||
1843 | simd_mode); |
||
1844 | } |
||
1845 | } |
||
1846 | |||
1847 | /* All these variables are pretty confusing - we might be better off |
||
1848 | * using bitmasks and macros for this, in the old style. Or perhaps |
||
1849 | * just having the caller instantiate the fields in dword3 itself. |
||
1850 | */ |
||
1851 | void brw_urb_WRITE(struct brw_compile *p, |
||
1852 | struct brw_reg dest, |
||
1853 | unsigned msg_reg_nr, |
||
1854 | struct brw_reg src0, |
||
1855 | bool allocate, |
||
1856 | bool used, |
||
1857 | unsigned msg_length, |
||
1858 | unsigned response_length, |
||
1859 | bool eot, |
||
1860 | bool writes_complete, |
||
1861 | unsigned offset, |
||
1862 | unsigned swizzle) |
||
1863 | { |
||
1864 | struct brw_instruction *insn; |
||
1865 | |||
1866 | gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
||
1867 | |||
1868 | if (p->gen >= 070) { |
||
1869 | /* Enable Channel Masks in the URB_WRITE_HWORD message header */ |
||
1870 | brw_push_insn_state(p); |
||
1871 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1872 | brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)), |
||
1873 | __retype_ud(brw_vec1_grf(0, 5)), |
||
1874 | brw_imm_ud(0xff00)); |
||
1875 | brw_pop_insn_state(p); |
||
1876 | } |
||
1877 | |||
1878 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1879 | |||
1880 | assert(msg_length < BRW_MAX_MRF); |
||
1881 | |||
1882 | brw_set_dest(p, insn, dest); |
||
1883 | brw_set_src0(p, insn, src0); |
||
1884 | brw_set_src1(p, insn, brw_imm_d(0)); |
||
1885 | |||
1886 | if (p->gen <= 060) |
||
1887 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1888 | |||
1889 | brw_set_urb_message(p, |
||
1890 | insn, |
||
1891 | allocate, |
||
1892 | used, |
||
1893 | msg_length, |
||
1894 | response_length, |
||
1895 | eot, |
||
1896 | writes_complete, |
||
1897 | offset, |
||
1898 | swizzle); |
||
1899 | } |
||
1900 | |||
1901 | static int |
||
1902 | brw_find_next_block_end(struct brw_compile *p, int start) |
||
1903 | { |
||
1904 | int ip; |
||
1905 | |||
1906 | for (ip = start + 1; ip < p->nr_insn; ip++) { |
||
1907 | struct brw_instruction *insn = &p->store[ip]; |
||
1908 | |||
1909 | switch (insn->header.opcode) { |
||
1910 | case BRW_OPCODE_ENDIF: |
||
1911 | case BRW_OPCODE_ELSE: |
||
1912 | case BRW_OPCODE_WHILE: |
||
1913 | return ip; |
||
1914 | } |
||
1915 | } |
||
1916 | assert(!"not reached"); |
||
1917 | return start + 1; |
||
1918 | } |
||
1919 | |||
1920 | /* There is no DO instruction on gen6, so to find the end of the loop |
||
1921 | * we have to see if the loop is jumping back before our start |
||
1922 | * instruction. |
||
1923 | */ |
||
1924 | static int |
||
1925 | brw_find_loop_end(struct brw_compile *p, int start) |
||
1926 | { |
||
1927 | int ip; |
||
1928 | int br = 2; |
||
1929 | |||
1930 | for (ip = start + 1; ip < p->nr_insn; ip++) { |
||
1931 | struct brw_instruction *insn = &p->store[ip]; |
||
1932 | |||
1933 | if (insn->header.opcode == BRW_OPCODE_WHILE) { |
||
1934 | int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count |
||
1935 | : insn->bits3.break_cont.jip; |
||
1936 | if (ip + jip / br <= start) |
||
1937 | return ip; |
||
1938 | } |
||
1939 | } |
||
1940 | assert(!"not reached"); |
||
1941 | return start + 1; |
||
1942 | } |
||
1943 | |||
1944 | /* After program generation, go back and update the UIP and JIP of |
||
1945 | * BREAK and CONT instructions to their correct locations. |
||
1946 | */ |
||
1947 | void |
||
1948 | brw_set_uip_jip(struct brw_compile *p) |
||
1949 | { |
||
1950 | int ip; |
||
1951 | int br = 2; |
||
1952 | |||
1953 | if (p->gen <= 060) |
||
1954 | return; |
||
1955 | |||
1956 | for (ip = 0; ip < p->nr_insn; ip++) { |
||
1957 | struct brw_instruction *insn = &p->store[ip]; |
||
1958 | |||
1959 | switch (insn->header.opcode) { |
||
1960 | case BRW_OPCODE_BREAK: |
||
1961 | insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); |
||
1962 | /* Gen7 UIP points to WHILE; Gen6 points just after it */ |
||
1963 | insn->bits3.break_cont.uip = |
||
1964 | br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0)); |
||
1965 | break; |
||
1966 | case BRW_OPCODE_CONTINUE: |
||
1967 | insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); |
||
1968 | insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); |
||
1969 | |||
1970 | assert(insn->bits3.break_cont.uip != 0); |
||
1971 | assert(insn->bits3.break_cont.jip != 0); |
||
1972 | break; |
||
1973 | } |
||
1974 | } |
||
1975 | } |
||
1976 | |||
1977 | void brw_ff_sync(struct brw_compile *p, |
||
1978 | struct brw_reg dest, |
||
1979 | unsigned msg_reg_nr, |
||
1980 | struct brw_reg src0, |
||
1981 | bool allocate, |
||
1982 | unsigned response_length, |
||
1983 | bool eot) |
||
1984 | { |
||
1985 | struct brw_instruction *insn; |
||
1986 | |||
1987 | gen6_resolve_implied_move(p, &src0, msg_reg_nr); |
||
1988 | |||
1989 | insn = brw_next_insn(p, BRW_OPCODE_SEND); |
||
1990 | brw_set_dest(p, insn, dest); |
||
1991 | brw_set_src0(p, insn, src0); |
||
1992 | brw_set_src1(p, insn, brw_imm_d(0)); |
||
1993 | |||
1994 | if (p->gen < 060) |
||
1995 | insn->header.destreg__conditionalmod = msg_reg_nr; |
||
1996 | |||
1997 | brw_set_ff_sync_message(p, |
||
1998 | insn, |
||
1999 | allocate, |
||
2000 | response_length, |
||
2001 | eot); |
||
2002 | }>=>>=>=>=>>>=>>>><>>>>>>>>>>=>>>>>>=>>>>> |