Subversion Repositories Kolibri OS
Rev 5564, author serge
/*
2
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3
 Intel funded Tungsten Graphics to
4
 develop this 3D driver.
5
 
6
 Permission is hereby granted, free of charge, to any person obtaining
7
 a copy of this software and associated documentation files (the
8
 "Software"), to deal in the Software without restriction, including
9
 without limitation the rights to use, copy, modify, merge, publish,
10
 distribute, sublicense, and/or sell copies of the Software, and to
11
 permit persons to whom the Software is furnished to do so, subject to
12
 the following conditions:
13
 
14
 The above copyright notice and this permission notice (including the
15
 next paragraph) shall be included in all copies or substantial
16
 portions of the Software.
17
 
18
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 
26
 **********************************************************************/
27
 /*
28
  * Authors:
29
  *   Keith Whitwell 
30
  */
31
 
32
 
33
#include "brw_context.h"
34
#include "brw_defines.h"
35
#include "brw_eu.h"
36
 
37
#include "util/ralloc.h"
38
 
39
/**
40
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
41
 * registers, implicitly moving the operand to a message register.
42
 *
43
 * On Sandybridge, this is no longer the case.  This function performs the
44
 * explicit move; it should be called before emitting a SEND instruction.
45
 */
46
void
47
gen6_resolve_implied_move(struct brw_codegen *p,
48
			  struct brw_reg *src,
49
			  unsigned msg_reg_nr)
50
{
51
   const struct brw_device_info *devinfo = p->devinfo;
52
   if (devinfo->gen < 6)
53
      return;
54
 
55
   if (src->file == BRW_MESSAGE_REGISTER_FILE)
56
      return;
57
 
58
   if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
59
      brw_push_insn_state(p);
60
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
61
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
62
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
63
      brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
64
	      retype(*src, BRW_REGISTER_TYPE_UD));
65
      brw_pop_insn_state(p);
66
   }
67
   *src = brw_message_reg(msg_reg_nr);
68
}
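
/* Illustrative call site (a sketch, not taken from this file; the payload
 * register and MRF number are hypothetical).  On Gen6+ the helper emits a
 * MOV of the operand into the requested message register and rewrites *src
 * so the following SEND reads its payload from that MRF:
 *
 *    struct brw_reg payload = brw_vec8_grf(2, 0);   // hypothetical payload GRF
 *    gen6_resolve_implied_move(p, &payload, 1);     // payload now refers to m1
 *    // ... fill out and emit the SEND using payload as src0 ...
 */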
69
 
70
static void
71
gen7_convert_mrf_to_grf(struct brw_codegen *p, struct brw_reg *reg)
72
{
73
   /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
74
    * "The send with EOT should use register space R112-R127 for <src>. This is
75
    *  to enable loading of a new thread into the same slot while the message
76
    *  with EOT for current thread is pending dispatch."
77
    *
78
    * Since we're pretending to have 16 MRFs anyway, we may as well use the
79
    * registers required for messages with EOT.
80
    */
81
   const struct brw_device_info *devinfo = p->devinfo;
82
   if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
83
      reg->file = BRW_GENERAL_REGISTER_FILE;
84
      reg->nr += GEN7_MRF_HACK_START;
85
   }
86
}
87
 
88
/**
89
 * Convert a brw_reg_type enumeration value into the hardware representation.
90
 *
91
 * The hardware encoding may depend on whether the value is an immediate.
92
 */
93
unsigned
94
brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
95
                        enum brw_reg_type type, unsigned file)
96
{
97
   if (file == BRW_IMMEDIATE_VALUE) {
98
      const static int imm_hw_types[] = {
99
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
100
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
101
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
102
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
103
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
104
         [BRW_REGISTER_TYPE_UB] = -1,
105
         [BRW_REGISTER_TYPE_B]  = -1,
106
         [BRW_REGISTER_TYPE_UV] = BRW_HW_REG_IMM_TYPE_UV,
107
         [BRW_REGISTER_TYPE_VF] = BRW_HW_REG_IMM_TYPE_VF,
108
         [BRW_REGISTER_TYPE_V]  = BRW_HW_REG_IMM_TYPE_V,
109
         [BRW_REGISTER_TYPE_DF] = GEN8_HW_REG_IMM_TYPE_DF,
110
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_IMM_TYPE_HF,
111
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
112
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
113
      };
114
      assert(type < ARRAY_SIZE(imm_hw_types));
115
      assert(imm_hw_types[type] != -1);
116
      assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_DF);
117
      return imm_hw_types[type];
118
   } else {
119
      /* Non-immediate registers */
120
      const static int hw_types[] = {
121
         [BRW_REGISTER_TYPE_UD] = BRW_HW_REG_TYPE_UD,
122
         [BRW_REGISTER_TYPE_D]  = BRW_HW_REG_TYPE_D,
123
         [BRW_REGISTER_TYPE_UW] = BRW_HW_REG_TYPE_UW,
124
         [BRW_REGISTER_TYPE_W]  = BRW_HW_REG_TYPE_W,
125
         [BRW_REGISTER_TYPE_UB] = BRW_HW_REG_NON_IMM_TYPE_UB,
126
         [BRW_REGISTER_TYPE_B]  = BRW_HW_REG_NON_IMM_TYPE_B,
127
         [BRW_REGISTER_TYPE_F]  = BRW_HW_REG_TYPE_F,
128
         [BRW_REGISTER_TYPE_UV] = -1,
129
         [BRW_REGISTER_TYPE_VF] = -1,
130
         [BRW_REGISTER_TYPE_V]  = -1,
131
         [BRW_REGISTER_TYPE_DF] = GEN7_HW_REG_NON_IMM_TYPE_DF,
132
         [BRW_REGISTER_TYPE_HF] = GEN8_HW_REG_NON_IMM_TYPE_HF,
133
         [BRW_REGISTER_TYPE_UQ] = GEN8_HW_REG_TYPE_UQ,
134
         [BRW_REGISTER_TYPE_Q]  = GEN8_HW_REG_TYPE_Q,
135
      };
136
      assert(type < ARRAY_SIZE(hw_types));
137
      assert(hw_types[type] != -1);
138
      assert(devinfo->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
139
      assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
140
      return hw_types[type];
141
   }
142
}
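
/* Illustrative example: a vector-float constant (e.g. one built with
 * brw_imm_vf()) only has an immediate encoding, so
 *
 *    brw_reg_type_to_hw_type(devinfo, BRW_REGISTER_TYPE_VF, BRW_IMMEDIATE_VALUE)
 *
 * returns BRW_HW_REG_IMM_TYPE_VF, while passing a non-immediate file with the
 * same type would trip the assert above because there is no register encoding
 * for VF.
 */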
143
 
144
void
145
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
146
{
147
   const struct brw_device_info *devinfo = p->devinfo;
148
 
149
   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
150
       dest.file != BRW_MESSAGE_REGISTER_FILE)
151
      assert(dest.nr < 128);
152
 
153
   gen7_convert_mrf_to_grf(p, &dest);
154
 
155
   brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
156
   brw_inst_set_dst_reg_type(devinfo, inst,
157
                             brw_reg_type_to_hw_type(devinfo, dest.type,
158
                                                     dest.file));
159
   brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
160
 
161
   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
162
      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
163
 
164
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
165
         brw_inst_set_dst_da1_subreg_nr(devinfo, inst, dest.subnr);
166
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
167
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
168
         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
169
      } else {
170
         brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
171
         brw_inst_set_da16_writemask(devinfo, inst, dest.dw1.bits.writemask);
172
         if (dest.file == BRW_GENERAL_REGISTER_FILE ||
173
             dest.file == BRW_MESSAGE_REGISTER_FILE) {
174
            assert(dest.dw1.bits.writemask != 0);
175
         }
176
	 /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
177
	  *    Although Dst.HorzStride is a don't care for Align16, HW needs
178
	  *    this to be programmed as "01".
179
	  */
180
         brw_inst_set_dst_hstride(devinfo, inst, 1);
181
      }
182
   } else {
183
      brw_inst_set_dst_ia_subreg_nr(devinfo, inst, dest.subnr);
184
 
185
      /* These are different sizes in align1 vs align16:
186
       */
187
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
188
         brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
189
                                       dest.dw1.bits.indirect_offset);
190
	 if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
191
	    dest.hstride = BRW_HORIZONTAL_STRIDE_1;
192
         brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
193
      } else {
194
         brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
195
                                        dest.dw1.bits.indirect_offset);
196
	 /* even ignored in da16, still need to set as '01' */
197
         brw_inst_set_dst_hstride(devinfo, inst, 1);
198
      }
199
   }
200
 
201
   /* Generators should set a default exec_size of either 8 (SIMD4x2 or SIMD8)
202
    * or 16 (SIMD16), as that's normally correct.  However, when dealing with
203
    * small registers, we automatically reduce it to match the register size.
204
    */
205
   if (dest.width < BRW_EXECUTE_8)
206
      brw_inst_set_exec_size(devinfo, inst, dest.width);
207
}
208
 
209
extern int reg_type_size[];
210
 
211
static void
212
validate_reg(const struct brw_device_info *devinfo,
213
             brw_inst *inst, struct brw_reg reg)
214
{
215
   const int hstride_for_reg[] = {0, 1, 2, 4};
216
   const int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32};
217
   const int width_for_reg[] = {1, 2, 4, 8, 16};
218
   const int execsize_for_reg[] = {1, 2, 4, 8, 16, 32};
219
   int width, hstride, vstride, execsize;
220
 
221
   if (reg.file == BRW_IMMEDIATE_VALUE) {
222
      /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
223
       * mean the destination has to be 128-bit aligned and the
224
       * destination horiz stride has to be a word.
225
       */
226
      if (reg.type == BRW_REGISTER_TYPE_V) {
227
         assert(hstride_for_reg[brw_inst_dst_hstride(devinfo, inst)] *
228
                reg_type_size[brw_inst_dst_reg_type(devinfo, inst)] == 2);
229
      }
230
 
231
      return;
232
   }
233
 
234
   if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
235
       reg.nr == BRW_ARF_NULL)
236
      return;
237
 
238
   assert(reg.hstride >= 0 && reg.hstride < ARRAY_SIZE(hstride_for_reg));
239
   hstride = hstride_for_reg[reg.hstride];
240
 
241
   if (reg.vstride == 0xf) {
242
      vstride = -1;
243
   } else {
244
      assert(reg.vstride >= 0 && reg.vstride < ARRAY_SIZE(vstride_for_reg));
245
      vstride = vstride_for_reg[reg.vstride];
246
   }
247
 
248
   assert(reg.width >= 0 && reg.width < ARRAY_SIZE(width_for_reg));
249
   width = width_for_reg[reg.width];
250
 
251
   assert(brw_inst_exec_size(devinfo, inst) >= 0 &&
252
          brw_inst_exec_size(devinfo, inst) < ARRAY_SIZE(execsize_for_reg));
253
   execsize = execsize_for_reg[brw_inst_exec_size(devinfo, inst)];
254
 
255
   /* Restrictions from 3.3.10: Register Region Restrictions. */
256
   /* 3. */
257
   assert(execsize >= width);
258
 
259
   /* 4. */
260
   if (execsize == width && hstride != 0) {
261
      assert(vstride == -1 || vstride == width * hstride);
262
   }
263
 
264
   /* 5. */
265
   if (execsize == width && hstride == 0) {
266
      /* no restriction on vstride. */
267
   }
268
 
269
   /* 6. */
270
   if (width == 1) {
271
      assert(hstride == 0);
272
   }
273
 
274
   /* 7. */
275
   if (execsize == 1 && width == 1) {
276
      assert(hstride == 0);
277
      assert(vstride == 0);
278
   }
279
 
280
   /* 8. */
281
   if (vstride == 0 && hstride == 0) {
282
      assert(width == 1);
283
   }
284
 
285
   /* 10. Check destination issues. */
286
}
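
/* Worked example (illustrative): a row-contiguous region such as
 * g2.0<8;8,1>:f executed at SIMD8 satisfies the checks above:
 * execsize (8) >= width (8), and because execsize == width with a non-zero
 * hstride, rule 4 requires vstride == width * hstride, i.e. 8 == 8 * 1.
 */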
287
 
288
static bool
289
is_compactable_immediate(unsigned imm)
290
{
291
   /* We get the low 12 bits as-is. */
292
   imm &= ~0xfff;
293
 
294
   /* We get one bit replicated through the top 20 bits. */
295
   return imm == 0 || imm == 0xfffff000;
296
}
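
/* Illustrative values: the immediate compacts when its upper 20 bits are a
 * replication of a single bit (all zeros or all ones):
 *
 *    is_compactable_immediate(0x00000fff)  -> true   (upper 20 bits all 0)
 *    is_compactable_immediate(0xfffff800)  -> true   (upper 20 bits all 1)
 *    is_compactable_immediate(0x00001000)  -> false  (upper bits mixed)
 */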
297
 
298
void
299
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
300
{
301
   const struct brw_device_info *devinfo = p->devinfo;
302
 
303
   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
304
      assert(reg.nr < 128);
305
 
306
   gen7_convert_mrf_to_grf(p, &reg);
307
 
308
   if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
309
                             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
310
      /* Any source modifiers or regions will be ignored, since this just
311
       * identifies the MRF/GRF to start reading the message contents from.
312
       * Check for some likely failures.
313
       */
314
      assert(!reg.negate);
315
      assert(!reg.abs);
316
      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
317
   }
318
 
319
   validate_reg(devinfo, inst, reg);
320
 
321
   brw_inst_set_src0_reg_file(devinfo, inst, reg.file);
322
   brw_inst_set_src0_reg_type(devinfo, inst,
323
                              brw_reg_type_to_hw_type(devinfo, reg.type, reg.file));
324
   brw_inst_set_src0_abs(devinfo, inst, reg.abs);
325
   brw_inst_set_src0_negate(devinfo, inst, reg.negate);
326
   brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
327
 
328
   if (reg.file == BRW_IMMEDIATE_VALUE) {
329
      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
330
 
331
      /* The Bspec's section titled "Non-present Operands" claims that if src0
332
       * is an immediate that src1's type must be the same as that of src0.
333
       *
334
       * The SNB+ DataTypeIndex instruction compaction tables contain mappings
335
       * that do not follow this rule. E.g., from the IVB/HSW table:
336
       *
337
       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
338
       *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
339
       *
340
       * And from the SNB table:
341
       *
342
       *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
343
       *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
344
       *
345
       * Neither of these cause warnings from the simulator when used,
346
       * compacted or otherwise. In fact, all compaction mappings that have an
347
       * immediate in src0 use a:ud for src1.
348
       *
349
       * The GM45 instruction compaction tables do not contain mapped meanings
350
       * so it's not clear whether it has the restriction. We'll assume it was
351
       * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
352
       */
353
      brw_inst_set_src1_reg_file(devinfo, inst, BRW_ARCHITECTURE_REGISTER_FILE);
354
      if (devinfo->gen < 6) {
355
         brw_inst_set_src1_reg_type(devinfo, inst,
356
                                    brw_inst_src0_reg_type(devinfo, inst));
357
      } else {
358
         brw_inst_set_src1_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
359
      }
360
 
361
      /* Compacted instructions only have 12-bits (plus 1 for the other 20)
362
       * for immediate values. Presumably the hardware engineers realized
363
       * that the only useful floating-point value that could be represented
364
       * in this format is 0.0, which can also be represented as a VF-typed
365
       * immediate, so they gave us the previously mentioned mapping on IVB+.
366
       *
367
       * Strangely, we do have a mapping for imm:f in src1, so we don't need
368
       * to do this there.
369
       *
370
       * If we see a 0.0:F, change the type to VF so that it can be compacted.
371
       */
372
      if (brw_inst_imm_ud(devinfo, inst) == 0x0 &&
373
          brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_F) {
374
         brw_inst_set_src0_reg_type(devinfo, inst, BRW_HW_REG_IMM_TYPE_VF);
375
      }
376
 
377
      /* There are no mappings for dst:d | i:d, so if the immediate is suitable
378
       * set the types to :UD so the instruction can be compacted.
379
       */
380
      if (is_compactable_immediate(brw_inst_imm_ud(devinfo, inst)) &&
381
          brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE &&
382
          brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_D &&
383
          brw_inst_dst_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_D) {
384
         brw_inst_set_src0_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
385
         brw_inst_set_dst_reg_type(devinfo, inst, BRW_HW_REG_TYPE_UD);
386
      }
387
   } else {
388
      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
389
         brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
390
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
391
             brw_inst_set_src0_da1_subreg_nr(devinfo, inst, reg.subnr);
392
	 } else {
393
            brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
394
	 }
395
      } else {
396
         brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.subnr);
397
 
398
         if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
399
            brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.dw1.bits.indirect_offset);
400
	 } else {
401
            brw_inst_set_src0_ia_subreg_nr(devinfo, inst, reg.dw1.bits.indirect_offset);
402
	 }
403
      }
404
 
405
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
406
	 if (reg.width == BRW_WIDTH_1 &&
407
             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
408
            brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
409
            brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
410
            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
411
	 } else {
412
            brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
413
            brw_inst_set_src0_width(devinfo, inst, reg.width);
414
            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
415
	 }
416
      } else {
417
         brw_inst_set_src0_da16_swiz_x(devinfo, inst,
418
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
419
         brw_inst_set_src0_da16_swiz_y(devinfo, inst,
420
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
421
         brw_inst_set_src0_da16_swiz_z(devinfo, inst,
422
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
423
         brw_inst_set_src0_da16_swiz_w(devinfo, inst,
424
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
425
 
426
	 /* This is an oddity of the fact we're using the same
427
	  * descriptions for registers in align_16 as align_1:
428
	  */
429
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
430
            brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
431
	 else
432
            brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
433
      }
434
   }
435
}
436
 
437
 
438
void
439
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
440
{
441
   const struct brw_device_info *devinfo = p->devinfo;
442
 
443
   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
444
      assert(reg.nr < 128);
445
 
446
   gen7_convert_mrf_to_grf(p, &reg);
447
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
448
 
449
   validate_reg(devinfo, inst, reg);
450
 
451
   brw_inst_set_src1_reg_file(devinfo, inst, reg.file);
452
   brw_inst_set_src1_reg_type(devinfo, inst,
453
                              brw_reg_type_to_hw_type(devinfo, reg.type, reg.file));
454
   brw_inst_set_src1_abs(devinfo, inst, reg.abs);
455
   brw_inst_set_src1_negate(devinfo, inst, reg.negate);
456
 
457
   /* Only src1 can be immediate in two-argument instructions.
458
    */
459
   assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
460
 
461
   if (reg.file == BRW_IMMEDIATE_VALUE) {
462
      brw_inst_set_imm_ud(devinfo, inst, reg.dw1.ud);
463
   } else {
464
      /* This is a hardware restriction, which may or may not be lifted
465
       * in the future:
466
       */
467
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
468
      /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
469
 
470
      brw_inst_set_src1_da_reg_nr(devinfo, inst, reg.nr);
471
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
472
         brw_inst_set_src1_da1_subreg_nr(devinfo, inst, reg.subnr);
473
      } else {
474
         brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
475
      }
476
 
477
      if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
478
	 if (reg.width == BRW_WIDTH_1 &&
479
             brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
480
            brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
481
            brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
482
            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
483
	 } else {
484
            brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
485
            brw_inst_set_src1_width(devinfo, inst, reg.width);
486
            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
487
	 }
488
      } else {
489
         brw_inst_set_src1_da16_swiz_x(devinfo, inst,
490
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X));
491
         brw_inst_set_src1_da16_swiz_y(devinfo, inst,
492
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y));
493
         brw_inst_set_src1_da16_swiz_z(devinfo, inst,
494
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z));
495
         brw_inst_set_src1_da16_swiz_w(devinfo, inst,
496
            BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W));
497
 
498
	 /* This is an oddity of the fact we're using the same
499
	  * descriptions for registers in align_16 as align_1:
500
	  */
501
	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
502
            brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
503
	 else
504
            brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
505
      }
506
   }
507
}
508
 
509
/**
510
 * Set the Message Descriptor and Extended Message Descriptor fields
511
 * for SEND messages.
512
 *
513
 * \note This zeroes out the Function Control bits, so it must be called
514
 *       \b before filling out any message-specific data.  Callers can
515
 *       choose not to fill in irrelevant bits; they will be zero.
516
 */
517
static void
518
brw_set_message_descriptor(struct brw_codegen *p,
519
			   brw_inst *inst,
520
			   enum brw_message_target sfid,
521
			   unsigned msg_length,
522
			   unsigned response_length,
523
			   bool header_present,
524
			   bool end_of_thread)
525
{
526
   const struct brw_device_info *devinfo = p->devinfo;
527
 
528
   brw_set_src1(p, inst, brw_imm_d(0));
529
 
530
   /* For indirect sends, `inst` will not be the SEND/SENDC instruction
531
    * itself; instead, it will be a MOV/OR into the address register.
532
    *
533
    * In this case, we avoid setting the extended message descriptor bits,
534
    * since they go on the later SEND/SENDC instead and if set here would
535
    * instead clobber the conditionalmod bits.
536
    */
537
   unsigned opcode = brw_inst_opcode(devinfo, inst);
538
   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
539
      brw_inst_set_sfid(devinfo, inst, sfid);
540
   }
541
 
542
   brw_inst_set_mlen(devinfo, inst, msg_length);
543
   brw_inst_set_rlen(devinfo, inst, response_length);
544
   brw_inst_set_eot(devinfo, inst, end_of_thread);
545
 
546
   if (devinfo->gen >= 5) {
547
      brw_inst_set_header_present(devinfo, inst, header_present);
548
   }
549
}
550
 
551
static void brw_set_math_message( struct brw_codegen *p,
552
				  brw_inst *inst,
553
				  unsigned function,
554
				  unsigned integer_type,
555
				  bool low_precision,
556
				  unsigned dataType )
557
{
558
   const struct brw_device_info *devinfo = p->devinfo;
559
   unsigned msg_length;
560
   unsigned response_length;
561
 
562
   /* Infer message length from the function */
563
   switch (function) {
564
   case BRW_MATH_FUNCTION_POW:
565
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
566
   case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
567
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
568
      msg_length = 2;
569
      break;
570
   default:
571
      msg_length = 1;
572
      break;
573
   }
574
 
575
   /* Infer response length from the function */
576
   switch (function) {
577
   case BRW_MATH_FUNCTION_SINCOS:
578
   case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
579
      response_length = 2;
580
      break;
581
   default:
582
      response_length = 1;
583
      break;
584
   }
585
 
586
 
587
   brw_set_message_descriptor(p, inst, BRW_SFID_MATH,
588
			      msg_length, response_length, false, false);
589
   brw_inst_set_math_msg_function(devinfo, inst, function);
590
   brw_inst_set_math_msg_signed_int(devinfo, inst, integer_type);
591
   brw_inst_set_math_msg_precision(devinfo, inst, low_precision);
592
   brw_inst_set_math_msg_saturate(devinfo, inst, brw_inst_saturate(devinfo, inst));
593
   brw_inst_set_math_msg_data_type(devinfo, inst, dataType);
594
   brw_inst_set_saturate(devinfo, inst, 0);
595
}
596
 
597
 
598
static void brw_set_ff_sync_message(struct brw_codegen *p,
599
				    brw_inst *insn,
600
				    bool allocate,
601
				    unsigned response_length,
602
				    bool end_of_thread)
603
{
604
   const struct brw_device_info *devinfo = p->devinfo;
605
 
606
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
607
			      1, response_length, true, end_of_thread);
608
   brw_inst_set_urb_opcode(devinfo, insn, 1); /* FF_SYNC */
609
   brw_inst_set_urb_allocate(devinfo, insn, allocate);
610
   /* The following fields are not used by FF_SYNC: */
611
   brw_inst_set_urb_global_offset(devinfo, insn, 0);
612
   brw_inst_set_urb_swizzle_control(devinfo, insn, 0);
613
   brw_inst_set_urb_used(devinfo, insn, 0);
614
   brw_inst_set_urb_complete(devinfo, insn, 0);
615
}
616
 
617
static void brw_set_urb_message( struct brw_codegen *p,
618
				 brw_inst *insn,
619
                                 enum brw_urb_write_flags flags,
620
				 unsigned msg_length,
621
				 unsigned response_length,
622
				 unsigned offset,
623
				 unsigned swizzle_control )
624
{
625
   const struct brw_device_info *devinfo = p->devinfo;
626
 
627
   assert(devinfo->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
628
   assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
629
   assert(devinfo->gen >= 7 || !(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
630
 
631
   brw_set_message_descriptor(p, insn, BRW_SFID_URB,
632
			      msg_length, response_length, true,
633
                              flags & BRW_URB_WRITE_EOT);
634
 
635
   if (flags & BRW_URB_WRITE_OWORD) {
636
      assert(msg_length == 2); /* header + one OWORD of data */
637
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_OWORD);
638
   } else {
639
      brw_inst_set_urb_opcode(devinfo, insn, BRW_URB_OPCODE_WRITE_HWORD);
640
   }
641
 
642
   brw_inst_set_urb_global_offset(devinfo, insn, offset);
643
   brw_inst_set_urb_swizzle_control(devinfo, insn, swizzle_control);
644
 
645
   if (devinfo->gen < 8) {
646
      brw_inst_set_urb_complete(devinfo, insn, !!(flags & BRW_URB_WRITE_COMPLETE));
647
   }
648
 
649
   if (devinfo->gen < 7) {
650
      brw_inst_set_urb_allocate(devinfo, insn, !!(flags & BRW_URB_WRITE_ALLOCATE));
651
      brw_inst_set_urb_used(devinfo, insn, !(flags & BRW_URB_WRITE_UNUSED));
652
   } else {
653
      brw_inst_set_urb_per_slot_offset(devinfo, insn,
654
         !!(flags & BRW_URB_WRITE_PER_SLOT_OFFSET));
655
   }
656
}
657
 
658
void
659
brw_set_dp_write_message(struct brw_codegen *p,
660
			 brw_inst *insn,
661
			 unsigned binding_table_index,
662
			 unsigned msg_control,
663
			 unsigned msg_type,
664
			 unsigned msg_length,
665
			 bool header_present,
666
			 unsigned last_render_target,
667
			 unsigned response_length,
668
			 unsigned end_of_thread,
669
			 unsigned send_commit_msg)
670
{
671
   const struct brw_device_info *devinfo = p->devinfo;
672
   unsigned sfid;
673
 
674
   if (devinfo->gen >= 7) {
675
      /* Use the Render Cache for RT writes; otherwise use the Data Cache */
676
      if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
677
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
678
      else
679
	 sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
680
   } else if (devinfo->gen == 6) {
681
      /* Use the render cache for all write messages. */
682
      sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
683
   } else {
684
      sfid = BRW_SFID_DATAPORT_WRITE;
685
   }
686
 
687
   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
688
			      header_present, end_of_thread);
689
 
690
   brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
691
   brw_inst_set_dp_write_msg_type(devinfo, insn, msg_type);
692
   brw_inst_set_dp_write_msg_control(devinfo, insn, msg_control);
693
   brw_inst_set_rt_last(devinfo, insn, last_render_target);
694
   if (devinfo->gen < 7) {
695
      brw_inst_set_dp_write_commit(devinfo, insn, send_commit_msg);
696
   }
697
}
698
 
699
void
700
brw_set_dp_read_message(struct brw_codegen *p,
701
			brw_inst *insn,
702
			unsigned binding_table_index,
703
			unsigned msg_control,
704
			unsigned msg_type,
705
			unsigned target_cache,
706
			unsigned msg_length,
707
                        bool header_present,
708
			unsigned response_length)
709
{
710
   const struct brw_device_info *devinfo = p->devinfo;
711
   unsigned sfid;
712
 
713
   if (devinfo->gen >= 7) {
714
      sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
715
   } else if (devinfo->gen == 6) {
716
      if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
717
	 sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
718
      else
719
	 sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
720
   } else {
721
      sfid = BRW_SFID_DATAPORT_READ;
722
   }
723
 
724
   brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
725
			      header_present, false);
726
 
727
   brw_inst_set_binding_table_index(devinfo, insn, binding_table_index);
728
   brw_inst_set_dp_read_msg_type(devinfo, insn, msg_type);
729
   brw_inst_set_dp_read_msg_control(devinfo, insn, msg_control);
730
   if (devinfo->gen < 6)
731
      brw_inst_set_dp_read_target_cache(devinfo, insn, target_cache);
732
}
733
 
734
void
735
brw_set_sampler_message(struct brw_codegen *p,
736
                        brw_inst *inst,
737
                        unsigned binding_table_index,
738
                        unsigned sampler,
739
                        unsigned msg_type,
740
                        unsigned response_length,
741
                        unsigned msg_length,
742
                        unsigned header_present,
743
                        unsigned simd_mode,
744
                        unsigned return_format)
745
{
746
   const struct brw_device_info *devinfo = p->devinfo;
747
 
748
   brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
749
			      response_length, header_present, false);
750
 
751
   brw_inst_set_binding_table_index(devinfo, inst, binding_table_index);
752
   brw_inst_set_sampler(devinfo, inst, sampler);
753
   brw_inst_set_sampler_msg_type(devinfo, inst, msg_type);
754
   if (devinfo->gen >= 5) {
755
      brw_inst_set_sampler_simd_mode(devinfo, inst, simd_mode);
756
   } else if (devinfo->gen == 4 && !devinfo->is_g4x) {
757
      brw_inst_set_sampler_return_format(devinfo, inst, return_format);
758
   }
759
}
760
 
761
static void
762
gen7_set_dp_scratch_message(struct brw_codegen *p,
763
                            brw_inst *inst,
764
                            bool write,
765
                            bool dword,
766
                            bool invalidate_after_read,
767
                            unsigned num_regs,
768
                            unsigned addr_offset,
769
                            unsigned mlen,
770
                            unsigned rlen,
771
                            bool header_present)
772
{
773
   const struct brw_device_info *devinfo = p->devinfo;
774
   assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
775
          (devinfo->gen >= 8 && num_regs == 8));
776
   brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
777
                              mlen, rlen, header_present, false);
778
   brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
779
   brw_inst_set_scratch_read_write(devinfo, inst, write);
780
   brw_inst_set_scratch_type(devinfo, inst, dword);
781
   brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
782
   brw_inst_set_scratch_block_size(devinfo, inst, ffs(num_regs) - 1);
783
   brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
784
}
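
/* For reference (illustrative): the block-size field encodes the register
 * count as ffs(num_regs) - 1, so the power-of-two sizes accepted by the
 * assert above map to 1 -> 0, 2 -> 1, 4 -> 2 and, on Gen8+, 8 -> 3.
 */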
785
 
786
#define next_insn brw_next_insn
787
brw_inst *
788
brw_next_insn(struct brw_codegen *p, unsigned opcode)
789
{
790
   const struct brw_device_info *devinfo = p->devinfo;
791
   brw_inst *insn;
792
 
793
   if (p->nr_insn + 1 > p->store_size) {
794
      p->store_size <<= 1;
795
      p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
796
   }
797
 
798
   p->next_insn_offset += 16;
799
   insn = &p->store[p->nr_insn++];
800
   memcpy(insn, p->current, sizeof(*insn));
801
 
802
   brw_inst_set_opcode(devinfo, insn, opcode);
803
   return insn;
804
}
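
/* Note (illustrative): native Gen instructions are 16 bytes, which is why
 * next_insn_offset advances by 16 per emitted instruction.  The store grows
 * geometrically: once nr_insn reaches store_size, the next emission doubles
 * the buffer with a single reralloc before the new slot is used.
 */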
805
 
806
static brw_inst *
807
brw_alu1(struct brw_codegen *p, unsigned opcode,
808
         struct brw_reg dest, struct brw_reg src)
809
{
810
   brw_inst *insn = next_insn(p, opcode);
811
   brw_set_dest(p, insn, dest);
812
   brw_set_src0(p, insn, src);
813
   return insn;
814
}
815
 
816
static brw_inst *
817
brw_alu2(struct brw_codegen *p, unsigned opcode,
818
         struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
819
{
820
   brw_inst *insn = next_insn(p, opcode);
821
   brw_set_dest(p, insn, dest);
822
   brw_set_src0(p, insn, src0);
823
   brw_set_src1(p, insn, src1);
824
   return insn;
825
}
826
 
827
static int
828
get_3src_subreg_nr(struct brw_reg reg)
829
{
830
   if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
831
      assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
832
      return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
833
   } else {
834
      return reg.subnr / 4;
835
   }
836
}
837
 
838
static brw_inst *
839
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
840
         struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
841
{
842
   const struct brw_device_info *devinfo = p->devinfo;
843
   brw_inst *inst = next_insn(p, opcode);
844
 
845
   gen7_convert_mrf_to_grf(p, &dest);
846
 
847
   assert(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16);
848
 
849
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
850
	  dest.file == BRW_MESSAGE_REGISTER_FILE);
851
   assert(dest.nr < 128);
852
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
853
   assert(dest.type == BRW_REGISTER_TYPE_F ||
854
          dest.type == BRW_REGISTER_TYPE_D ||
855
          dest.type == BRW_REGISTER_TYPE_UD);
856
   if (devinfo->gen == 6) {
857
      brw_inst_set_3src_dst_reg_file(devinfo, inst,
858
                                     dest.file == BRW_MESSAGE_REGISTER_FILE);
859
   }
860
   brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
861
   brw_inst_set_3src_dst_subreg_nr(devinfo, inst, dest.subnr / 16);
862
   brw_inst_set_3src_dst_writemask(devinfo, inst, dest.dw1.bits.writemask);
863
 
864
   assert(src0.file == BRW_GENERAL_REGISTER_FILE);
865
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
866
   assert(src0.nr < 128);
867
   brw_inst_set_3src_src0_swizzle(devinfo, inst, src0.dw1.bits.swizzle);
868
   brw_inst_set_3src_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
869
   brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
870
   brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
871
   brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
872
   brw_inst_set_3src_src0_rep_ctrl(devinfo, inst,
873
                                   src0.vstride == BRW_VERTICAL_STRIDE_0);
874
 
875
   assert(src1.file == BRW_GENERAL_REGISTER_FILE);
876
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
877
   assert(src1.nr < 128);
878
   brw_inst_set_3src_src1_swizzle(devinfo, inst, src1.dw1.bits.swizzle);
879
   brw_inst_set_3src_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
880
   brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
881
   brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
882
   brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
883
   brw_inst_set_3src_src1_rep_ctrl(devinfo, inst,
884
                                   src1.vstride == BRW_VERTICAL_STRIDE_0);
885
 
886
   assert(src2.file == BRW_GENERAL_REGISTER_FILE);
887
   assert(src2.address_mode == BRW_ADDRESS_DIRECT);
888
   assert(src2.nr < 128);
889
   brw_inst_set_3src_src2_swizzle(devinfo, inst, src2.dw1.bits.swizzle);
890
   brw_inst_set_3src_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
891
   brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
892
   brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
893
   brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
894
   brw_inst_set_3src_src2_rep_ctrl(devinfo, inst,
895
                                   src2.vstride == BRW_VERTICAL_STRIDE_0);
896
 
897
   if (devinfo->gen >= 7) {
898
      /* Set both the source and destination types based on dest.type,
899
       * ignoring the source register types.  The MAD and LRP emitters ensure
900
       * that all four types are float.  The BFE and BFI2 emitters, however,
901
       * may send us mixed D and UD types and want us to ignore that and use
902
       * the destination type.
903
       */
904
      switch (dest.type) {
905
      case BRW_REGISTER_TYPE_F:
906
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_F);
907
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_F);
908
         break;
909
      case BRW_REGISTER_TYPE_D:
910
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_D);
911
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_D);
912
         break;
913
      case BRW_REGISTER_TYPE_UD:
914
         brw_inst_set_3src_src_type(devinfo, inst, BRW_3SRC_TYPE_UD);
915
         brw_inst_set_3src_dst_type(devinfo, inst, BRW_3SRC_TYPE_UD);
916
         break;
917
      }
918
   }
919
 
920
   return inst;
921
}
922
 
923
 
924
/***********************************************************************
925
 * Convenience routines.
926
 */
927
#define ALU1(OP)					\
928
brw_inst *brw_##OP(struct brw_codegen *p,		\
929
	      struct brw_reg dest,			\
930
	      struct brw_reg src0)   			\
931
{							\
932
   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
933
}
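
/* For reference (illustrative): ALU1(MOV) further below expands to
 *
 *    brw_inst *brw_MOV(struct brw_codegen *p,
 *                      struct brw_reg dest,
 *                      struct brw_reg src0)
 *    {
 *       return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
 *    }
 *
 * so each ALU1/ALU2/ALU3/ALU3F invocation simply stamps out a thin wrapper
 * around the corresponding brw_alu* emitter for the named opcode.
 */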
934
 
935
#define ALU2(OP)					\
936
brw_inst *brw_##OP(struct brw_codegen *p,		\
937
	      struct brw_reg dest,			\
938
	      struct brw_reg src0,			\
939
	      struct brw_reg src1)   			\
940
{							\
941
   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
942
}
943
 
944
#define ALU3(OP)					\
945
brw_inst *brw_##OP(struct brw_codegen *p,		\
946
	      struct brw_reg dest,			\
947
	      struct brw_reg src0,			\
948
	      struct brw_reg src1,			\
949
	      struct brw_reg src2)   			\
950
{							\
951
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2);	\
952
}
953
 
954
#define ALU3F(OP)                                               \
955
brw_inst *brw_##OP(struct brw_codegen *p,         \
956
                                 struct brw_reg dest,           \
957
                                 struct brw_reg src0,           \
958
                                 struct brw_reg src1,           \
959
                                 struct brw_reg src2)           \
960
{                                                               \
961
   assert(dest.type == BRW_REGISTER_TYPE_F);                    \
962
   assert(src0.type == BRW_REGISTER_TYPE_F);                    \
963
   assert(src1.type == BRW_REGISTER_TYPE_F);                    \
964
   assert(src2.type == BRW_REGISTER_TYPE_F);                    \
965
   return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
966
}
967
 
968
/* Rounding operations (other than RNDD) require two instructions - the first
969
 * stores a rounded value (possibly the wrong way) in the dest register, but
970
 * also sets a per-channel "increment bit" in the flag register.  A predicated
971
 * add of 1.0 fixes dest to contain the desired result.
972
 *
973
 * Sandybridge and later appear to round correctly without an ADD.
974
 */
975
#define ROUND(OP)							      \
976
void brw_##OP(struct brw_codegen *p,					      \
977
	      struct brw_reg dest,					      \
978
	      struct brw_reg src)					      \
979
{									      \
980
   const struct brw_device_info *devinfo = p->devinfo;					      \
981
   brw_inst *rnd, *add;							      \
982
   rnd = next_insn(p, BRW_OPCODE_##OP);					      \
983
   brw_set_dest(p, rnd, dest);						      \
984
   brw_set_src0(p, rnd, src);						      \
985
									      \
986
   if (devinfo->gen < 6) {							      \
987
      /* turn on round-increments */					      \
988
      brw_inst_set_cond_modifier(devinfo, rnd, BRW_CONDITIONAL_R);            \
989
      add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));			      \
990
      brw_inst_set_pred_control(devinfo, add, BRW_PREDICATE_NORMAL);          \
991
   }									      \
992
}
993
 
994
 
995
ALU1(MOV)
996
ALU2(SEL)
997
ALU1(NOT)
998
ALU2(AND)
999
ALU2(OR)
1000
ALU2(XOR)
1001
ALU2(SHR)
1002
ALU2(SHL)
1003
ALU2(ASR)
1004
ALU1(FRC)
1005
ALU1(RNDD)
1006
ALU2(MAC)
1007
ALU2(MACH)
1008
ALU1(LZD)
1009
ALU2(DP4)
1010
ALU2(DPH)
1011
ALU2(DP3)
1012
ALU2(DP2)
1013
ALU3F(MAD)
1014
ALU3F(LRP)
1015
ALU1(BFREV)
1016
ALU3(BFE)
1017
ALU2(BFI1)
1018
ALU3(BFI2)
1019
ALU1(FBH)
1020
ALU1(FBL)
1021
ALU1(CBIT)
1022
ALU2(ADDC)
1023
ALU2(SUBB)
1024
 
1025
ROUND(RNDZ)
1026
ROUND(RNDE)
1027
 
1028
 
1029
brw_inst *
1030
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
1031
        struct brw_reg src0, struct brw_reg src1)
1032
{
1033
   /* 6.2.2: add */
1034
   if (src0.type == BRW_REGISTER_TYPE_F ||
1035
       (src0.file == BRW_IMMEDIATE_VALUE &&
1036
	src0.type == BRW_REGISTER_TYPE_VF)) {
1037
      assert(src1.type != BRW_REGISTER_TYPE_UD);
1038
      assert(src1.type != BRW_REGISTER_TYPE_D);
1039
   }
1040
 
1041
   if (src1.type == BRW_REGISTER_TYPE_F ||
1042
       (src1.file == BRW_IMMEDIATE_VALUE &&
1043
	src1.type == BRW_REGISTER_TYPE_VF)) {
1044
      assert(src0.type != BRW_REGISTER_TYPE_UD);
1045
      assert(src0.type != BRW_REGISTER_TYPE_D);
1046
   }
1047
 
1048
   return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1049
}
1050
 
1051
brw_inst *
1052
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
1053
        struct brw_reg src0, struct brw_reg src1)
1054
{
1055
   assert(dest.type == src0.type);
1056
   assert(src0.type == src1.type);
1057
   switch (src0.type) {
1058
   case BRW_REGISTER_TYPE_B:
1059
   case BRW_REGISTER_TYPE_UB:
1060
   case BRW_REGISTER_TYPE_W:
1061
   case BRW_REGISTER_TYPE_UW:
1062
   case BRW_REGISTER_TYPE_D:
1063
   case BRW_REGISTER_TYPE_UD:
1064
      break;
1065
   default:
1066
      unreachable("Bad type for brw_AVG");
1067
   }
1068
 
1069
   return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
1070
}
1071
 
1072
brw_inst *
1073
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
1074
        struct brw_reg src0, struct brw_reg src1)
1075
{
1076
   /* 6.32.38: mul */
1077
   if (src0.type == BRW_REGISTER_TYPE_D ||
1078
       src0.type == BRW_REGISTER_TYPE_UD ||
1079
       src1.type == BRW_REGISTER_TYPE_D ||
1080
       src1.type == BRW_REGISTER_TYPE_UD) {
1081
      assert(dest.type != BRW_REGISTER_TYPE_F);
1082
   }
1083
 
1084
   if (src0.type == BRW_REGISTER_TYPE_F ||
1085
       (src0.file == BRW_IMMEDIATE_VALUE &&
1086
	src0.type == BRW_REGISTER_TYPE_VF)) {
1087
      assert(src1.type != BRW_REGISTER_TYPE_UD);
1088
      assert(src1.type != BRW_REGISTER_TYPE_D);
1089
   }
1090
 
1091
   if (src1.type == BRW_REGISTER_TYPE_F ||
1092
       (src1.file == BRW_IMMEDIATE_VALUE &&
1093
	src1.type == BRW_REGISTER_TYPE_VF)) {
1094
      assert(src0.type != BRW_REGISTER_TYPE_UD);
1095
      assert(src0.type != BRW_REGISTER_TYPE_D);
1096
   }
1097
 
1098
   assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1099
	  src0.nr != BRW_ARF_ACCUMULATOR);
1100
   assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
1101
	  src1.nr != BRW_ARF_ACCUMULATOR);
1102
 
1103
   return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
1104
}
1105
 
1106
brw_inst *
1107
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
1108
         struct brw_reg src0, struct brw_reg src1)
1109
{
1110
   src0.vstride = BRW_VERTICAL_STRIDE_0;
1111
   src0.width = BRW_WIDTH_1;
1112
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1113
   return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
1114
}
1115
 
1116
brw_inst *
1117
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
1118
        struct brw_reg src0, struct brw_reg src1)
1119
{
1120
   src0.vstride = BRW_VERTICAL_STRIDE_0;
1121
   src0.width = BRW_WIDTH_1;
1122
   src0.hstride = BRW_HORIZONTAL_STRIDE_0;
1123
   src1.vstride = BRW_VERTICAL_STRIDE_8;
1124
   src1.width = BRW_WIDTH_8;
1125
   src1.hstride = BRW_HORIZONTAL_STRIDE_1;
1126
   return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
1127
}
1128
 
1129
brw_inst *
1130
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
1131
{
1132
   const struct brw_device_info *devinfo = p->devinfo;
1133
   const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
1134
   /* The F32TO16 instruction doesn't support 32-bit destination types in
1135
    * Align1 mode, and neither does the Gen8 implementation in terms of a
1136
    * converting MOV.  Gen7 does zero out the high 16 bits in Align16 mode as
1137
    * an undocumented feature.
1138
    */
1139
   const bool needs_zero_fill = (dst.type == BRW_REGISTER_TYPE_UD &&
1140
                                 (!align16 || devinfo->gen >= 8));
1141
   brw_inst *inst;
1142
 
1143
   if (align16) {
1144
      assert(dst.type == BRW_REGISTER_TYPE_UD);
1145
   } else {
1146
      assert(dst.type == BRW_REGISTER_TYPE_UD ||
1147
             dst.type == BRW_REGISTER_TYPE_W ||
1148
             dst.type == BRW_REGISTER_TYPE_UW ||
1149
             dst.type == BRW_REGISTER_TYPE_HF);
1150
   }
1151
 
1152
   brw_push_insn_state(p);
1153
 
1154
   if (needs_zero_fill) {
1155
      brw_set_default_access_mode(p, BRW_ALIGN_1);
1156
      dst = spread(retype(dst, BRW_REGISTER_TYPE_W), 2);
1157
   }
1158
 
1159
   if (devinfo->gen >= 8) {
1160
      inst = brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
1161
   } else {
1162
      assert(devinfo->gen == 7);
1163
      inst = brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
1164
   }
1165
 
1166
   if (needs_zero_fill) {
1167
      brw_inst_set_no_dd_clear(devinfo, inst, true);
1168
      inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
1169
      brw_inst_set_no_dd_check(devinfo, inst, true);
1170
   }
1171
 
1172
   brw_pop_insn_state(p);
1173
   return inst;
1174
}
1175
 
1176
brw_inst *
1177
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
1178
{
1179
   const struct brw_device_info *devinfo = p->devinfo;
1180
   bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
1181
 
1182
   if (align16) {
1183
      assert(src.type == BRW_REGISTER_TYPE_UD);
1184
   } else {
1185
      /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32:
1186
       *
1187
       *   Because this instruction does not have a 16-bit floating-point
1188
       *   type, the source data type must be Word (W). The destination type
1189
       *   must be F (Float).
1190
       */
1191
      if (src.type == BRW_REGISTER_TYPE_UD)
1192
         src = spread(retype(src, BRW_REGISTER_TYPE_W), 2);
1193
 
1194
      assert(src.type == BRW_REGISTER_TYPE_W ||
1195
             src.type == BRW_REGISTER_TYPE_UW ||
1196
             src.type == BRW_REGISTER_TYPE_HF);
1197
   }
1198
 
1199
   if (devinfo->gen >= 8) {
1200
      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
1201
   } else {
1202
      assert(devinfo->gen == 7);
1203
      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
1204
   }
1205
}
1206
 
1207
 
1208
void brw_NOP(struct brw_codegen *p)
1209
{
1210
   brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
1211
   brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1212
   brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1213
   brw_set_src1(p, insn, brw_imm_ud(0x0));
1214
}
1215
 
1216
 
1217
 
1218
 
1219
 
1220
/***********************************************************************
1221
 * Comparisons, if/else/endif
1222
 */
1223
 
1224
brw_inst *
1225
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1226
         unsigned predicate_control)
1227
{
1228
   const struct brw_device_info *devinfo = p->devinfo;
1229
   struct brw_reg ip = brw_ip_reg();
1230
   brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
1231
 
1232
   brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2);
1233
   brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
1234
   brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
1235
   brw_inst_set_pred_control(devinfo, inst, predicate_control);
1236
 
1237
   return inst;
1238
}
1239
 
1240
static void
1241
push_if_stack(struct brw_codegen *p, brw_inst *inst)
1242
{
1243
   p->if_stack[p->if_stack_depth] = inst - p->store;
1244
 
1245
   p->if_stack_depth++;
1246
   if (p->if_stack_array_size <= p->if_stack_depth) {
1247
      p->if_stack_array_size *= 2;
1248
      p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
1249
			     p->if_stack_array_size);
1250
   }
1251
}
1252
 
1253
static brw_inst *
1254
pop_if_stack(struct brw_codegen *p)
1255
{
1256
   p->if_stack_depth--;
1257
   return &p->store[p->if_stack[p->if_stack_depth]];
1258
}
1259
 
1260
static void
1261
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
1262
{
1263
   if (p->loop_stack_array_size < p->loop_stack_depth) {
1264
      p->loop_stack_array_size *= 2;
1265
      p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
1266
			       p->loop_stack_array_size);
1267
      p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
1268
				     p->loop_stack_array_size);
1269
   }
1270
 
1271
   p->loop_stack[p->loop_stack_depth] = inst - p->store;
1272
   p->loop_stack_depth++;
1273
   p->if_depth_in_loop[p->loop_stack_depth] = 0;
1274
}
1275
 
1276
static brw_inst *
1277
get_inner_do_insn(struct brw_codegen *p)
1278
{
1279
   return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
1280
}
1281
 
1282
/* EU takes the value from the flag register and pushes it onto some
1283
 * sort of a stack (presumably merging with any flag value already on
1284
 * the stack).  Within an if block, the flags at the top of the stack
1285
 * control execution on each channel of the unit, eg. on each of the
1286
 * 16 pixel values in our wm programs.
1287
 *
1288
 * When the matching 'else' instruction is reached (presumably by
1289
 * countdown of the instruction count patched in by our ELSE/ENDIF
1290
 * functions), the relevant flags are inverted.
1291
 *
1292
 * When the matching 'endif' instruction is reached, the flags are
1293
 * popped off.  If the stack is now empty, normal execution resumes.
1294
 */
1295
brw_inst *
1296
brw_IF(struct brw_codegen *p, unsigned execute_size)
1297
{
1298
   const struct brw_device_info *devinfo = p->devinfo;
1299
   brw_inst *insn;
1300
 
1301
   insn = next_insn(p, BRW_OPCODE_IF);
1302
 
1303
   /* Override the defaults for this instruction:
1304
    */
1305
   if (devinfo->gen < 6) {
1306
      brw_set_dest(p, insn, brw_ip_reg());
1307
      brw_set_src0(p, insn, brw_ip_reg());
1308
      brw_set_src1(p, insn, brw_imm_d(0x0));
1309
   } else if (devinfo->gen == 6) {
1310
      brw_set_dest(p, insn, brw_imm_w(0));
1311
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1312
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1313
      brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1314
   } else if (devinfo->gen == 7) {
1315
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1316
      brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1317
      brw_set_src1(p, insn, brw_imm_w(0));
1318
      brw_inst_set_jip(devinfo, insn, 0);
1319
      brw_inst_set_uip(devinfo, insn, 0);
1320
   } else {
1321
      brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
1322
      brw_set_src0(p, insn, brw_imm_d(0));
1323
      brw_inst_set_jip(devinfo, insn, 0);
1324
      brw_inst_set_uip(devinfo, insn, 0);
1325
   }
1326
 
1327
   brw_inst_set_exec_size(devinfo, insn, execute_size);
1328
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1329
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
1330
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1331
   if (!p->single_program_flow && devinfo->gen < 6)
1332
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1333
 
1334
   push_if_stack(p, insn);
1335
   p->if_depth_in_loop[p->loop_stack_depth]++;
1336
   return insn;
1337
}
1338
 
1339
/* This function is only used for gen6-style IF instructions with an
1340
 * embedded comparison (conditional modifier).  It is not used on gen7.
1341
 */
1342
brw_inst *
1343
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
1344
	struct brw_reg src0, struct brw_reg src1)
1345
{
1346
   const struct brw_device_info *devinfo = p->devinfo;
1347
   brw_inst *insn;
1348
 
1349
   insn = next_insn(p, BRW_OPCODE_IF);
1350
 
1351
   brw_set_dest(p, insn, brw_imm_w(0));
1352
   brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
1353
                                                   : BRW_EXECUTE_8);
1354
   brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1355
   brw_set_src0(p, insn, src0);
1356
   brw_set_src1(p, insn, src1);
1357
 
1358
   assert(brw_inst_qtr_control(devinfo, insn) == BRW_COMPRESSION_NONE);
1359
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
1360
   brw_inst_set_cond_modifier(devinfo, insn, conditional);
1361
 
1362
   push_if_stack(p, insn);
1363
   return insn;
1364
}
1365
 
1366
/**
1367
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
1368
 */
1369
static void
1370
convert_IF_ELSE_to_ADD(struct brw_codegen *p,
1371
                       brw_inst *if_inst, brw_inst *else_inst)
1372
{
1373
   const struct brw_device_info *devinfo = p->devinfo;
1374
 
1375
   /* The next instruction (where the ENDIF would be, if it existed) */
1376
   brw_inst *next_inst = &p->store[p->nr_insn];
1377
 
1378
   assert(p->single_program_flow);
1379
   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
1380
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
1381
   assert(brw_inst_exec_size(devinfo, if_inst) == BRW_EXECUTE_1);
1382
 
1383
   /* Convert IF to an ADD instruction that moves the instruction pointer
1384
    * to the first instruction of the ELSE block.  If there is no ELSE
1385
    * block, point to where ENDIF would be.  Reverse the predicate.
1386
    *
1387
    * There's no need to execute an ENDIF since we don't need to do any
1388
    * stack operations, and if we're currently executing, we just want to
1389
    * continue normally.
1390
    */
1391
   brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_ADD);
1392
   brw_inst_set_pred_inv(devinfo, if_inst, true);
1393
 
1394
   if (else_inst != NULL) {
1395
      /* Convert ELSE to an ADD instruction that points where the ENDIF
1396
       * would be.
1397
       */
1398
      brw_inst_set_opcode(devinfo, else_inst, BRW_OPCODE_ADD);
1399
 
1400
      brw_inst_set_imm_ud(devinfo, if_inst, (else_inst - if_inst + 1) * 16);
1401
      brw_inst_set_imm_ud(devinfo, else_inst, (next_inst - else_inst) * 16);
1402
   } else {
1403
      brw_inst_set_imm_ud(devinfo, if_inst, (next_inst - if_inst) * 16);
1404
   }
1405
}
1406
 
1407
/**
1408
 * Patch IF and ELSE instructions with appropriate jump targets.
1409
 */
1410
static void
1411
patch_IF_ELSE(struct brw_codegen *p,
1412
              brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
1413
{
1414
   const struct brw_device_info *devinfo = p->devinfo;
1415
 
1416
   /* We shouldn't be patching IF and ELSE instructions in single program flow
1417
    * mode when gen < 6, because in single program flow mode on those
1418
    * platforms, we convert flow control instructions to conditional ADDs that
1419
    * operate on IP (see brw_ENDIF).
1420
    *
1421
    * However, on Gen6, writing to IP doesn't work in single program flow mode
1422
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1423
    * not be updated by non-flow control instructions.").  And on later
1424
    * platforms, there is no significant benefit to converting control flow
1425
    * instructions to conditional ADDs.  So we do patch IF and ELSE
1426
    * instructions in single program flow mode on those platforms.
1427
    */
1428
   if (devinfo->gen < 6)
1429
      assert(!p->single_program_flow);
1430
 
1431
   assert(if_inst != NULL && brw_inst_opcode(devinfo, if_inst) == BRW_OPCODE_IF);
1432
   assert(endif_inst != NULL);
1433
   assert(else_inst == NULL || brw_inst_opcode(devinfo, else_inst) == BRW_OPCODE_ELSE);
1434
 
1435
   unsigned br = brw_jump_scale(devinfo);
1436
 
1437
   assert(brw_inst_opcode(devinfo, endif_inst) == BRW_OPCODE_ENDIF);
1438
   brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));
1439
 
1440
   if (else_inst == NULL) {
1441
      /* Patch IF -> ENDIF */
1442
      if (devinfo->gen < 6) {
1443
	 /* Turn it into an IFF, which means no mask stack operations for
1444
	  * all-false and jumping past the ENDIF.
1445
	  */
1446
         brw_inst_set_opcode(devinfo, if_inst, BRW_OPCODE_IFF);
1447
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
1448
                                      br * (endif_inst - if_inst + 1));
1449
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
1450
      } else if (devinfo->gen == 6) {
1451
	 /* As of gen6, there is no IFF and IF must point to the ENDIF. */
1452
         brw_inst_set_gen6_jump_count(devinfo, if_inst, br*(endif_inst - if_inst));
1453
      } else {
1454
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1455
         brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
1456
      }
1457
   } else {
1458
      brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));
1459
 
1460
      /* Patch IF -> ELSE */
1461
      if (devinfo->gen < 6) {
1462
         brw_inst_set_gen4_jump_count(devinfo, if_inst,
1463
                                      br * (else_inst - if_inst));
1464
         brw_inst_set_gen4_pop_count(devinfo, if_inst, 0);
1465
      } else if (devinfo->gen == 6) {
1466
         brw_inst_set_gen6_jump_count(devinfo, if_inst,
1467
                                      br * (else_inst - if_inst + 1));
1468
      }
1469
 
1470
      /* Patch ELSE -> ENDIF */
1471
      if (devinfo->gen < 6) {
1472
	 /* BRW_OPCODE_ELSE pre-gen6 should point just past the
1473
	  * matching ENDIF.
1474
	  */
1475
         brw_inst_set_gen4_jump_count(devinfo, else_inst,
1476
                                      br * (endif_inst - else_inst + 1));
1477
         brw_inst_set_gen4_pop_count(devinfo, else_inst, 1);
1478
      } else if (devinfo->gen == 6) {
1479
	 /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
1480
         brw_inst_set_gen6_jump_count(devinfo, else_inst,
1481
                                      br * (endif_inst - else_inst));
1482
      } else {
1483
	 /* The IF instruction's JIP should point just past the ELSE */
1484
         brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
1485
	 /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
1486
         brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
1487
         brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
1488
         if (devinfo->gen >= 8) {
1489
            /* Since we don't set branch_ctrl, the ELSE's JIP and UIP both
1490
             * should point to ENDIF.
1491
             */
1492
            brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
1493
         }
1494
      }
1495
   }
1496
}
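 
/* Editor's sketch of the resulting layout (derived from the code above,
 * in instruction units; the encoded fields are scaled by br):
 *
 *   gen7+ with an ELSE present:
 *      IF.JIP   -> first instruction after the ELSE
 *      IF.UIP   -> ENDIF
 *      ELSE.JIP -> ENDIF   (gen8+ also sets ELSE.UIP -> ENDIF)
 */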
1497
 
1498
void
1499
brw_ELSE(struct brw_codegen *p)
1500
{
1501
   const struct brw_device_info *devinfo = p->devinfo;
1502
   brw_inst *insn;
1503
 
1504
   insn = next_insn(p, BRW_OPCODE_ELSE);
1505
 
1506
   if (devinfo->gen < 6) {
1507
      brw_set_dest(p, insn, brw_ip_reg());
1508
      brw_set_src0(p, insn, brw_ip_reg());
1509
      brw_set_src1(p, insn, brw_imm_d(0x0));
1510
   } else if (devinfo->gen == 6) {
1511
      brw_set_dest(p, insn, brw_imm_w(0));
1512
      brw_inst_set_gen6_jump_count(devinfo, insn, 0);
1513
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1514
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1515
   } else if (devinfo->gen == 7) {
1516
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1517
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1518
      brw_set_src1(p, insn, brw_imm_w(0));
1519
      brw_inst_set_jip(devinfo, insn, 0);
1520
      brw_inst_set_uip(devinfo, insn, 0);
1521
   } else {
1522
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1523
      brw_set_src0(p, insn, brw_imm_d(0));
1524
      brw_inst_set_jip(devinfo, insn, 0);
1525
      brw_inst_set_uip(devinfo, insn, 0);
1526
   }
1527
 
1528
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1529
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1530
   if (!p->single_program_flow && devinfo->gen < 6)
1531
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1532
 
1533
   push_if_stack(p, insn);
1534
}
1535
 
1536
void
1537
brw_ENDIF(struct brw_codegen *p)
1538
{
1539
   const struct brw_device_info *devinfo = p->devinfo;
1540
   brw_inst *insn = NULL;
1541
   brw_inst *else_inst = NULL;
1542
   brw_inst *if_inst = NULL;
1543
   brw_inst *tmp;
1544
   bool emit_endif = true;
1545
 
1546
   /* In single program flow mode, we can express IF and ELSE instructions
1547
    * equivalently as ADD instructions that operate on IP.  On platforms prior
1548
    * to Gen6, flow control instructions cause an implied thread switch, so
1549
    * this is a significant savings.
1550
    *
1551
    * However, on Gen6, writing to IP doesn't work in single program flow mode
1552
    * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
1553
    * not be updated by non-flow control instructions.").  And on later
1554
    * platforms, there is no significant benefit to converting control flow
1555
    * instructions to conditional ADDs.  So we only do this trick on Gen4 and
1556
    * Gen5.
1557
    */
1558
   if (devinfo->gen < 6 && p->single_program_flow)
1559
      emit_endif = false;
1560
 
1561
   /*
1562
    * A single next_insn() may change the base address of instruction store
1563
    * memory (p->store), so call it first, before referencing the instruction
1564
    * store pointer through a saved index.
1565
    */
1566
   if (emit_endif)
1567
      insn = next_insn(p, BRW_OPCODE_ENDIF);
1568
 
1569
   /* Pop the IF and (optional) ELSE instructions from the stack */
1570
   p->if_depth_in_loop[p->loop_stack_depth]--;
1571
   tmp = pop_if_stack(p);
1572
   if (brw_inst_opcode(devinfo, tmp) == BRW_OPCODE_ELSE) {
1573
      else_inst = tmp;
1574
      tmp = pop_if_stack(p);
1575
   }
1576
   if_inst = tmp;
1577
 
1578
   if (!emit_endif) {
1579
      /* ENDIF is useless; don't bother emitting it. */
1580
      convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
1581
      return;
1582
   }
1583
 
1584
   if (devinfo->gen < 6) {
1585
      brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1586
      brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
1587
      brw_set_src1(p, insn, brw_imm_d(0x0));
1588
   } else if (devinfo->gen == 6) {
1589
      brw_set_dest(p, insn, brw_imm_w(0));
1590
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1591
      brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1592
   } else if (devinfo->gen == 7) {
1593
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1594
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1595
      brw_set_src1(p, insn, brw_imm_w(0));
1596
   } else {
1597
      brw_set_src0(p, insn, brw_imm_d(0));
1598
   }
1599
 
1600
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1601
   brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
1602
   if (devinfo->gen < 6)
1603
      brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1604
 
1605
   /* Also pop item off the stack in the endif instruction: */
1606
   if (devinfo->gen < 6) {
1607
      brw_inst_set_gen4_jump_count(devinfo, insn, 0);
1608
      brw_inst_set_gen4_pop_count(devinfo, insn, 1);
1609
   } else if (devinfo->gen == 6) {
1610
      brw_inst_set_gen6_jump_count(devinfo, insn, 2);
1611
   } else {
1612
      brw_inst_set_jip(devinfo, insn, 2);
1613
   }
1614
   patch_IF_ELSE(p, if_inst, else_inst, insn);
1615
}
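 
/* Editor's sketch (not part of the original driver): one way a caller can
 * string the helpers above together on gen6+.  The function name and the
 * register numbers are hypothetical; brw_IF() is the non-gen6 variant
 * defined earlier in this file, and f0 is assumed to hold a compare result
 * already.
 */
#if 0
static void
example_emit_if_else(struct brw_codegen *p)
{
   struct brw_reg dst = brw_vec8_grf(4, 0);
   struct brw_reg a   = brw_vec8_grf(2, 0);
   struct brw_reg b   = brw_vec8_grf(3, 0);

   brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8);
   brw_inst_set_pred_control(p->devinfo, if_inst, BRW_PREDICATE_NORMAL);

   brw_MOV(p, dst, a);       /* "then" block */
   brw_ELSE(p);
   brw_MOV(p, dst, b);       /* "else" block */
   brw_ENDIF(p);             /* pops the if stack and patches IF/ELSE targets */
}
#endif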
1616
 
1617
brw_inst *
1618
brw_BREAK(struct brw_codegen *p)
1619
{
1620
   const struct brw_device_info *devinfo = p->devinfo;
1621
   brw_inst *insn;
1622
 
1623
   insn = next_insn(p, BRW_OPCODE_BREAK);
1624
   if (devinfo->gen >= 8) {
1625
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1626
      brw_set_src0(p, insn, brw_imm_d(0x0));
1627
   } else if (devinfo->gen >= 6) {
1628
      brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1629
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1630
      brw_set_src1(p, insn, brw_imm_d(0x0));
1631
   } else {
1632
      brw_set_dest(p, insn, brw_ip_reg());
1633
      brw_set_src0(p, insn, brw_ip_reg());
1634
      brw_set_src1(p, insn, brw_imm_d(0x0));
1635
      brw_inst_set_gen4_pop_count(devinfo, insn,
1636
                                  p->if_depth_in_loop[p->loop_stack_depth]);
1637
   }
1638
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1639
   brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
1640
                                                   : BRW_EXECUTE_8);
1641
 
1642
   return insn;
1643
}
1644
 
1645
brw_inst *
1646
brw_CONT(struct brw_codegen *p)
1647
{
1648
   const struct brw_device_info *devinfo = p->devinfo;
1649
   brw_inst *insn;
1650
 
1651
   insn = next_insn(p, BRW_OPCODE_CONTINUE);
1652
   brw_set_dest(p, insn, brw_ip_reg());
1653
   if (devinfo->gen >= 8) {
1654
      brw_set_src0(p, insn, brw_imm_d(0x0));
1655
   } else {
1656
      brw_set_src0(p, insn, brw_ip_reg());
1657
      brw_set_src1(p, insn, brw_imm_d(0x0));
1658
   }
1659
 
1660
   if (devinfo->gen < 6) {
1661
      brw_inst_set_gen4_pop_count(devinfo, insn,
1662
                                  p->if_depth_in_loop[p->loop_stack_depth]);
1663
   }
1664
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1665
   brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
1666
                                                   : BRW_EXECUTE_8);
1667
   return insn;
1668
}
1669
 
1670
brw_inst *
1671
gen6_HALT(struct brw_codegen *p)
1672
{
1673
   const struct brw_device_info *devinfo = p->devinfo;
1674
   brw_inst *insn;
1675
 
1676
   insn = next_insn(p, BRW_OPCODE_HALT);
1677
   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1678
   if (devinfo->gen >= 8) {
1679
      brw_set_src0(p, insn, brw_imm_d(0x0));
1680
   } else {
1681
      brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1682
      brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
1683
   }
1684
 
1685
   if (p->compressed) {
1686
      brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_16);
1687
   } else {
1688
      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1689
      brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_8);
1690
   }
1691
   return insn;
1692
}
1693
 
1694
/* DO/WHILE loop:
1695
 *
1696
 * The DO/WHILE is just an unterminated loop -- break or continue are
1697
 * used for control within the loop.  We have a few ways they can be
1698
 * done.
1699
 *
1700
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1701
 * jip and no DO instruction.
1702
 *
1703
 * For non-uniform control flow pre-gen6, there's a DO instruction to
1704
 * push the mask, and a WHILE to jump back, and BREAK to get out and
1705
 * pop the mask.
1706
 *
1707
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
1708
 * just points back to the first instruction of the loop.
1709
 */
1710
brw_inst *
1711
brw_DO(struct brw_codegen *p, unsigned execute_size)
1712
{
1713
   const struct brw_device_info *devinfo = p->devinfo;
1714
 
1715
   if (devinfo->gen >= 6 || p->single_program_flow) {
1716
      push_loop_stack(p, &p->store[p->nr_insn]);
1717
      return &p->store[p->nr_insn];
1718
   } else {
1719
      brw_inst *insn = next_insn(p, BRW_OPCODE_DO);
1720
 
1721
      push_loop_stack(p, insn);
1722
 
1723
      /* Override the defaults for this instruction:
1724
       */
1725
      brw_set_dest(p, insn, brw_null_reg());
1726
      brw_set_src0(p, insn, brw_null_reg());
1727
      brw_set_src1(p, insn, brw_null_reg());
1728
 
1729
      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1730
      brw_inst_set_exec_size(devinfo, insn, execute_size);
1731
      brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE);
1732
 
1733
      return insn;
1734
   }
1735
}
1736
 
1737
/**
1738
 * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
1739
 * instruction here.
1740
 *
1741
 * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
1742
 * nesting, since it can always just point to the end of the block/current loop.
1743
 */
1744
static void
1745
brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
1746
{
1747
   const struct brw_device_info *devinfo = p->devinfo;
1748
   brw_inst *do_inst = get_inner_do_insn(p);
1749
   brw_inst *inst;
1750
   unsigned br = brw_jump_scale(devinfo);
1751
 
1752
   assert(devinfo->gen < 6);
1753
 
1754
   for (inst = while_inst - 1; inst != do_inst; inst--) {
1755
      /* If the jump count is != 0, that means that this instruction has already
1756
       * been patched because it's part of a loop inside of the one we're
1757
       * patching.
1758
       */
1759
      if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_BREAK &&
1760
          brw_inst_gen4_jump_count(devinfo, inst) == 0) {
1761
         brw_inst_set_gen4_jump_count(devinfo, inst, br*((while_inst - inst) + 1));
1762
      } else if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_CONTINUE &&
1763
                 brw_inst_gen4_jump_count(devinfo, inst) == 0) {
1764
         brw_inst_set_gen4_jump_count(devinfo, inst, br * (while_inst - inst));
1765
      }
1766
   }
1767
}
1768
 
1769
brw_inst *
1770
brw_WHILE(struct brw_codegen *p)
1771
{
1772
   const struct brw_device_info *devinfo = p->devinfo;
1773
   brw_inst *insn, *do_insn;
1774
   unsigned br = brw_jump_scale(devinfo);
1775
 
1776
   if (devinfo->gen >= 6) {
1777
      insn = next_insn(p, BRW_OPCODE_WHILE);
1778
      do_insn = get_inner_do_insn(p);
1779
 
1780
      if (devinfo->gen >= 8) {
1781
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1782
         brw_set_src0(p, insn, brw_imm_d(0));
1783
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1784
      } else if (devinfo->gen == 7) {
1785
         brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1786
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1787
         brw_set_src1(p, insn, brw_imm_w(0));
1788
         brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
1789
      } else {
1790
         brw_set_dest(p, insn, brw_imm_w(0));
1791
         brw_inst_set_gen6_jump_count(devinfo, insn, br * (do_insn - insn));
1792
         brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1793
         brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
1794
      }
1795
 
1796
      brw_inst_set_exec_size(devinfo, insn, p->compressed ? BRW_EXECUTE_16
1797
                                                      : BRW_EXECUTE_8);
1798
   } else {
1799
      if (p->single_program_flow) {
1800
	 insn = next_insn(p, BRW_OPCODE_ADD);
1801
         do_insn = get_inner_do_insn(p);
1802
 
1803
	 brw_set_dest(p, insn, brw_ip_reg());
1804
	 brw_set_src0(p, insn, brw_ip_reg());
1805
	 brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1806
         brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
1807
      } else {
1808
	 insn = next_insn(p, BRW_OPCODE_WHILE);
1809
         do_insn = get_inner_do_insn(p);
1810
 
1811
         assert(brw_inst_opcode(devinfo, do_insn) == BRW_OPCODE_DO);
1812
 
1813
	 brw_set_dest(p, insn, brw_ip_reg());
1814
	 brw_set_src0(p, insn, brw_ip_reg());
1815
	 brw_set_src1(p, insn, brw_imm_d(0));
1816
 
1817
         brw_inst_set_exec_size(devinfo, insn, brw_inst_exec_size(devinfo, do_insn));
1818
         brw_inst_set_gen4_jump_count(devinfo, insn, br * (do_insn - insn + 1));
1819
         brw_inst_set_gen4_pop_count(devinfo, insn, 0);
1820
 
1821
	 brw_patch_break_cont(p, insn);
1822
      }
1823
   }
1824
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
1825
 
1826
   p->loop_stack_depth--;
1827
 
1828
   return insn;
1829
}
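 
/* Editor's sketch (not part of the original driver): the usual emission
 * pattern for a loop built from the helpers above.  The function name,
 * register number and loop condition are hypothetical.  On gen6+ the
 * BREAK's JIP/UIP stay zero here and are filled in once the whole program
 * has been generated, by brw_set_uip_jip() further down in this file; on
 * pre-gen6 brw_WHILE() patches them via brw_patch_break_cont().
 */
#if 0
static void
example_emit_loop(struct brw_codegen *p)
{
   struct brw_reg counter = retype(brw_vec8_grf(5, 0), BRW_REGISTER_TYPE_D);

   brw_DO(p, BRW_EXECUTE_8);        /* no instruction emitted on gen6+ */

   /* Loop body: decrement, then break out of the loop when we hit zero. */
   brw_ADD(p, counter, counter, brw_imm_d(-1));
   brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_D),
           BRW_CONDITIONAL_Z, counter, brw_imm_d(0));
   brw_inst *brk = brw_BREAK(p);
   brw_inst_set_pred_control(p->devinfo, brk, BRW_PREDICATE_NORMAL);

   brw_WHILE(p);                    /* jumps back to the first body instruction */
}
#endif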
1830
 
1831
/* FORWARD JUMPS:
1832
 */
1833
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
1834
{
1835
   const struct brw_device_info *devinfo = p->devinfo;
1836
   brw_inst *jmp_insn = &p->store[jmp_insn_idx];
1837
   unsigned jmpi = 1;
1838
 
1839
   if (devinfo->gen >= 5)
1840
      jmpi = 2;
1841
 
1842
   assert(brw_inst_opcode(devinfo, jmp_insn) == BRW_OPCODE_JMPI);
1843
   assert(brw_inst_src1_reg_file(devinfo, jmp_insn) == BRW_IMMEDIATE_VALUE);
1844
 
1845
   brw_inst_set_gen4_jump_count(devinfo, jmp_insn,
1846
                                jmpi * (p->nr_insn - jmp_insn_idx - 1));
1847
}
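 
/* Editor's sketch (not part of the original driver): typical use of
 * brw_land_fwd_jump().  The brw_JMPI() call and its predicate are an
 * assumption about the helper defined earlier in this file; the key point
 * is that the caller records p->nr_insn before emitting the jump and
 * patches it once the skipped code has been emitted.
 */
#if 0
static void
example_forward_jump(struct brw_codegen *p, struct brw_reg dst,
                     struct brw_reg src)
{
   int jmp = p->nr_insn;                     /* index of the JMPI below */
   brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL);

   brw_MOV(p, dst, src);                     /* skipped when the jump is taken */

   brw_land_fwd_jump(p, jmp);                /* the jump now lands here */
}
#endif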
1848
 
1849
/* To integrate with the above, it makes sense that the comparison
1850
 * instruction should populate the flag register.  It might be simpler
1851
 * just to use the flag reg for most WM tasks?
1852
 */
1853
void brw_CMP(struct brw_codegen *p,
1854
	     struct brw_reg dest,
1855
	     unsigned conditional,
1856
	     struct brw_reg src0,
1857
	     struct brw_reg src1)
1858
{
1859
   const struct brw_device_info *devinfo = p->devinfo;
1860
   brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
1861
 
1862
   brw_inst_set_cond_modifier(devinfo, insn, conditional);
1863
   brw_set_dest(p, insn, dest);
1864
   brw_set_src0(p, insn, src0);
1865
   brw_set_src1(p, insn, src1);
1866
 
1867
   /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
1868
    * page says:
1869
    *    "Any CMP instruction with a null destination must use a {switch}."
1870
    *
1871
    * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
1872
    * mentioned on their work-arounds pages.
1873
    */
1874
   if (devinfo->gen == 7) {
1875
      if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1876
          dest.nr == BRW_ARF_NULL) {
1877
         brw_inst_set_thread_control(devinfo, insn, BRW_THREAD_SWITCH);
1878
      }
1879
   }
1880
}
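 
/* Editor's sketch (not part of the original driver): the pattern the
 * comment above alludes to -- a CMP that only populates the flag register,
 * followed by an instruction predicated on it.  Register numbers are
 * hypothetical.
 */
#if 0
static void
example_cmp_then_select(struct brw_codegen *p)
{
   struct brw_reg dst = brw_vec8_grf(6, 0);
   struct brw_reg a   = brw_vec8_grf(2, 0);
   struct brw_reg b   = brw_vec8_grf(3, 0);

   /* f0 <- (a < b); the null destination discards the boolean result. */
   brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_F),
           BRW_CONDITIONAL_L, a, b);

   /* dst = b only in the channels where the compare passed. */
   brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
   brw_MOV(p, dst, b);
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
}
#endif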
1881
 
1882
/***********************************************************************
1883
 * Helpers for the various SEND message types:
1884
 */
1885
 
1886
/** Extended math function, float[8].
1887
 */
1888
void gen4_math(struct brw_codegen *p,
1889
	       struct brw_reg dest,
1890
	       unsigned function,
1891
	       unsigned msg_reg_nr,
1892
	       struct brw_reg src,
1893
	       unsigned precision )
1894
{
1895
   const struct brw_device_info *devinfo = p->devinfo;
1896
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
1897
   unsigned data_type;
1898
   if (has_scalar_region(src)) {
1899
      data_type = BRW_MATH_DATA_SCALAR;
1900
   } else {
1901
      data_type = BRW_MATH_DATA_VECTOR;
1902
   }
1903
 
1904
   assert(devinfo->gen < 6);
1905
 
1906
   /* Example code doesn't set predicate_control for send
1907
    * instructions.
1908
    */
1909
   brw_inst_set_pred_control(devinfo, insn, 0);
1910
   brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
1911
 
1912
   brw_set_dest(p, insn, dest);
1913
   brw_set_src0(p, insn, src);
1914
   brw_set_math_message(p,
1915
                        insn,
1916
                        function,
1917
                        src.type == BRW_REGISTER_TYPE_D,
1918
                        precision,
1919
                        data_type);
1920
}
1921
 
1922
void gen6_math(struct brw_codegen *p,
1923
	       struct brw_reg dest,
1924
	       unsigned function,
1925
	       struct brw_reg src0,
1926
	       struct brw_reg src1)
1927
{
1928
   const struct brw_device_info *devinfo = p->devinfo;
1929
   brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
1930
 
1931
   assert(devinfo->gen >= 6);
1932
 
1933
   assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
1934
          (devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
1935
   assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
1936
          (devinfo->gen >= 8 && src0.file == BRW_IMMEDIATE_VALUE));
1937
 
1938
   assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1939
   if (devinfo->gen == 6) {
1940
      assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1941
      assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1942
   }
1943
 
1944
   if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
1945
       function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
1946
       function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1947
      assert(src0.type != BRW_REGISTER_TYPE_F);
1948
      assert(src1.type != BRW_REGISTER_TYPE_F);
1949
      assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
1950
             (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
1951
   } else {
1952
      assert(src0.type == BRW_REGISTER_TYPE_F);
1953
      assert(src1.type == BRW_REGISTER_TYPE_F);
1954
      if (function == BRW_MATH_FUNCTION_POW) {
1955
         assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
1956
                (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
1957
      } else {
1958
         assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1959
                src1.nr == BRW_ARF_NULL);
1960
      }
1961
   }
1962
 
1963
   /* Source modifiers are ignored for extended math instructions on Gen6. */
1964
   if (devinfo->gen == 6) {
1965
      assert(!src0.negate);
1966
      assert(!src0.abs);
1967
      assert(!src1.negate);
1968
      assert(!src1.abs);
1969
   }
1970
 
1971
   brw_inst_set_math_function(devinfo, insn, function);
1972
 
1973
   brw_set_dest(p, insn, dest);
1974
   brw_set_src0(p, insn, src0);
1975
   brw_set_src1(p, insn, src1);
1976
}
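 
/* Editor's sketch (not part of the original driver): a gen6+ extended-math
 * call that satisfies the asserts above -- float GRF operands with unit
 * stride and a null src1 for the single-source functions.  Register
 * numbers are hypothetical.
 */
#if 0
static void
example_rsq(struct brw_codegen *p)
{
   struct brw_reg dst = brw_vec8_grf(8, 0);   /* defaults to type F */
   struct brw_reg src = brw_vec8_grf(2, 0);

   gen6_math(p, dst, BRW_MATH_FUNCTION_RSQ, src, brw_null_reg());
}
#endif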
1977
 
1978
 
1979
/**
1980
 * Write a block of OWORDs (half a GRF each) to the scratch buffer,
1981
 * using a constant offset per channel.
1982
 *
1983
 * The offset must be aligned to oword size (16 bytes).  Used for
1984
 * register spilling.
1985
 */
1986
void brw_oword_block_write_scratch(struct brw_codegen *p,
1987
				   struct brw_reg mrf,
1988
				   int num_regs,
1989
				   unsigned offset)
1990
{
1991
   const struct brw_device_info *devinfo = p->devinfo;
1992
   uint32_t msg_control, msg_type;
1993
   int mlen;
1994
 
1995
   if (devinfo->gen >= 6)
1996
      offset /= 16;
1997
 
1998
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
1999
 
2000
   if (num_regs == 1) {
2001
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2002
      mlen = 2;
2003
   } else {
2004
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2005
      mlen = 3;
2006
   }
2007
 
2008
   /* Set up the message header.  This is g0, with g0.2 filled with
2009
    * the offset.  We don't want to leave our offset around in g0 or
2010
    * it'll screw up texture samples, so set it up inside the message
2011
    * reg.
2012
    */
2013
   {
2014
      brw_push_insn_state(p);
2015
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2016
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2017
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2018
 
2019
      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2020
 
2021
      /* set message header global offset field (reg 0, element 2) */
2022
      brw_MOV(p,
2023
	      retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2024
				  mrf.nr,
2025
				  2), BRW_REGISTER_TYPE_UD),
2026
	      brw_imm_ud(offset));
2027
 
2028
      brw_pop_insn_state(p);
2029
   }
2030
 
2031
   {
2032
      struct brw_reg dest;
2033
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2034
      int send_commit_msg;
2035
      struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
2036
					 BRW_REGISTER_TYPE_UW);
2037
 
2038
      if (brw_inst_qtr_control(devinfo, insn) != BRW_COMPRESSION_NONE) {
2039
         brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
2040
	 src_header = vec16(src_header);
2041
      }
2042
      assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
2043
      if (devinfo->gen < 6)
2044
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2045
 
2046
      /* Until gen6, writes followed by reads from the same location
2047
       * are not guaranteed to be ordered unless write_commit is set.
2048
       * If set, then a no-op write is issued to the destination
2049
       * register to set a dependency, and a read from the destination
2050
       * can be used to ensure the ordering.
2051
       *
2052
       * For gen6, only writes between different threads need ordering
2053
       * protection.  Our use of DP writes is all about register
2054
       * spilling within a thread.
2055
       */
2056
      if (devinfo->gen >= 6) {
2057
	 dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2058
	 send_commit_msg = 0;
2059
      } else {
2060
	 dest = src_header;
2061
	 send_commit_msg = 1;
2062
      }
2063
 
2064
      brw_set_dest(p, insn, dest);
2065
      if (devinfo->gen >= 6) {
2066
	 brw_set_src0(p, insn, mrf);
2067
      } else {
2068
	 brw_set_src0(p, insn, brw_null_reg());
2069
      }
2070
 
2071
      if (devinfo->gen >= 6)
2072
	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2073
      else
2074
	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
2075
 
2076
      brw_set_dp_write_message(p,
2077
			       insn,
2078
			       255, /* binding table index (255=stateless) */
2079
			       msg_control,
2080
			       msg_type,
2081
			       mlen,
2082
			       true, /* header_present */
2083
			       0, /* not a render target */
2084
			       send_commit_msg, /* response_length */
2085
			       0, /* eot */
2086
			       send_commit_msg);
2087
   }
2088
}
2089
 
2090
 
2091
/**
2092
 * Read a block of owords (half a GRF each) from the scratch buffer
2093
 * using a constant index per channel.
2094
 *
2095
 * Offset must be aligned to oword size (16 bytes).  Used for register
2096
 * spilling.
2097
 */
2098
void
2099
brw_oword_block_read_scratch(struct brw_codegen *p,
2100
			     struct brw_reg dest,
2101
			     struct brw_reg mrf,
2102
			     int num_regs,
2103
			     unsigned offset)
2104
{
2105
   const struct brw_device_info *devinfo = p->devinfo;
2106
   uint32_t msg_control;
2107
   int rlen;
2108
 
2109
   if (devinfo->gen >= 6)
2110
      offset /= 16;
2111
 
2112
   if (p->devinfo->gen >= 7) {
2113
      /* On gen 7 and above, we no longer have message registers and we can
2114
       * send from any register we want.  By using the destination register
2115
       * for the message, we guarantee that the implied message write won't
2116
       * accidentally overwrite anything.  This has been a problem because
2117
       * the MRF registers and source for the final FB write are both fixed
2118
       * and may overlap.
2119
       */
2120
      mrf = retype(dest, BRW_REGISTER_TYPE_UD);
2121
   } else {
2122
      mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2123
   }
2124
   dest = retype(dest, BRW_REGISTER_TYPE_UW);
2125
 
2126
   if (num_regs == 1) {
2127
      msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
2128
      rlen = 1;
2129
   } else {
2130
      msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
2131
      rlen = 2;
2132
   }
2133
 
2134
   {
2135
      brw_push_insn_state(p);
2136
      brw_set_default_exec_size(p, BRW_EXECUTE_8);
2137
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2138
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2139
 
2140
      brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2141
 
2142
      /* set message header global offset field (reg 0, element 2) */
2143
      brw_MOV(p, get_element_ud(mrf, 2), brw_imm_ud(offset));
2144
 
2145
      brw_pop_insn_state(p);
2146
   }
2147
 
2148
   {
2149
      brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2150
 
2151
      assert(brw_inst_pred_control(devinfo, insn) == 0);
2152
      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
2153
 
2154
      brw_set_dest(p, insn, dest);	/* UW? */
2155
      if (devinfo->gen >= 6) {
2156
	 brw_set_src0(p, insn, mrf);
2157
      } else {
2158
	 brw_set_src0(p, insn, brw_null_reg());
2159
         brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2160
      }
2161
 
2162
      brw_set_dp_read_message(p,
2163
			      insn,
2164
			      255, /* binding table index (255=stateless) */
2165
			      msg_control,
2166
			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
2167
			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
2168
			      1, /* msg_length */
2169
                              true, /* header_present */
2170
			      rlen);
2171
   }
2172
}
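 
/* Editor's sketch (not part of the original driver): spilling one SIMD8
 * register to scratch and reading it back with the two helpers above.  The
 * base MRF (m13), the GRF numbers and the byte offset are hypothetical;
 * the data register goes in the MRF following the header that the write
 * helper fills in itself.
 */
#if 0
static void
example_spill_unspill(struct brw_codegen *p)
{
   struct brw_reg value = brw_vec8_grf(20, 0);
   struct brw_reg temp  = brw_vec8_grf(21, 0);
   const unsigned spill_offset = 64;          /* byte offset, OWord aligned */

   /* Spill: place the data in m14, then send with the header in m13. */
   brw_MOV(p, retype(brw_message_reg(14), BRW_REGISTER_TYPE_UD),
           retype(value, BRW_REGISTER_TYPE_UD));
   brw_oword_block_write_scratch(p, brw_message_reg(13), 1, spill_offset);

   /* Fill: read the same two OWords back into another register. */
   brw_oword_block_read_scratch(p, temp, brw_message_reg(13), 1, spill_offset);
}
#endif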
2173
 
2174
void
2175
gen7_block_read_scratch(struct brw_codegen *p,
2176
                        struct brw_reg dest,
2177
                        int num_regs,
2178
                        unsigned offset)
2179
{
2180
   const struct brw_device_info *devinfo = p->devinfo;
2181
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2182
   assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
2183
 
2184
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
2185
   brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
2186
 
2187
   /* The HW requires that the header is present; this is to get the g0.5
2188
    * scratch offset.
2189
    */
2190
   brw_set_src0(p, insn, brw_vec8_grf(0, 0));
2191
 
2192
   /* According to the docs, offset is "A 12-bit HWord offset into the memory
2193
    * Immediate Memory buffer as specified by binding table 0xFF."  An HWORD
2194
    * is 32 bytes, which happens to be the size of a register.
2195
    */
2196
   offset /= REG_SIZE;
2197
   assert(offset < (1 << 12));
2198
 
2199
   gen7_set_dp_scratch_message(p, insn,
2200
                               false, /* scratch read */
2201
                               false, /* OWords */
2202
                               false, /* invalidate after read */
2203
                               num_regs,
2204
                               offset,
2205
                               1,        /* mlen: just g0 */
2206
                               num_regs, /* rlen */
2207
                               true);    /* header present */
2208
}
2209
 
2210
/**
2211
 * Read a float[4] vector from the data port Data Cache (const buffer).
2212
 * Location (in buffer) should be a multiple of 16.
2213
 * Used for fetching shader constants.
2214
 */
2215
void brw_oword_block_read(struct brw_codegen *p,
2216
			  struct brw_reg dest,
2217
			  struct brw_reg mrf,
2218
			  uint32_t offset,
2219
			  uint32_t bind_table_index)
2220
{
2221
   const struct brw_device_info *devinfo = p->devinfo;
2222
 
2223
   /* On newer hardware, offset is in units of owords. */
2224
   if (devinfo->gen >= 6)
2225
      offset /= 16;
2226
 
2227
   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
2228
 
2229
   brw_push_insn_state(p);
2230
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
2231
   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2232
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
2233
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2234
 
2235
   brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
2236
 
2237
   /* set message header global offset field (reg 0, element 2) */
2238
   brw_MOV(p,
2239
	   retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
2240
			       mrf.nr,
2241
			       2), BRW_REGISTER_TYPE_UD),
2242
	   brw_imm_ud(offset));
2243
 
2244
   brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
2245
 
2246
   /* cast dest to a uword[8] vector */
2247
   dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
2248
 
2249
   brw_set_dest(p, insn, dest);
2250
   if (devinfo->gen >= 6) {
2251
      brw_set_src0(p, insn, mrf);
2252
   } else {
2253
      brw_set_src0(p, insn, brw_null_reg());
2254
      brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
2255
   }
2256
 
2257
   brw_set_dp_read_message(p,
2258
			   insn,
2259
			   bind_table_index,
2260
			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
2261
			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
2262
			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2263
			   1, /* msg_length */
2264
                           true, /* header_present */
2265
			   1); /* response_length (1 reg, 2 owords!) */
2266
 
2267
   brw_pop_insn_state(p);
2268
}
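 
/* Editor's sketch (not part of the original driver): fetching one register
 * worth of constants (two OWords) starting at byte offset 32 of the buffer
 * bound at the given binding-table index.  The register numbers are
 * hypothetical.
 */
#if 0
static void
example_read_constants(struct brw_codegen *p, unsigned binding_table_index)
{
   struct brw_reg dst = brw_vec8_grf(10, 0);

   brw_oword_block_read(p, dst, brw_message_reg(2), 32, binding_table_index);
}
#endif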
2269
 
2270
 
2271
void brw_fb_WRITE(struct brw_codegen *p,
2272
		  int dispatch_width,
2273
                  struct brw_reg payload,
2274
                  struct brw_reg implied_header,
2275
                  unsigned msg_control,
2276
                  unsigned binding_table_index,
2277
                  unsigned msg_length,
2278
                  unsigned response_length,
2279
                  bool eot,
2280
                  bool last_render_target,
2281
                  bool header_present)
2282
{
2283
   const struct brw_device_info *devinfo = p->devinfo;
2284
   brw_inst *insn;
2285
   unsigned msg_type;
2286
   struct brw_reg dest, src0;
2287
 
2288
   if (dispatch_width == 16)
2289
      dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2290
   else
2291
      dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
2292
 
2293
   if (devinfo->gen >= 6) {
2294
      insn = next_insn(p, BRW_OPCODE_SENDC);
2295
   } else {
2296
      insn = next_insn(p, BRW_OPCODE_SEND);
2297
   }
2298
   brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
2299
 
2300
   if (devinfo->gen >= 6) {
2301
      /* headerless version, just submit color payload */
2302
      src0 = payload;
2303
 
2304
      msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2305
   } else {
2306
      assert(payload.file == BRW_MESSAGE_REGISTER_FILE);
2307
      brw_inst_set_base_mrf(devinfo, insn, payload.nr);
2308
      src0 = implied_header;
2309
 
2310
      msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
2311
   }
2312
 
2313
   brw_set_dest(p, insn, dest);
2314
   brw_set_src0(p, insn, src0);
2315
   brw_set_dp_write_message(p,
2316
			    insn,
2317
			    binding_table_index,
2318
			    msg_control,
2319
			    msg_type,
2320
			    msg_length,
2321
			    header_present,
2322
			    last_render_target,
2323
			    response_length,
2324
			    eot,
2325
			    0 /* send_commit_msg */);
2326
}
2327
 
2328
 
2329
/**
2330
 * Texture sample instruction.
2331
 * Note: the msg_type plus msg_length values determine exactly what kind
2332
 * of sampling operation is performed.  See volume 4, page 161 of docs.
2333
 */
2334
void brw_SAMPLE(struct brw_codegen *p,
2335
		struct brw_reg dest,
2336
		unsigned msg_reg_nr,
2337
		struct brw_reg src0,
2338
		unsigned binding_table_index,
2339
		unsigned sampler,
2340
		unsigned msg_type,
2341
		unsigned response_length,
2342
		unsigned msg_length,
2343
		unsigned header_present,
2344
		unsigned simd_mode,
2345
		unsigned return_format)
2346
{
2347
   const struct brw_device_info *devinfo = p->devinfo;
2348
   brw_inst *insn;
2349
 
2350
   if (msg_reg_nr != -1)
2351
      gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2352
 
2353
   insn = next_insn(p, BRW_OPCODE_SEND);
2354
   brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); /* XXX */
2355
 
2356
   /* From the 965 PRM (volume 4, part 1, section 14.2.41):
2357
    *
2358
    *    "Instruction compression is not allowed for this instruction (that
2359
    *     is, send). The hardware behavior is undefined if this instruction is
2360
    *     set as compressed. However, compress control can be set to "SecHalf"
2361
    *     to affect the EMask generation."
2362
    *
2363
    * No similar wording is found in later PRMs, but there are examples
2364
    * utilizing send with SecHalf.  More importantly, SIMD8 sampler messages
2365
    * are allowed in SIMD16 mode and they could not work without SecHalf.  For
2366
    * these reasons, we allow BRW_COMPRESSION_2NDHALF here.
2367
    */
2368
   if (brw_inst_qtr_control(devinfo, insn) != BRW_COMPRESSION_2NDHALF)
2369
      brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
2370
 
2371
   if (devinfo->gen < 6)
2372
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2373
 
2374
   brw_set_dest(p, insn, dest);
2375
   brw_set_src0(p, insn, src0);
2376
   brw_set_sampler_message(p, insn,
2377
                           binding_table_index,
2378
                           sampler,
2379
                           msg_type,
2380
                           response_length,
2381
                           msg_length,
2382
                           header_present,
2383
                           simd_mode,
2384
                           return_format);
2385
}
2386
 
2387
/* Adjust the message header's sampler state pointer to
2388
 * select the correct group of 16 samplers.
2389
 */
2390
void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
2391
                                      struct brw_reg header,
2392
                                      struct brw_reg sampler_index)
2393
{
2394
   /* The "Sampler Index" field can only store values between 0 and 15.
2395
    * However, we can add an offset to the "Sampler State Pointer"
2396
    * field, effectively selecting a different set of 16 samplers.
2397
    *
2398
    * The "Sampler State Pointer" needs to be aligned to a 32-byte
2399
    * offset, and each sampler state is only 16-bytes, so we can't
2400
    * exclusively use the offset - we have to use both.
2401
    */
2402
 
2403
   const struct brw_device_info *devinfo = p->devinfo;
2404
 
2405
   if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
2406
      const int sampler_state_size = 16; /* 16 bytes */
2407
      uint32_t sampler = sampler_index.dw1.ud;
2408
 
2409
      if (sampler >= 16) {
2410
         assert(devinfo->is_haswell || devinfo->gen >= 8);
2411
         brw_ADD(p,
2412
                 get_element_ud(header, 3),
2413
                 get_element_ud(brw_vec8_grf(0, 0), 3),
2414
                 brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
2415
      }
2416
   } else {
2417
      /* Non-const sampler array indexing case */
2418
      if (devinfo->gen < 8 && !devinfo->is_haswell) {
2419
         return;
2420
      }
2421
 
2422
      struct brw_reg temp = get_element_ud(header, 3);
2423
 
2424
      brw_AND(p, temp, get_element_ud(sampler_index, 0), brw_imm_ud(0x0f0));
2425
      brw_SHL(p, temp, temp, brw_imm_ud(4));
2426
      brw_ADD(p,
2427
              get_element_ud(header, 3),
2428
              get_element_ud(brw_vec8_grf(0, 0), 3),
2429
              temp);
2430
   }
2431
}
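 
/* Editor's note (worked example from the code above): with an immediate
 * sampler_index of 18, the offset added to the Sampler State Pointer is
 * 16 * (18 / 16) * 16 = 256 bytes, i.e. the pointer skips the first group
 * of 16 sampler states (16 states of 16 bytes each); the message's 4-bit
 * "Sampler Index" field then only needs to select 18 % 16 = 2 within that
 * group.
 */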
2432
 
2433
/* All these variables are pretty confusing - we might be better off
2434
 * using bitmasks and macros for this, in the old style.  Or perhaps
2435
 * just having the caller instantiate the fields in dword3 itself.
2436
 */
2437
void brw_urb_WRITE(struct brw_codegen *p,
2438
		   struct brw_reg dest,
2439
		   unsigned msg_reg_nr,
2440
		   struct brw_reg src0,
2441
                   enum brw_urb_write_flags flags,
2442
		   unsigned msg_length,
2443
		   unsigned response_length,
2444
		   unsigned offset,
2445
		   unsigned swizzle)
2446
{
2447
   const struct brw_device_info *devinfo = p->devinfo;
2448
   brw_inst *insn;
2449
 
2450
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2451
 
2452
   if (devinfo->gen >= 7 && !(flags & BRW_URB_WRITE_USE_CHANNEL_MASKS)) {
2453
      /* Enable Channel Masks in the URB_WRITE_HWORD message header */
2454
      brw_push_insn_state(p);
2455
      brw_set_default_access_mode(p, BRW_ALIGN_1);
2456
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2457
      brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
2458
		       BRW_REGISTER_TYPE_UD),
2459
	        retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
2460
		brw_imm_ud(0xff00));
2461
      brw_pop_insn_state(p);
2462
   }
2463
 
2464
   insn = next_insn(p, BRW_OPCODE_SEND);
2465
 
2466
   assert(msg_length < BRW_MAX_MRF);
2467
 
2468
   brw_set_dest(p, insn, dest);
2469
   brw_set_src0(p, insn, src0);
2470
   brw_set_src1(p, insn, brw_imm_d(0));
2471
 
2472
   if (devinfo->gen < 6)
2473
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2474
 
2475
   brw_set_urb_message(p,
2476
		       insn,
2477
		       flags,
2478
		       msg_length,
2479
		       response_length,
2480
		       offset,
2481
		       swizzle);
2482
}
2483
 
2484
struct brw_inst *
2485
brw_send_indirect_message(struct brw_codegen *p,
2486
                          unsigned sfid,
2487
                          struct brw_reg dst,
2488
                          struct brw_reg payload,
2489
                          struct brw_reg desc)
2490
{
2491
   const struct brw_device_info *devinfo = p->devinfo;
2492
   struct brw_inst *send, *setup;
2493
 
2494
   assert(desc.type == BRW_REGISTER_TYPE_UD);
2495
 
2496
   if (desc.file == BRW_IMMEDIATE_VALUE) {
2497
      setup = send = next_insn(p, BRW_OPCODE_SEND);
2498
      brw_set_src1(p, send, desc);
2499
 
2500
   } else {
2501
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2502
 
2503
      brw_push_insn_state(p);
2504
      brw_set_default_access_mode(p, BRW_ALIGN_1);
2505
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2506
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2507
 
2508
      /* Load the indirect descriptor to an address register using OR so the
2509
       * caller can specify additional descriptor bits with the usual
2510
       * brw_set_*_message() helper functions.
2511
       */
2512
      setup = brw_OR(p, addr, desc, brw_imm_ud(0));
2513
 
2514
      brw_pop_insn_state(p);
2515
 
2516
      send = next_insn(p, BRW_OPCODE_SEND);
2517
      brw_set_src1(p, send, addr);
2518
   }
2519
 
2520
   brw_set_dest(p, send, dst);
2521
   brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
2522
   brw_inst_set_sfid(devinfo, send, sfid);
2523
 
2524
   return setup;
2525
}
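 
/* Editor's sketch (not part of the original driver): using the indirect
 * path of brw_send_indirect_message() with a descriptor computed at run
 * time.  The SFID and register choices are hypothetical; the returned
 * setup instruction is the OR described above, and the caller hands it to
 * one of the brw_set_*_message() helpers to fill in the remaining
 * descriptor bits.
 */
#if 0
static void
example_indirect_send(struct brw_codegen *p, struct brw_reg desc_in_grf)
{
   struct brw_reg dst     = retype(brw_vec8_grf(12, 0), BRW_REGISTER_TYPE_UD);
   struct brw_reg payload = retype(brw_vec8_grf(2, 0), BRW_REGISTER_TYPE_UD);

   brw_inst *setup =
      brw_send_indirect_message(p, GEN7_SFID_DATAPORT_DATA_CACHE, dst, payload,
                                retype(desc_in_grf, BRW_REGISTER_TYPE_UD));
   (void) setup;   /* pass to a brw_set_*_message() helper */
}
#endif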
2526
 
2527
static struct brw_inst *
2528
brw_send_indirect_surface_message(struct brw_codegen *p,
2529
                                  unsigned sfid,
2530
                                  struct brw_reg dst,
2531
                                  struct brw_reg payload,
2532
                                  struct brw_reg surface,
2533
                                  unsigned message_len,
2534
                                  unsigned response_len,
2535
                                  bool header_present)
2536
{
2537
   const struct brw_device_info *devinfo = p->devinfo;
2538
   struct brw_inst *insn;
2539
 
2540
   if (surface.file != BRW_IMMEDIATE_VALUE) {
2541
      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
2542
 
2543
      brw_push_insn_state(p);
2544
      brw_set_default_access_mode(p, BRW_ALIGN_1);
2545
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
2546
      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
2547
 
2548
      /* Mask out invalid bits from the surface index to avoid hangs e.g. when
2549
       * some surface array is accessed out of bounds.
2550
       */
2551
      insn = brw_AND(p, addr,
2552
                     suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)),
2553
                               BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)),
2554
                     brw_imm_ud(0xff));
2555
 
2556
      brw_pop_insn_state(p);
2557
 
2558
      surface = addr;
2559
   }
2560
 
2561
   insn = brw_send_indirect_message(p, sfid, dst, payload, surface);
2562
   brw_inst_set_mlen(devinfo, insn, message_len);
2563
   brw_inst_set_rlen(devinfo, insn, response_len);
2564
   brw_inst_set_header_present(devinfo, insn, header_present);
2565
 
2566
   return insn;
2567
}
2568
 
2569
static int
2570
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
2571
{
2572
   int offset;
2573
   void *store = p->store;
2574
   const struct brw_device_info *devinfo = p->devinfo;
2575
 
2576
   for (offset = next_offset(devinfo, store, start_offset);
2577
        offset < p->next_insn_offset;
2578
        offset = next_offset(devinfo, store, offset)) {
2579
      brw_inst *insn = store + offset;
2580
 
2581
      switch (brw_inst_opcode(devinfo, insn)) {
2582
      case BRW_OPCODE_ENDIF:
2583
      case BRW_OPCODE_ELSE:
2584
      case BRW_OPCODE_WHILE:
2585
      case BRW_OPCODE_HALT:
2586
	 return offset;
2587
      }
2588
   }
2589
 
2590
   return 0;
2591
}
2592
 
2593
/* There is no DO instruction on gen6, so to find the end of the loop
2594
 * we have to see if the loop is jumping back before our start
2595
 * instruction.
2596
 */
2597
static int
2598
brw_find_loop_end(struct brw_codegen *p, int start_offset)
2599
{
2600
   const struct brw_device_info *devinfo = p->devinfo;
2601
   int offset;
2602
   int scale = 16 / brw_jump_scale(devinfo);
2603
   void *store = p->store;
2604
 
2605
   assert(devinfo->gen >= 6);
2606
 
2607
   /* Always start after the instruction (such as a WHILE) we're trying to fix
2608
    * up.
2609
    */
2610
   for (offset = next_offset(devinfo, store, start_offset);
2611
        offset < p->next_insn_offset;
2612
        offset = next_offset(devinfo, store, offset)) {
2613
      brw_inst *insn = store + offset;
2614
 
2615
      if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE) {
2616
         int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
2617
                                     : brw_inst_jip(devinfo, insn);
2618
	 if (offset + jip * scale <= start_offset)
2619
	    return offset;
2620
      }
2621
   }
2622
   assert(!"not reached");
2623
   return start_offset;
2624
}
2625
 
2626
/* After program generation, go back and update the UIP and JIP of
2627
 * BREAK, CONT, and HALT instructions to their correct locations.
2628
 */
2629
void
2630
brw_set_uip_jip(struct brw_codegen *p)
2631
{
2632
   const struct brw_device_info *devinfo = p->devinfo;
2633
   int offset;
2634
   int br = brw_jump_scale(devinfo);
2635
   int scale = 16 / br;
2636
   void *store = p->store;
2637
 
2638
   if (devinfo->gen < 6)
2639
      return;
2640
 
2641
   for (offset = 0; offset < p->next_insn_offset;
2642
        offset = next_offset(devinfo, store, offset)) {
2643
      brw_inst *insn = store + offset;
2644
 
2645
      if (brw_inst_cmpt_control(devinfo, insn)) {
2646
	 /* Fixups for compacted BREAK/CONTINUE not supported yet. */
2647
         assert(brw_inst_opcode(devinfo, insn) != BRW_OPCODE_BREAK &&
2648
                brw_inst_opcode(devinfo, insn) != BRW_OPCODE_CONTINUE &&
2649
                brw_inst_opcode(devinfo, insn) != BRW_OPCODE_HALT);
2650
	 continue;
2651
      }
2652
 
2653
      int block_end_offset = brw_find_next_block_end(p, offset);
2654
      switch (brw_inst_opcode(devinfo, insn)) {
2655
      case BRW_OPCODE_BREAK:
2656
         assert(block_end_offset != 0);
2657
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2658
	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
2659
         brw_inst_set_uip(devinfo, insn,
2660
	    (brw_find_loop_end(p, offset) - offset +
2661
             (devinfo->gen == 6 ? 16 : 0)) / scale);
2662
	 break;
2663
      case BRW_OPCODE_CONTINUE:
2664
         assert(block_end_offset != 0);
2665
         brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2666
         brw_inst_set_uip(devinfo, insn,
2667
            (brw_find_loop_end(p, offset) - offset) / scale);
2668
 
2669
         assert(brw_inst_uip(devinfo, insn) != 0);
2670
         assert(brw_inst_jip(devinfo, insn) != 0);
2671
	 break;
2672
 
2673
      case BRW_OPCODE_ENDIF: {
2674
         int32_t jump = (block_end_offset == 0) ?
2675
                        1 * br : (block_end_offset - offset) / scale;
2676
         if (devinfo->gen >= 7)
2677
            brw_inst_set_jip(devinfo, insn, jump);
2678
         else
2679
            brw_inst_set_gen6_jump_count(devinfo, insn, jump);
2680
	 break;
2681
      }
2682
 
2683
      case BRW_OPCODE_HALT:
2684
	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
2685
	  *
2686
	  *    "In case of the halt instruction not inside any conditional
2687
	  *     code block, the value of <UIP> and <JIP> should be the
2688
	  *     same. In case of the halt instruction inside conditional code
2689
	  *     block, the <UIP> should be the end of the program, and the
2690
	  *     <JIP> should be end of the most inner conditional code block."
2691
	  *
2692
	  * The uip will have already been set by whoever set up the
2693
	  * instruction.
2694
	  */
2695
	 if (block_end_offset == 0) {
2696
            brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
2697
	 } else {
2698
            brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
2699
	 }
2700
         assert(brw_inst_uip(devinfo, insn) != 0);
2701
         assert(brw_inst_jip(devinfo, insn) != 0);
2702
	 break;
2703
      }
2704
   }
2705
}
2706
 
2707
void brw_ff_sync(struct brw_codegen *p,
2708
		   struct brw_reg dest,
2709
		   unsigned msg_reg_nr,
2710
		   struct brw_reg src0,
2711
		   bool allocate,
2712
		   unsigned response_length,
2713
		   bool eot)
2714
{
2715
   const struct brw_device_info *devinfo = p->devinfo;
2716
   brw_inst *insn;
2717
 
2718
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2719
 
2720
   insn = next_insn(p, BRW_OPCODE_SEND);
2721
   brw_set_dest(p, insn, dest);
2722
   brw_set_src0(p, insn, src0);
2723
   brw_set_src1(p, insn, brw_imm_d(0));
2724
 
2725
   if (devinfo->gen < 6)
2726
      brw_inst_set_base_mrf(devinfo, insn, msg_reg_nr);
2727
 
2728
   brw_set_ff_sync_message(p,
2729
			   insn,
2730
			   allocate,
2731
			   response_length,
2732
			   eot);
2733
}
2734
 
2735
/**
2736
 * Emit the SEND instruction necessary to generate stream output data on Gen6
2737
 * (for transform feedback).
2738
 *
2739
 * If send_commit_msg is true, this is the last piece of stream output data
2740
 * from this thread, so send the data as a committed write.  According to the
2741
 * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
2742
 *
2743
 *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
2744
 *   writes are complete by sending the final write as a committed write."
2745
 */
2746
void
2747
brw_svb_write(struct brw_codegen *p,
2748
              struct brw_reg dest,
2749
              unsigned msg_reg_nr,
2750
              struct brw_reg src0,
2751
              unsigned binding_table_index,
2752
              bool   send_commit_msg)
2753
{
2754
   brw_inst *insn;
2755
 
2756
   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
2757
 
2758
   insn = next_insn(p, BRW_OPCODE_SEND);
2759
   brw_set_dest(p, insn, dest);
2760
   brw_set_src0(p, insn, src0);
2761
   brw_set_src1(p, insn, brw_imm_d(0));
2762
   brw_set_dp_write_message(p, insn,
2763
                            binding_table_index,
2764
                            0, /* msg_control: ignored */
2765
                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
2766
                            1, /* msg_length */
2767
                            true, /* header_present */
2768
                            0, /* last_render_target: ignored */
2769
                            send_commit_msg, /* response_length */
2770
                            0, /* end_of_thread */
2771
                            send_commit_msg); /* send_commit_msg */
2772
}
2773
 
2774
static unsigned
2775
brw_surface_payload_size(struct brw_codegen *p,
2776
                         unsigned num_channels,
2777
                         bool has_simd4x2,
2778
                         bool has_simd16)
2779
{
2780
   if (has_simd4x2 && brw_inst_access_mode(p->devinfo, p->current) == BRW_ALIGN_16)
2781
      return 1;
2782
   else if (has_simd16 && p->compressed)
2783
      return 2 * num_channels;
2784
   else
2785
      return num_channels;
2786
}
2787
 
2788
static void
2789
brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
2790
                                  brw_inst *insn,
2791
                                  unsigned atomic_op,
2792
                                  bool response_expected)
2793
{
2794
   const struct brw_device_info *devinfo = p->devinfo;
2795
   unsigned msg_control =
2796
      atomic_op | /* Atomic Operation Type: BRW_AOP_* */
2797
      (response_expected ? 1 << 5 : 0); /* Return data expected */
2798
 
2799
   if (devinfo->gen >= 8 || devinfo->is_haswell) {
2800
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
2801
         if (!p->compressed)
2802
            msg_control |= 1 << 4; /* SIMD8 mode */
2803
 
2804
         brw_inst_set_dp_msg_type(devinfo, insn,
2805
                                  HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP);
2806
      } else {
2807
         brw_inst_set_dp_msg_type(devinfo, insn,
2808
            HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2);
2809
      }
2810
   } else {
2811
      brw_inst_set_dp_msg_type(devinfo, insn,
2812
                               GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
2813
 
2814
      if (!p->compressed)
2815
         msg_control |= 1 << 4; /* SIMD8 mode */
2816
   }
2817
 
2818
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
2819
}
2820
 
2821
void
2822
brw_untyped_atomic(struct brw_codegen *p,
2823
                   struct brw_reg dst,
2824
                   struct brw_reg payload,
2825
                   struct brw_reg surface,
2826
                   unsigned atomic_op,
2827
                   unsigned msg_length,
2828
                   bool response_expected)
2829
{
2830
   const struct brw_device_info *devinfo = p->devinfo;
2831
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
2832
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
2833
                          GEN7_SFID_DATAPORT_DATA_CACHE);
2834
   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
2835
   /* Mask out unused components -- This is especially important in Align16
2836
    * mode on generations that don't have native support for SIMD4x2 atomics,
2837
    * because unused but enabled components will cause the dataport to perform
2838
    * additional atomic operations on the addresses that happen to be in the
2839
    * uninitialized Y, Z and W coordinates of the payload.
2840
    */
2841
   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
2842
   struct brw_inst *insn = brw_send_indirect_surface_message(
2843
      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
2844
      brw_surface_payload_size(p, response_expected,
2845
                               devinfo->gen >= 8 || devinfo->is_haswell, true),
2846
      align1);
2847
 
2848
   brw_set_dp_untyped_atomic_message(
2849
      p, insn, atomic_op, response_expected);
2850
}
2851
 
2852
static void
2853
brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
2854
                                        struct brw_inst *insn,
2855
                                        unsigned num_channels)
2856
{
2857
   const struct brw_device_info *devinfo = p->devinfo;
2858
   /* Set mask of 32-bit channels to drop. */
2859
   unsigned msg_control = 0xf & (0xf << num_channels);
2860
 
2861
   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
2862
      if (p->compressed)
2863
         msg_control |= 1 << 4; /* SIMD16 mode */
2864
      else
2865
         msg_control |= 2 << 4; /* SIMD8 mode */
2866
   }
2867
 
2868
   brw_inst_set_dp_msg_type(devinfo, insn,
2869
                            (devinfo->gen >= 8 || devinfo->is_haswell ?
2870
                             HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ :
2871
                             GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ));
2872
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
2873
}

void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN7_SFID_DATAPORT_DATA_CACHE);
   const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, dst, payload, surface, msg_length,
      brw_surface_payload_size(p, num_channels, true, true),
      align1);

   brw_set_dp_untyped_surface_read_message(
      p, insn, num_channels);
}
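
/* A minimal usage sketch; the register numbers, surface index and message
 * length are illustrative only, since the real values depend on the header
 * and address payload the caller has assembled:
 *
 *    brw_untyped_surface_read(p,
 *                             retype(brw_vec8_grf(20, 0), BRW_REGISTER_TYPE_UD),
 *                             brw_vec8_grf(12, 0),  // payload
 *                             brw_imm_ud(5),        // binding table index
 *                             2,                    // msg_length
 *                             2);                   // num_channels
 */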

static void
brw_set_dp_untyped_surface_write_message(struct brw_codegen *p,
                                         struct brw_inst *insn,
                                         unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of 32-bit channels to drop. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
      if (p->compressed)
         msg_control |= 1 << 4; /* SIMD16 mode */
      else
         msg_control |= 2 << 4; /* SIMD8 mode */
   } else {
      if (devinfo->gen >= 8 || devinfo->is_haswell)
         msg_control |= 0 << 4; /* SIMD4x2 mode */
      else
         msg_control |= 2 << 4; /* SIMD8 mode */
   }

   brw_inst_set_dp_msg_type(devinfo, insn,
                            devinfo->gen >= 8 || devinfo->is_haswell ?
                             HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
                             GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);
   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}

void
brw_untyped_surface_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
                          unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN7_SFID_DATAPORT_DATA_CACHE);
   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
   const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
                          WRITEMASK_X : WRITEMASK_XYZW;
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(brw_null_reg(), mask),
      payload, surface, msg_length, 0, align1);

   brw_set_dp_untyped_surface_write_message(
      p, insn, num_channels);
}

static void
brw_set_dp_typed_atomic_message(struct brw_codegen *p,
                                struct brw_inst *insn,
                                unsigned atomic_op,
                                bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   unsigned msg_control =
      atomic_op | /* Atomic Operation Type: BRW_AOP_* */
      (response_expected ? 1 << 5 : 0); /* Return data expected */

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */

         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP);
      } else {
         brw_inst_set_dp_msg_type(devinfo, insn,
                                  HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2);
      }

   } else {
      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);

      if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
         msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}
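
/* For example, a HSW+ Align1 typed atomic emitted for the second quarter
 * (2Q) with a response expected ends up with
 * msg_control == atomic_op | (1 << 5) | (1 << 4): the BRW_AOP_* code in the
 * low bits, bit 5 requesting return data, and bit 4 selecting the high
 * eight slots of the sample mask.
 */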

void
brw_typed_atomic(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg payload,
                 struct brw_reg surface,
                 unsigned atomic_op,
                 unsigned msg_length,
                 bool response_expected)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN6_SFID_DATAPORT_RENDER_CACHE);
   const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
   const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
      brw_surface_payload_size(p, response_expected,
                               devinfo->gen >= 8 || devinfo->is_haswell, false),
      true);

   brw_set_dp_typed_atomic_message(
      p, insn, atomic_op, response_expected);
}

static void
brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
                                      struct brw_inst *insn,
                                      unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of unused channels. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
         else
            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
   } else {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_SURFACE_READ);
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}

void
brw_typed_surface_read(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg payload,
                       struct brw_reg surface,
                       unsigned msg_length,
                       unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN6_SFID_DATAPORT_RENDER_CACHE);
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, dst, payload, surface, msg_length,
      brw_surface_payload_size(p, num_channels,
                               devinfo->gen >= 8 || devinfo->is_haswell, false),
      true);

   brw_set_dp_typed_surface_read_message(
      p, insn, num_channels);
}

static void
brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
                                       struct brw_inst *insn,
                                       unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   /* Set mask of unused channels. */
   unsigned msg_control = 0xf & (0xf << num_channels);

   if (devinfo->gen >= 8 || devinfo->is_haswell) {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
         else
            msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE);

   } else {
      if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
         if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
            msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
      }

      brw_inst_set_dp_msg_type(devinfo, insn,
                               GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE);
   }

   brw_inst_set_dp_msg_control(devinfo, insn, msg_control);
}

void
brw_typed_surface_write(struct brw_codegen *p,
                        struct brw_reg payload,
                        struct brw_reg surface,
                        unsigned msg_length,
                        unsigned num_channels)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN6_SFID_DATAPORT_RENDER_CACHE);
   const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
   /* Mask out unused components -- See comment in brw_untyped_atomic(). */
   const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
                          WRITEMASK_X : WRITEMASK_XYZW);
   struct brw_inst *insn = brw_send_indirect_surface_message(
      p, sfid, brw_writemask(brw_null_reg(), mask),
      payload, surface, msg_length, 0, true);

   brw_set_dp_typed_surface_write_message(
      p, insn, num_channels);
}

static void
brw_set_memory_fence_message(struct brw_codegen *p,
                             struct brw_inst *insn,
                             enum brw_message_target sfid,
                             bool commit_enable)
{
   const struct brw_device_info *devinfo = p->devinfo;

   brw_set_message_descriptor(p, insn, sfid,
                              1 /* message length */,
                              (commit_enable ? 1 : 0) /* response length */,
                              true /* header present */,
                              false);

   switch (sfid) {
   case GEN6_SFID_DATAPORT_RENDER_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_RC_MEMORY_FENCE);
      break;
   case GEN7_SFID_DATAPORT_DATA_CACHE:
      brw_inst_set_dp_msg_type(devinfo, insn, GEN7_DATAPORT_DC_MEMORY_FENCE);
      break;
   default:
      unreachable("Not reached");
   }

   if (commit_enable)
      brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
}

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
   struct brw_inst *insn;

   /* Set dst as destination for dependency tracking; the MEMORY_FENCE
    * message doesn't write anything back.
    */
   insn = next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, dst);
   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                commit_enable);

   if (devinfo->gen == 7 && !devinfo->is_haswell) {
      /* IVB does typed surface access through the render cache, so we need to
       * flush it too.  Use a different register so both flushes can be
       * pipelined by the hardware.
       */
      insn = next_insn(p, BRW_OPCODE_SEND);
      brw_set_dest(p, insn, offset(dst, 1));
      brw_set_src0(p, insn, offset(dst, 1));
      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
                                   commit_enable);

      /* Now write the response of the second message into the response of the
       * first to trigger a pipeline stall -- This way future render and data
       * cache messages will be properly ordered with respect to past data and
       * render cache messages.
       */
      brw_push_insn_state(p);
      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
      brw_MOV(p, dst, offset(dst, 1));
      brw_pop_insn_state(p);
   }
}
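
/* Callers typically hand in a scratch UD GRF as dst, e.g.
 * brw_memory_fence(p, retype(brw_vec8_grf(scratch_nr, 0),
 *                            BRW_REGISTER_TYPE_UD)) with scratch_nr being an
 * illustrative register number.  On IVB the fence also writes offset(dst, 1),
 * so two consecutive GRFs have to be reserved for it.
 */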

void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             unsigned mode,
                             unsigned data,
                             unsigned msg_length,
                             unsigned response_length)
{
   const struct brw_device_info *devinfo = p->devinfo;
   struct brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);

   brw_set_dest(p, insn, dest);
   brw_set_src0(p, insn, mrf);
   brw_set_message_descriptor(p, insn, GEN7_SFID_PIXEL_INTERPOLATOR,
                              msg_length, response_length,
                              false /* header is never present for PI */,
                              false);

   brw_inst_set_pi_simd_mode(
         devinfo, insn, brw_inst_exec_size(devinfo, insn) == BRW_EXECUTE_16);
   brw_inst_set_pi_slot_group(devinfo, insn, 0); /* zero unless 32/64px dispatch */
   brw_inst_set_pi_nopersp(devinfo, insn, noperspective);
   brw_inst_set_pi_message_type(devinfo, insn, mode);
   brw_inst_set_pi_message_data(devinfo, insn, data);
}
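
/* The SIMD width of the PI message is taken from the execution size of the
 * SEND itself (SIMD16 iff BRW_EXECUTE_16), and slot group 0 is hardcoded,
 * which is only correct as long as 32/64-pixel dispatch is not used, as the
 * comment above notes.  The mode and data arguments carry the message type
 * and its operand; for example, per-sample queries are assumed to pass the
 * sample index in data (an assumption about the callers, not enforced here).
 */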

void
brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
{
   const struct brw_device_info *devinfo = p->devinfo;
   brw_inst *inst;

   assert(devinfo->gen >= 7);

   brw_push_insn_state(p);

   if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      if (devinfo->gen >= 8) {
         /* Getting the first active channel index is easy on Gen8: Just find
          * the first bit set in the mask register.  The same register exists
          * on HSW already but it reads back as all ones when the current
          * instruction has execution masking disabled, so it's kind of
          * useless.
          */
         inst = brw_FBL(p, vec1(dst),
                        retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));

         /* Quarter control has the effect of magically shifting the value of
          * this register.  Make sure it's set to zero.
          */
         brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q);
      } else {
         const struct brw_reg flag = retype(brw_flag_reg(1, 0),
                                            BRW_REGISTER_TYPE_UD);

         brw_MOV(p, flag, brw_imm_ud(0));

         /* Run a 16-wide instruction returning zero with execution masking
          * and a conditional modifier enabled in order to get the current
          * execution mask in f1.0.
          */
         inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
         brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16);
         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         brw_FBL(p, vec1(dst), flag);
      }
   } else {
      brw_set_default_mask_control(p, BRW_MASK_DISABLE);

      if (devinfo->gen >= 8) {
         /* In SIMD4x2 mode the first active channel index is just the
          * negation of the first bit of the mask register.
          */
         inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
                        negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
                        brw_imm_ud(1));

      } else {
         /* Overwrite the destination first without and then with execution
          * masking in order to find out which of the channels is active.
          */
         brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                 brw_imm_ud(1));

         inst = brw_MOV(p, brw_writemask(vec4(dst), WRITEMASK_X),
                        brw_imm_ud(0));
         brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
      }
   }

   brw_pop_insn_state(p);
}
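
/* FBL returns the index of the least significant set bit of its source, so
 * if the execution mask that ends up in the mask register (or in f1.0 on
 * Gen7) is e.g. 0xfff4, the first live channel written to dst is 2.
 */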

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx)
{
   const struct brw_device_info *devinfo = p->devinfo;
   const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
   brw_inst *inst;

   assert(src.file == BRW_GENERAL_REGISTER_FILE &&
          src.address_mode == BRW_ADDRESS_DIRECT);

   if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
       idx.file == BRW_IMMEDIATE_VALUE) {
      /* Trivial: the source is already uniform or the index is a constant.
       * We will typically not get here if the optimizer is doing its job, but
       * asserting would be mean.
       */
      const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0;
      brw_MOV(p, dst,
              (align1 ? stride(suboffset(src, i), 0, 1, 0) :
               stride(suboffset(src, 4 * i), 0, 4, 1)));
   } else {
      if (align1) {
         const struct brw_reg addr =
            retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
         const unsigned offset = src.nr * REG_SIZE + src.subnr;
         /* Limit in bytes of the signed indirect addressing immediate. */
         const unsigned limit = 512;

         brw_push_insn_state(p);
         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);

         /* Take into account the component size and horizontal stride. */
         assert(src.vstride == src.hstride + src.width);
         brw_SHL(p, addr, vec1(idx),
                 brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
                            src.hstride - 1));

         /* We can only address up to limit bytes using the indirect
          * addressing immediate; account for the difference if the source
          * register is above this limit.
          */
         if (offset >= limit)
            brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));

         brw_pop_insn_state(p);

         /* Use indirect addressing to fetch the specified component. */
         brw_MOV(p, dst,
                 retype(brw_vec1_indirect(addr.subnr, offset % limit),
                        src.type));
      } else {
         /* In SIMD4x2 mode the index can be either zero or one, replicate it
          * to all bits of a flag register,
          */
         inst = brw_MOV(p,
                        brw_null_reg(),
                        stride(brw_swizzle1(idx, 0), 0, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
         brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);

         /* and use predicated SEL to pick the right channel. */
         inst = brw_SEL(p, dst,
                        stride(suboffset(src, 4), 0, 4, 1),
                        stride(src, 0, 4, 1));
         brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
         brw_inst_set_flag_reg_nr(devinfo, inst, 1);
      }
   }
}
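
/* Worked example for the align1 path above (the register number is purely
 * illustrative): broadcasting component idx of a packed float vector in
 * g100 means offset == 100 * REG_SIZE == 3200 bytes.  The SHL converts idx
 * into a byte offset (log2(4) + 1 - 1 == 2 bit shift for hstride == 1),
 * the ADD contributes 3200 - 3200 % 512 == 3072 because the register lies
 * beyond the 512-byte indirect immediate range, and the final MOV reads
 * r[a0.0, 128] with 128 == 3200 % 512.
 */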

/**
 * This instruction is generated as a single-channel align1 instruction by
 * both the VS and FS stages when using INTEL_DEBUG=shader_time.
 *
 * We can't use the typed atomic op in the FS because that has the execution
 * mask ANDed with the pixel mask, but we just want to write the one dword for
 * all the pixels.
 *
 * We don't use the SIMD4x2 atomic ops in the VS because we just want to write
 * one u32.  So we use the same untyped atomic write message as the pixel
 * shader.
 *
 * The untyped atomic operation requires a BUFFER surface type with RAW
 * format, and is only accessible through the legacy DATA_CACHE dataport
 * messages.
 */
void brw_shader_time_add(struct brw_codegen *p,
                         struct brw_reg payload,
                         uint32_t surf_index)
{
   const unsigned sfid = (p->devinfo->gen >= 8 || p->devinfo->is_haswell ?
                          HSW_SFID_DATAPORT_DATA_CACHE_1 :
                          GEN7_SFID_DATAPORT_DATA_CACHE);
   assert(p->devinfo->gen >= 7);

   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
   brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);

   /* We use brw_vec1_reg and unmasked because we want to increment the given
    * offset only once.
    */
   brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
                                      BRW_ARF_NULL, 0));
   brw_set_src0(p, send, brw_vec1_reg(payload.file,
                                      payload.nr, 0));
   brw_set_src1(p, send, brw_imm_ud(0));
   brw_set_message_descriptor(p, send, sfid, 2, 0, false, false);
   brw_inst_set_binding_table_index(p->devinfo, send, surf_index);
   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false);

   brw_pop_insn_state(p);
}
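
/* The resulting SEND has a null destination and a response length of zero
 * (no return value is requested from the atomic ADD), a message length of
 * two GRFs and no header.  The payload is assumed to contain the buffer
 * offset and the value to add, laid out by the caller; this helper only
 * patches the binding table index and the atomic message descriptor.
 */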