Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3254 Serge 1
/*
2
   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3
   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4
   develop this 3D driver.
5
 
6
   Permission is hereby granted, free of charge, to any person obtaining
7
   a copy of this software and associated documentation files (the
8
   "Software"), to deal in the Software without restriction, including
9
   without limitation the rights to use, copy, modify, merge, publish,
10
   distribute, sublicense, and/or sell copies of the Software, and to
11
   permit persons to whom the Software is furnished to do so, subject to
12
   the following conditions:
13
 
14
   The above copyright notice and this permission notice (including the
15
   next paragraph) shall be included in all copies or substantial
16
   portions of the Software.
17
 
18
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 
26
 **********************************************************************/
27
/*
28
 * Authors:
29
 *   Keith Whitwell <keith@tungstengraphics.com>
30
 */
31
 
32
#include "brw_eu.h"

#include <assert.h>
#include <stdlib.h>
#include <string.h>
36
 
37
/***********************************************************************
38
 * Internal helper for constructing instructions
39
 */
40
 
41
static void guess_execution_size(struct brw_compile *p,
42
				 struct brw_instruction *insn,
43
				 struct brw_reg reg)
44
{
45
	if (reg.width == BRW_WIDTH_8 && p->compressed)
46
		insn->header.execution_size = BRW_EXECUTE_16;
47
	else
48
		insn->header.execution_size = reg.width;
49
}
50
 
51
 
52
/**
53
 * Prior to Sandybridge, the SEND instruction accepted non-MRF source
54
 * registers, implicitly moving the operand to a message register.
55
 *
56
 * On Sandybridge, this is no longer the case.  This function performs the
57
 * explicit move; it should be called before emitting a SEND instruction.
58
 */
59
void
60
gen6_resolve_implied_move(struct brw_compile *p,
61
			  struct brw_reg *src,
62
			  unsigned msg_reg_nr)
63
{
64
	if (p->gen < 060)
65
		return;
66
 
67
	if (src->file == BRW_MESSAGE_REGISTER_FILE)
68
		return;
69
 
70
	if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
71
		brw_push_insn_state(p);
72
		brw_set_mask_control(p, BRW_MASK_DISABLE);
73
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
74
		brw_MOV(p, __retype_ud(brw_message_reg(msg_reg_nr)), __retype_ud(*src));
75
		brw_pop_insn_state(p);
76
	}
77
	*src = brw_message_reg(msg_reg_nr);
78
}
79
 
80
static void
81
gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
82
{
83
	/* From the BSpec / ISA Reference / send - [DevIVB+]:
84
	 * "The send with EOT should use register space R112-R127 for . This is
85
	 *  to enable loading of a new thread into the same slot while the message
86
	 *  with EOT for current thread is pending dispatch."
87
	 *
88
	 * Since we're pretending to have 16 MRFs anyway, we may as well use the
89
	 * registers required for messages with EOT.
90
	 */
91
	if (p->gen >= 070 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
92
		reg->file = BRW_GENERAL_REGISTER_FILE;
93
		reg->nr += 111;
94
	}
95
}
96
 
97
void
98
brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
99
	     struct brw_reg dest)
100
{
101
	if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
102
	    dest.file != BRW_MESSAGE_REGISTER_FILE)
103
		assert(dest.nr < 128);
104
 
105
	gen7_convert_mrf_to_grf(p, &dest);
106
 
107
	insn->bits1.da1.dest_reg_file = dest.file;
108
	insn->bits1.da1.dest_reg_type = dest.type;
109
	insn->bits1.da1.dest_address_mode = dest.address_mode;
110
 
111
	if (dest.address_mode == BRW_ADDRESS_DIRECT) {
112
		insn->bits1.da1.dest_reg_nr = dest.nr;
113
 
114
		if (insn->header.access_mode == BRW_ALIGN_1) {
115
			insn->bits1.da1.dest_subreg_nr = dest.subnr;
116
			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
117
				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
118
			insn->bits1.da1.dest_horiz_stride = dest.hstride;
119
		} else {
120
			insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
121
			insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
122
			/* even ignored in da16, still need to set as '01' */
123
			insn->bits1.da16.dest_horiz_stride = 1;
124
		}
125
	} else {
126
		insn->bits1.ia1.dest_subreg_nr = dest.subnr;
127
 
128
		/* These are different sizes in align1 vs align16:
129
		*/
130
		if (insn->header.access_mode == BRW_ALIGN_1) {
131
			insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
132
			if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
133
				dest.hstride = BRW_HORIZONTAL_STRIDE_1;
134
			insn->bits1.ia1.dest_horiz_stride = dest.hstride;
135
		}
136
		else {
137
			insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
138
			/* even ignored in da16, still need to set as '01' */
139
			insn->bits1.ia16.dest_horiz_stride = 1;
140
		}
141
	}
142
 
143
	guess_execution_size(p, insn, dest);
144
}
145
 
146
/*
 * Size in bytes of one element, indexed by the register type encoding
 * (the value stored in dest_reg_type).  Encoding 6 has no entry and
 * therefore reads as 0.
 */
static const int reg_type_size[8] = {
	4, 4,	/* encodings 0 and 1 */
	2, 2,	/* encodings 2 and 3 */
	1, 1,	/* encodings 4 and 5 */
	0,	/* encoding 6: no size defined */
	4	/* encoding 7 */
};
155
 
156
static void
157
validate_reg(struct brw_instruction *insn, struct brw_reg reg)
158
{
159
	int hstride_for_reg[] = {0, 1, 2, 4};
160
	int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
161
	int width_for_reg[] = {1, 2, 4, 8, 16};
162
	int execsize_for_reg[] = {1, 2, 4, 8, 16};
163
	int width, hstride, vstride, execsize;
164
 
165
	if (reg.file == BRW_IMMEDIATE_VALUE) {
166
		/* 3.3.6: Region Parameters.  Restriction: Immediate vectors
167
		 * mean the destination has to be 128-bit aligned and the
168
		 * destination horiz stride has to be a word.
169
		 */
170
		if (reg.type == BRW_REGISTER_TYPE_V) {
171
			assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
172
			       reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
173
		}
174
 
175
		return;
176
	}
177
 
178
	if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
179
	    reg.file == BRW_ARF_NULL)
180
		return;
181
 
182
	hstride = hstride_for_reg[reg.hstride];
183
 
184
	if (reg.vstride == 0xf) {
185
		vstride = -1;
186
	} else {
187
		vstride = vstride_for_reg[reg.vstride];
188
	}
189
 
190
	width = width_for_reg[reg.width];
191
 
192
	execsize = execsize_for_reg[insn->header.execution_size];
193
 
194
	/* Restrictions from 3.3.10: Register Region Restrictions. */
195
	/* 3. */
196
	assert(execsize >= width);
197
 
198
	/* 4. */
199
	if (execsize == width && hstride != 0) {
200
		assert(vstride == -1 || vstride == width * hstride);
201
	}
202
 
203
	/* 5. */
204
	if (execsize == width && hstride == 0) {
205
		/* no restriction on vstride. */
206
	}
207
 
208
	/* 6. */
209
	if (width == 1) {
210
		assert(hstride == 0);
211
	}
212
 
213
	/* 7. */
214
	if (execsize == 1 && width == 1) {
215
		assert(hstride == 0);
216
		assert(vstride == 0);
217
	}
218
 
219
	/* 8. */
220
	if (vstride == 0 && hstride == 0) {
221
		assert(width == 1);
222
	}
223
 
224
	/* 10. Check destination issues. */
225
}
226
 
227
void
228
brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
229
	     struct brw_reg reg)
230
{
231
	if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
232
		assert(reg.nr < 128);
233
 
234
	gen7_convert_mrf_to_grf(p, ®);
235
 
236
	validate_reg(insn, reg);
237
 
238
	insn->bits1.da1.src0_reg_file = reg.file;
239
	insn->bits1.da1.src0_reg_type = reg.type;
240
	insn->bits2.da1.src0_abs = reg.abs;
241
	insn->bits2.da1.src0_negate = reg.negate;
242
	insn->bits2.da1.src0_address_mode = reg.address_mode;
243
 
244
	if (reg.file == BRW_IMMEDIATE_VALUE) {
245
		insn->bits3.ud = reg.dw1.ud;
246
 
247
		/* Required to set some fields in src1 as well:
248
		*/
249
		insn->bits1.da1.src1_reg_file = 0; /* arf */
250
		insn->bits1.da1.src1_reg_type = reg.type;
251
	} else {
252
		if (reg.address_mode == BRW_ADDRESS_DIRECT) {
253
			if (insn->header.access_mode == BRW_ALIGN_1) {
254
				insn->bits2.da1.src0_subreg_nr = reg.subnr;
255
				insn->bits2.da1.src0_reg_nr = reg.nr;
256
			} else {
257
				insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
258
				insn->bits2.da16.src0_reg_nr = reg.nr;
259
			}
260
		} else {
261
			insn->bits2.ia1.src0_subreg_nr = reg.subnr;
262
 
263
			if (insn->header.access_mode == BRW_ALIGN_1) {
264
				insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
265
			} else {
266
				insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
267
			}
268
		}
269
 
270
		if (insn->header.access_mode == BRW_ALIGN_1) {
271
			if (reg.width == BRW_WIDTH_1 &&
272
			    insn->header.execution_size == BRW_EXECUTE_1) {
273
				insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
274
				insn->bits2.da1.src0_width = BRW_WIDTH_1;
275
				insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
276
			} else {
277
				insn->bits2.da1.src0_horiz_stride = reg.hstride;
278
				insn->bits2.da1.src0_width = reg.width;
279
				insn->bits2.da1.src0_vert_stride = reg.vstride;
280
			}
281
		} else {
282
			insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
283
			insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
284
			insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
285
			insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
286
 
287
			/* This is an oddity of the fact we're using the same
288
			 * descriptions for registers in align_16 as align_1:
289
			 */
290
			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
291
				insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
292
			else
293
				insn->bits2.da16.src0_vert_stride = reg.vstride;
294
		}
295
	}
296
}
297
 
298
void brw_set_src1(struct brw_compile *p,
299
		  struct brw_instruction *insn,
300
		  struct brw_reg reg)
301
{
302
	assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
303
	assert(reg.nr < 128);
304
 
305
	gen7_convert_mrf_to_grf(p, ®);
306
 
307
	validate_reg(insn, reg);
308
 
309
	insn->bits1.da1.src1_reg_file = reg.file;
310
	insn->bits1.da1.src1_reg_type = reg.type;
311
	insn->bits3.da1.src1_abs = reg.abs;
312
	insn->bits3.da1.src1_negate = reg.negate;
313
 
314
	/* Only src1 can be immediate in two-argument instructions. */
315
	assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
316
 
317
	if (reg.file == BRW_IMMEDIATE_VALUE) {
318
		insn->bits3.ud = reg.dw1.ud;
319
	} else {
320
		/* This is a hardware restriction, which may or may not be lifted
321
		 * in the future:
322
		 */
323
		assert (reg.address_mode == BRW_ADDRESS_DIRECT);
324
		/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
325
 
326
		if (insn->header.access_mode == BRW_ALIGN_1) {
327
			insn->bits3.da1.src1_subreg_nr = reg.subnr;
328
			insn->bits3.da1.src1_reg_nr = reg.nr;
329
		} else {
330
			insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
331
			insn->bits3.da16.src1_reg_nr = reg.nr;
332
		}
333
 
334
		if (insn->header.access_mode == BRW_ALIGN_1) {
335
			if (reg.width == BRW_WIDTH_1 &&
336
			    insn->header.execution_size == BRW_EXECUTE_1) {
337
				insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
338
				insn->bits3.da1.src1_width = BRW_WIDTH_1;
339
				insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
340
			} else {
341
				insn->bits3.da1.src1_horiz_stride = reg.hstride;
342
				insn->bits3.da1.src1_width = reg.width;
343
				insn->bits3.da1.src1_vert_stride = reg.vstride;
344
			}
345
		} else {
346
			insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
347
			insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
348
			insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
349
			insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
350
 
351
			/* This is an oddity of the fact we're using the same
352
			 * descriptions for registers in align_16 as align_1:
353
			 */
354
			if (reg.vstride == BRW_VERTICAL_STRIDE_8)
355
				insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
356
			else
357
				insn->bits3.da16.src1_vert_stride = reg.vstride;
358
		}
359
	}
360
}
361
 
362
/**
363
 * Set the Message Descriptor and Extended Message Descriptor fields
364
 * for SEND messages.
365
 *
366
 * \note This zeroes out the Function Control bits, so it must be called
367
 *       \b before filling out any message-specific data.  Callers can
368
 *       choose not to fill in irrelevant bits; they will be zero.
369
 */
370
static void
371
brw_set_message_descriptor(struct brw_compile *p,
372
			   struct brw_instruction *inst,
373
			   enum brw_message_target sfid,
374
			   unsigned msg_length,
375
			   unsigned response_length,
376
			   bool header_present,
377
			   bool end_of_thread)
378
{
379
	brw_set_src1(p, inst, brw_imm_d(0));
380
 
381
	if (p->gen >= 050) {
382
		inst->bits3.generic_gen5.header_present = header_present;
383
		inst->bits3.generic_gen5.response_length = response_length;
384
		inst->bits3.generic_gen5.msg_length = msg_length;
385
		inst->bits3.generic_gen5.end_of_thread = end_of_thread;
386
 
387
		if (p->gen >= 060) {
388
			/* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
389
			inst->header.destreg__conditionalmod = sfid;
390
		} else {
391
			/* Set Extended Message Descriptor (ex_desc) */
392
			inst->bits2.send_gen5.sfid = sfid;
393
			inst->bits2.send_gen5.end_of_thread = end_of_thread;
394
		}
395
	} else {
396
		inst->bits3.generic.response_length = response_length;
397
		inst->bits3.generic.msg_length = msg_length;
398
		inst->bits3.generic.msg_target = sfid;
399
		inst->bits3.generic.end_of_thread = end_of_thread;
400
	}
401
}
402
 
403
 
404
static void brw_set_math_message(struct brw_compile *p,
405
				 struct brw_instruction *insn,
406
				 unsigned function,
407
				 unsigned integer_type,
408
				 bool low_precision,
409
				 bool saturate,
410
				 unsigned dataType)
411
{
412
	unsigned msg_length;
413
	unsigned response_length;
414
 
415
	/* Infer message length from the function */
416
	switch (function) {
417
	case BRW_MATH_FUNCTION_POW:
418
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
419
	case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
420
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
421
		msg_length = 2;
422
		break;
423
	default:
424
		msg_length = 1;
425
		break;
426
	}
427
 
428
	/* Infer response length from the function */
429
	switch (function) {
430
	case BRW_MATH_FUNCTION_SINCOS:
431
	case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
432
		response_length = 2;
433
		break;
434
	default:
435
		response_length = 1;
436
		break;
437
	}
438
 
439
	brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
440
				   msg_length, response_length,
441
				   false, false);
442
	if (p->gen == 050) {
443
		insn->bits3.math_gen5.function = function;
444
		insn->bits3.math_gen5.int_type = integer_type;
445
		insn->bits3.math_gen5.precision = low_precision;
446
		insn->bits3.math_gen5.saturate = saturate;
447
		insn->bits3.math_gen5.data_type = dataType;
448
		insn->bits3.math_gen5.snapshot = 0;
449
	} else {
450
		insn->bits3.math.function = function;
451
		insn->bits3.math.int_type = integer_type;
452
		insn->bits3.math.precision = low_precision;
453
		insn->bits3.math.saturate = saturate;
454
		insn->bits3.math.data_type = dataType;
455
	}
456
}
457
 
458
static void brw_set_ff_sync_message(struct brw_compile *p,
459
				    struct brw_instruction *insn,
460
				    bool allocate,
461
				    unsigned response_length,
462
				    bool end_of_thread)
463
{
464
	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
465
				   1, response_length,
466
				   true, end_of_thread);
467
	insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
468
	insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
469
	insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
470
	insn->bits3.urb_gen5.allocate = allocate;
471
	insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
472
	insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
473
}
474
 
475
static void brw_set_urb_message(struct brw_compile *p,
476
				struct brw_instruction *insn,
477
				bool allocate,
478
				bool used,
479
				unsigned msg_length,
480
				unsigned response_length,
481
				bool end_of_thread,
482
				bool complete,
483
				unsigned offset,
484
				unsigned swizzle_control)
485
{
486
	brw_set_message_descriptor(p, insn, BRW_SFID_URB,
487
				   msg_length, response_length, true, end_of_thread);
488
	if (p->gen >= 070) {
489
		insn->bits3.urb_gen7.opcode = 0;	/* URB_WRITE_HWORD */
490
		insn->bits3.urb_gen7.offset = offset;
491
		assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
492
		insn->bits3.urb_gen7.swizzle_control = swizzle_control;
493
		/* per_slot_offset = 0 makes it ignore offsets in message header */
494
		insn->bits3.urb_gen7.per_slot_offset = 0;
495
		insn->bits3.urb_gen7.complete = complete;
496
	} else if (p->gen >= 050) {
497
		insn->bits3.urb_gen5.opcode = 0;	/* URB_WRITE */
498
		insn->bits3.urb_gen5.offset = offset;
499
		insn->bits3.urb_gen5.swizzle_control = swizzle_control;
500
		insn->bits3.urb_gen5.allocate = allocate;
501
		insn->bits3.urb_gen5.used = used;	/* ? */
502
		insn->bits3.urb_gen5.complete = complete;
503
	} else {
504
		insn->bits3.urb.opcode = 0;	/* ? */
505
		insn->bits3.urb.offset = offset;
506
		insn->bits3.urb.swizzle_control = swizzle_control;
507
		insn->bits3.urb.allocate = allocate;
508
		insn->bits3.urb.used = used;	/* ? */
509
		insn->bits3.urb.complete = complete;
510
	}
511
}
512
 
513
void
514
brw_set_dp_write_message(struct brw_compile *p,
515
			 struct brw_instruction *insn,
516
			 unsigned binding_table_index,
517
			 unsigned msg_control,
518
			 unsigned msg_type,
519
			 unsigned msg_length,
520
			 bool header_present,
521
			 bool last_render_target,
522
			 unsigned response_length,
523
			 bool end_of_thread,
524
			 bool send_commit_msg)
525
{
526
	unsigned sfid;
527
 
528
	if (p->gen >= 070) {
529
		/* Use the Render Cache for RT writes; otherwise use the Data Cache */
530
		if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
531
			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
532
		else
533
			sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
534
	} else if (p->gen >= 060) {
535
		/* Use the render cache for all write messages. */
536
		sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
537
	} else {
538
		sfid = BRW_SFID_DATAPORT_WRITE;
539
	}
540
 
541
	brw_set_message_descriptor(p, insn, sfid,
542
				   msg_length, response_length,
543
				   header_present, end_of_thread);
544
 
545
	if (p->gen >= 070) {
546
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
547
		insn->bits3.gen7_dp.msg_control = msg_control;
548
		insn->bits3.gen7_dp.last_render_target = last_render_target;
549
		insn->bits3.gen7_dp.msg_type = msg_type;
550
	} else if (p->gen >= 060) {
551
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
552
		insn->bits3.gen6_dp.msg_control = msg_control;
553
		insn->bits3.gen6_dp.last_render_target = last_render_target;
554
		insn->bits3.gen6_dp.msg_type = msg_type;
555
		insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
556
	} else if (p->gen >= 050) {
557
		insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
558
		insn->bits3.dp_write_gen5.msg_control = msg_control;
559
		insn->bits3.dp_write_gen5.last_render_target = last_render_target;
560
		insn->bits3.dp_write_gen5.msg_type = msg_type;
561
		insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
562
	} else {
563
		insn->bits3.dp_write.binding_table_index = binding_table_index;
564
		insn->bits3.dp_write.msg_control = msg_control;
565
		insn->bits3.dp_write.last_render_target = last_render_target;
566
		insn->bits3.dp_write.msg_type = msg_type;
567
		insn->bits3.dp_write.send_commit_msg = send_commit_msg;
568
	}
569
}
570
 
571
void
572
brw_set_dp_read_message(struct brw_compile *p,
573
			struct brw_instruction *insn,
574
			unsigned binding_table_index,
575
			unsigned msg_control,
576
			unsigned msg_type,
577
			unsigned target_cache,
578
			unsigned msg_length,
579
			unsigned response_length)
580
{
581
	unsigned sfid;
582
 
583
	if (p->gen >= 070) {
584
		sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
585
	} else if (p->gen >= 060) {
586
		if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
587
			sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
588
		else
589
			sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
590
	} else {
591
		sfid = BRW_SFID_DATAPORT_READ;
592
	}
593
 
594
	brw_set_message_descriptor(p, insn, sfid,
595
				   msg_length, response_length,
596
				   true, false);
597
 
598
	if (p->gen >= 070) {
599
		insn->bits3.gen7_dp.binding_table_index = binding_table_index;
600
		insn->bits3.gen7_dp.msg_control = msg_control;
601
		insn->bits3.gen7_dp.last_render_target = 0;
602
		insn->bits3.gen7_dp.msg_type = msg_type;
603
	} else if (p->gen >= 060) {
604
		insn->bits3.gen6_dp.binding_table_index = binding_table_index;
605
		insn->bits3.gen6_dp.msg_control = msg_control;
606
		insn->bits3.gen6_dp.last_render_target = 0;
607
		insn->bits3.gen6_dp.msg_type = msg_type;
608
		insn->bits3.gen6_dp.send_commit_msg = 0;
609
	} else if (p->gen >= 050) {
610
		insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
611
		insn->bits3.dp_read_gen5.msg_control = msg_control;
612
		insn->bits3.dp_read_gen5.msg_type = msg_type;
613
		insn->bits3.dp_read_gen5.target_cache = target_cache;
614
	} else if (p->gen >= 045) {
615
		insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
616
		insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
617
		insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
618
		insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
619
	} else {
620
		insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
621
		insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
622
		insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
623
		insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
624
	}
625
}
626
 
627
static void brw_set_sampler_message(struct brw_compile *p,
628
                                    struct brw_instruction *insn,
629
                                    unsigned binding_table_index,
630
                                    unsigned sampler,
631
                                    unsigned msg_type,
632
                                    unsigned response_length,
633
                                    unsigned msg_length,
634
                                    bool header_present,
635
                                    unsigned simd_mode)
636
{
637
	brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER,
638
				   msg_length, response_length,
639
				   header_present, false);
640
 
641
	if (p->gen >= 070) {
642
		insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
643
		insn->bits3.sampler_gen7.sampler = sampler;
644
		insn->bits3.sampler_gen7.msg_type = msg_type;
645
		insn->bits3.sampler_gen7.simd_mode = simd_mode;
646
	} else if (p->gen >= 050) {
647
		insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
648
		insn->bits3.sampler_gen5.sampler = sampler;
649
		insn->bits3.sampler_gen5.msg_type = msg_type;
650
		insn->bits3.sampler_gen5.simd_mode = simd_mode;
651
	} else if (p->gen >= 045) {
652
		insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
653
		insn->bits3.sampler_g4x.sampler = sampler;
654
		insn->bits3.sampler_g4x.msg_type = msg_type;
655
	} else {
656
		insn->bits3.sampler.binding_table_index = binding_table_index;
657
		insn->bits3.sampler.sampler = sampler;
658
		insn->bits3.sampler.msg_type = msg_type;
659
		insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
660
	}
661
}
662
 
663
 
664
void brw_NOP(struct brw_compile *p)
665
{
666
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_NOP);
667
	brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
668
	brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
669
	brw_set_src1(p, insn, brw_imm_ud(0x0));
670
}
671
 
672
/***********************************************************************
673
 * Comparisons, if/else/endif
674
 */
675
 
676
static void
677
push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
678
{
679
	p->if_stack[p->if_stack_depth] = inst;
680
 
681
	p->if_stack_depth++;
682
	if (p->if_stack_array_size <= p->if_stack_depth) {
683
		p->if_stack_array_size *= 2;
684
		p->if_stack = realloc(p->if_stack, sizeof(struct brw_instruction *)*p->if_stack_array_size);
685
	}
686
}
687
 
688
/* EU takes the value from the flag register and pushes it onto some
689
 * sort of a stack (presumably merging with any flag value already on
690
 * the stack).  Within an if block, the flags at the top of the stack
691
 * control execution on each channel of the unit, eg. on each of the
692
 * 16 pixel values in our wm programs.
693
 *
694
 * When the matching 'else' instruction is reached (presumably by
695
 * countdown of the instruction count patched in by our ELSE/ENDIF
696
 * functions), the relevent flags are inverted.
697
 *
698
 * When the matching 'endif' instruction is reached, the flags are
699
 * popped off.  If the stack is now empty, normal execution resumes.
700
 */
701
struct brw_instruction *
702
brw_IF(struct brw_compile *p, unsigned execute_size)
703
{
704
	struct brw_instruction *insn;
705
 
706
	insn = brw_next_insn(p, BRW_OPCODE_IF);
707
 
708
	/* Override the defaults for this instruction: */
709
	if (p->gen < 060) {
710
		brw_set_dest(p, insn, brw_ip_reg());
711
		brw_set_src0(p, insn, brw_ip_reg());
712
		brw_set_src1(p, insn, brw_imm_d(0x0));
713
	} else if (p->gen < 070) {
714
		brw_set_dest(p, insn, brw_imm_w(0));
715
		insn->bits1.branch_gen6.jump_count = 0;
716
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
717
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
718
	} else {
719
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
720
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
721
		brw_set_src1(p, insn, brw_imm_ud(0));
722
		insn->bits3.break_cont.jip = 0;
723
		insn->bits3.break_cont.uip = 0;
724
	}
725
 
726
	insn->header.execution_size = execute_size;
727
	insn->header.compression_control = BRW_COMPRESSION_NONE;
728
	insn->header.predicate_control = BRW_PREDICATE_NORMAL;
729
	insn->header.mask_control = BRW_MASK_ENABLE;
730
	if (!p->single_program_flow)
731
		insn->header.thread_control = BRW_THREAD_SWITCH;
732
 
733
	p->current->header.predicate_control = BRW_PREDICATE_NONE;
734
 
735
	push_if_stack(p, insn);
736
	return insn;
737
}
738
 
739
/* This function is only used for gen6-style IF instructions with an
740
 * embedded comparison (conditional modifier).  It is not used on gen7.
741
 */
742
struct brw_instruction *
743
gen6_IF(struct brw_compile *p, uint32_t conditional,
744
	struct brw_reg src0, struct brw_reg src1)
745
{
746
	struct brw_instruction *insn;
747
 
748
	insn = brw_next_insn(p, BRW_OPCODE_IF);
749
 
750
	brw_set_dest(p, insn, brw_imm_w(0));
751
	if (p->compressed) {
752
		insn->header.execution_size = BRW_EXECUTE_16;
753
	} else {
754
		insn->header.execution_size = BRW_EXECUTE_8;
755
	}
756
	insn->bits1.branch_gen6.jump_count = 0;
757
	brw_set_src0(p, insn, src0);
758
	brw_set_src1(p, insn, src1);
759
 
760
	assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
761
	assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
762
	insn->header.destreg__conditionalmod = conditional;
763
 
764
	if (!p->single_program_flow)
765
		insn->header.thread_control = BRW_THREAD_SWITCH;
766
 
767
	push_if_stack(p, insn);
768
	return insn;
769
}
770
 
771
/**
772
 * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
773
 */
774
static void
775
convert_IF_ELSE_to_ADD(struct brw_compile *p,
776
		       struct brw_instruction *if_inst,
777
		       struct brw_instruction *else_inst)
778
{
779
	/* The next instruction (where the ENDIF would be, if it existed) */
780
	struct brw_instruction *next_inst = &p->store[p->nr_insn];
781
 
782
	assert(p->single_program_flow);
783
	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
784
	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
785
	assert(if_inst->header.execution_size == BRW_EXECUTE_1);
786
 
787
	/* Convert IF to an ADD instruction that moves the instruction pointer
788
	 * to the first instruction of the ELSE block.  If there is no ELSE
789
	 * block, point to where ENDIF would be.  Reverse the predicate.
790
	 *
791
	 * There's no need to execute an ENDIF since we don't need to do any
792
	 * stack operations, and if we're currently executing, we just want to
793
	 * continue normally.
794
	 */
795
	if_inst->header.opcode = BRW_OPCODE_ADD;
796
	if_inst->header.predicate_inverse = 1;
797
 
798
	if (else_inst != NULL) {
799
		/* Convert ELSE to an ADD instruction that points where the ENDIF
800
		 * would be.
801
		 */
802
		else_inst->header.opcode = BRW_OPCODE_ADD;
803
 
804
		if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
805
		else_inst->bits3.ud = (next_inst - else_inst) * 16;
806
	} else {
807
		if_inst->bits3.ud = (next_inst - if_inst) * 16;
808
	}
809
}
810
 
811
/**
812
 * Patch IF and ELSE instructions with appropriate jump targets.
813
 */
814
static void
815
patch_IF_ELSE(struct brw_compile *p,
816
	      struct brw_instruction *if_inst,
817
	      struct brw_instruction *else_inst,
818
	      struct brw_instruction *endif_inst)
819
{
820
	unsigned br = 1;
821
 
822
	assert(!p->single_program_flow);
823
	assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
824
	assert(endif_inst != NULL);
825
	assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
826
 
827
	/* Jump count is for 64bit data chunk each, so one 128bit instruction
828
	 * requires 2 chunks.
829
	 */
830
	if (p->gen >= 050)
831
		br = 2;
832
 
833
	assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
834
	endif_inst->header.execution_size = if_inst->header.execution_size;
835
 
836
	if (else_inst == NULL) {
837
		/* Patch IF -> ENDIF */
838
		if (p->gen < 060) {
839
			/* Turn it into an IFF, which means no mask stack operations for
840
			 * all-false and jumping past the ENDIF.
841
			 */
842
			if_inst->header.opcode = BRW_OPCODE_IFF;
843
			if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
844
			if_inst->bits3.if_else.pop_count = 0;
845
			if_inst->bits3.if_else.pad0 = 0;
846
		} else if (p->gen < 070) {
847
			/* As of gen6, there is no IFF and IF must point to the ENDIF. */
848
			if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
849
		} else {
850
			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
851
			if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
852
		}
853
	} else {
854
		else_inst->header.execution_size = if_inst->header.execution_size;
855
 
856
		/* Patch IF -> ELSE */
857
		if (p->gen < 060) {
858
			if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
859
			if_inst->bits3.if_else.pop_count = 0;
860
			if_inst->bits3.if_else.pad0 = 0;
861
		} else if (p->gen <= 070) {
862
			if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
863
		}
864
 
865
		/* Patch ELSE -> ENDIF */
866
		if (p->gen < 060) {
867
			/* BRW_OPCODE_ELSE pre-gen6 should point just past the
868
			 * matching ENDIF.
869
			 */
870
			else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
871
			else_inst->bits3.if_else.pop_count = 1;
872
			else_inst->bits3.if_else.pad0 = 0;
873
		} else if (p->gen < 070) {
874
			/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
875
			else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
876
		} else {
877
			/* The IF instruction's JIP should point just past the ELSE */
878
			if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
879
			/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
880
			if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
881
			else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
882
		}
883
	}
884
}
885
 
886
void
887
brw_ELSE(struct brw_compile *p)
888
{
889
	struct brw_instruction *insn;
890
 
891
	insn = brw_next_insn(p, BRW_OPCODE_ELSE);
892
 
893
	if (p->gen < 060) {
894
		brw_set_dest(p, insn, brw_ip_reg());
895
		brw_set_src0(p, insn, brw_ip_reg());
896
		brw_set_src1(p, insn, brw_imm_d(0x0));
897
	} else if (p->gen < 070) {
898
		brw_set_dest(p, insn, brw_imm_w(0));
899
		insn->bits1.branch_gen6.jump_count = 0;
900
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
901
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
902
	} else {
903
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
904
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
905
		brw_set_src1(p, insn, brw_imm_ud(0));
906
		insn->bits3.break_cont.jip = 0;
907
		insn->bits3.break_cont.uip = 0;
908
	}
909
 
910
	insn->header.compression_control = BRW_COMPRESSION_NONE;
911
	insn->header.mask_control = BRW_MASK_ENABLE;
912
	if (!p->single_program_flow)
913
		insn->header.thread_control = BRW_THREAD_SWITCH;
914
 
915
	push_if_stack(p, insn);
916
}
917
 
918
void
919
brw_ENDIF(struct brw_compile *p)
920
{
921
	struct brw_instruction *insn;
922
	struct brw_instruction *else_inst = NULL;
923
	struct brw_instruction *if_inst = NULL;
924
 
925
	/* Pop the IF and (optional) ELSE instructions from the stack */
926
	p->if_stack_depth--;
927
	if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
928
		else_inst = p->if_stack[p->if_stack_depth];
929
		p->if_stack_depth--;
930
	}
931
	if_inst = p->if_stack[p->if_stack_depth];
932
 
933
	if (p->single_program_flow) {
934
		/* ENDIF is useless; don't bother emitting it. */
935
		convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
936
		return;
937
	}
938
 
939
	insn = brw_next_insn(p, BRW_OPCODE_ENDIF);
940
 
941
	if (p->gen < 060) {
942
		brw_set_dest(p, insn, __retype_ud(brw_vec4_grf(0,0)));
943
		brw_set_src0(p, insn, __retype_ud(brw_vec4_grf(0,0)));
944
		brw_set_src1(p, insn, brw_imm_d(0x0));
945
	} else if (p->gen < 070) {
946
		brw_set_dest(p, insn, brw_imm_w(0));
947
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
948
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
949
	} else {
950
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
951
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
952
		brw_set_src1(p, insn, brw_imm_ud(0));
953
	}
954
 
955
	insn->header.compression_control = BRW_COMPRESSION_NONE;
956
	insn->header.mask_control = BRW_MASK_ENABLE;
957
	insn->header.thread_control = BRW_THREAD_SWITCH;
958
 
959
	/* Also pop item off the stack in the endif instruction: */
960
	if (p->gen < 060) {
961
		insn->bits3.if_else.jump_count = 0;
962
		insn->bits3.if_else.pop_count = 1;
963
		insn->bits3.if_else.pad0 = 0;
964
	} else if (p->gen < 070) {
965
		insn->bits1.branch_gen6.jump_count = 2;
966
	} else {
967
		insn->bits3.break_cont.jip = 2;
968
	}
969
	patch_IF_ELSE(p, if_inst, else_inst, insn);
970
}
971
 
972
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
973
{
974
	struct brw_instruction *insn;
975
 
976
	insn = brw_next_insn(p, BRW_OPCODE_BREAK);
977
	if (p->gen >= 060) {
978
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
979
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
980
		brw_set_src1(p, insn, brw_imm_d(0x0));
981
	} else {
982
		brw_set_dest(p, insn, brw_ip_reg());
983
		brw_set_src0(p, insn, brw_ip_reg());
984
		brw_set_src1(p, insn, brw_imm_d(0x0));
985
		insn->bits3.if_else.pad0 = 0;
986
		insn->bits3.if_else.pop_count = pop_count;
987
	}
988
	insn->header.compression_control = BRW_COMPRESSION_NONE;
989
	insn->header.execution_size = BRW_EXECUTE_8;
990
 
991
	return insn;
992
}
993
 
994
struct brw_instruction *gen6_CONT(struct brw_compile *p,
995
				  struct brw_instruction *do_insn)
996
{
997
	struct brw_instruction *insn;
998
 
999
	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1000
	brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1001
	brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1002
	brw_set_dest(p, insn, brw_ip_reg());
1003
	brw_set_src0(p, insn, brw_ip_reg());
1004
	brw_set_src1(p, insn, brw_imm_d(0x0));
1005
 
1006
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1007
	insn->header.execution_size = BRW_EXECUTE_8;
1008
	return insn;
1009
}
1010
 
1011
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
1012
{
1013
	struct brw_instruction *insn;
1014
	insn = brw_next_insn(p, BRW_OPCODE_CONTINUE);
1015
	brw_set_dest(p, insn, brw_ip_reg());
1016
	brw_set_src0(p, insn, brw_ip_reg());
1017
	brw_set_src1(p, insn, brw_imm_d(0x0));
1018
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1019
	insn->header.execution_size = BRW_EXECUTE_8;
1020
	/* insn->header.mask_control = BRW_MASK_DISABLE; */
1021
	insn->bits3.if_else.pad0 = 0;
1022
	insn->bits3.if_else.pop_count = pop_count;
1023
	return insn;
1024
}
1025
 
1026
/* DO/WHILE loop:
1027
 *
1028
 * The DO/WHILE is just an unterminated loop -- break or continue are
1029
 * used for control within the loop.  We have a few ways they can be
1030
 * done.
1031
 *
1032
 * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
1033
 * jip and no DO instruction.
1034
 *
1035
 * For non-uniform control flow pre-gen6, there's a DO instruction to
1036
 * push the mask, and a WHILE to jump back, and BREAK to get out and
1037
 * pop the mask.
1038
 *
1039
 * For gen6, there's no more mask stack, so no need for DO.  WHILE
1040
 * just points back to the first instruction of the loop.
1041
 */
1042
struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
1043
{
1044
	if (p->gen >= 060 || p->single_program_flow) {
1045
		return &p->store[p->nr_insn];
1046
	} else {
1047
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_DO);
1048
 
1049
		/* Override the defaults for this instruction:
1050
		*/
1051
		brw_set_dest(p, insn, brw_null_reg());
1052
		brw_set_src0(p, insn, brw_null_reg());
1053
		brw_set_src1(p, insn, brw_null_reg());
1054
 
1055
		insn->header.compression_control = BRW_COMPRESSION_NONE;
1056
		insn->header.execution_size = execute_size;
1057
		insn->header.predicate_control = BRW_PREDICATE_NONE;
1058
		/* insn->header.mask_control = BRW_MASK_ENABLE; */
1059
		/* insn->header.mask_control = BRW_MASK_DISABLE; */
1060
 
1061
		return insn;
1062
	}
1063
}
1064
 
1065
struct brw_instruction *brw_WHILE(struct brw_compile *p,
1066
                                  struct brw_instruction *do_insn)
1067
{
1068
	struct brw_instruction *insn;
1069
	unsigned br = 1;
1070
 
1071
	if (p->gen >= 050)
1072
		br = 2;
1073
 
1074
	if (p->gen >= 070) {
1075
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1076
 
1077
		brw_set_dest(p, insn, __retype_d(brw_null_reg()));
1078
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1079
		brw_set_src1(p, insn, brw_imm_ud(0));
1080
		insn->bits3.break_cont.jip = br * (do_insn - insn);
1081
 
1082
		insn->header.execution_size = BRW_EXECUTE_8;
1083
	} else if (p->gen >= 060) {
1084
		insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1085
 
1086
		brw_set_dest(p, insn, brw_imm_w(0));
1087
		insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
1088
		brw_set_src0(p, insn, __retype_d(brw_null_reg()));
1089
		brw_set_src1(p, insn, __retype_d(brw_null_reg()));
1090
 
1091
		insn->header.execution_size = BRW_EXECUTE_8;
1092
	} else {
1093
		if (p->single_program_flow) {
1094
			insn = brw_next_insn(p, BRW_OPCODE_ADD);
1095
 
1096
			brw_set_dest(p, insn, brw_ip_reg());
1097
			brw_set_src0(p, insn, brw_ip_reg());
1098
			brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
1099
			insn->header.execution_size = BRW_EXECUTE_1;
1100
		} else {
1101
			insn = brw_next_insn(p, BRW_OPCODE_WHILE);
1102
 
1103
			assert(do_insn->header.opcode == BRW_OPCODE_DO);
1104
 
1105
			brw_set_dest(p, insn, brw_ip_reg());
1106
			brw_set_src0(p, insn, brw_ip_reg());
1107
			brw_set_src1(p, insn, brw_imm_d(0));
1108
 
1109
			insn->header.execution_size = do_insn->header.execution_size;
1110
			insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
1111
			insn->bits3.if_else.pop_count = 0;
1112
			insn->bits3.if_else.pad0 = 0;
1113
		}
1114
	}
1115
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1116
	p->current->header.predicate_control = BRW_PREDICATE_NONE;
1117
 
1118
	return insn;
1119
}
1120
 
1121
/* FORWARD JUMPS:
1122
 */
1123
void brw_land_fwd_jump(struct brw_compile *p,
1124
		       struct brw_instruction *jmp_insn)
1125
{
1126
	struct brw_instruction *landing = &p->store[p->nr_insn];
1127
	unsigned jmpi = 1;
1128
 
1129
	if (p->gen >= 050)
1130
		jmpi = 2;
1131
 
1132
	assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
1133
	assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
1134
 
1135
	jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
1136
}
1137
 
1138
 
1139
 
1140
/* To integrate with the above, it makes sense that the comparison
1141
 * instruction should populate the flag register.  It might be simpler
1142
 * just to use the flag reg for most WM tasks?
1143
 */
1144
void brw_CMP(struct brw_compile *p,
1145
	     struct brw_reg dest,
1146
	     unsigned conditional,
1147
	     struct brw_reg src0,
1148
	     struct brw_reg src1)
1149
{
1150
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_CMP);
1151
 
1152
	insn->header.destreg__conditionalmod = conditional;
1153
	brw_set_dest(p, insn, dest);
1154
	brw_set_src0(p, insn, src0);
1155
	brw_set_src1(p, insn, src1);
1156
 
1157
	/* Make it so that future instructions will use the computed flag
1158
	 * value until brw_set_predicate_control_flag_value() is called
1159
	 * again.
1160
	 */
1161
	if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
1162
	    dest.nr == 0) {
1163
		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1164
		p->flag_value = 0xff;
1165
	}
1166
}
1167
 
1168
/* Issue 'wait' instruction for n1, host could program MMIO
1169
   to wake up thread. */
1170
void brw_WAIT(struct brw_compile *p)
1171
{
1172
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_WAIT);
1173
	struct brw_reg src = brw_notification_1_reg();
1174
 
1175
	brw_set_dest(p, insn, src);
1176
	brw_set_src0(p, insn, src);
1177
	brw_set_src1(p, insn, brw_null_reg());
1178
	insn->header.execution_size = 0; /* must */
1179
	insn->header.predicate_control = 0;
1180
	insn->header.compression_control = 0;
1181
}
1182
 
1183
/***********************************************************************
1184
 * Helpers for the various SEND message types:
1185
 */
1186
 
1187
/** Extended math function, float[8].
1188
 */
1189
void brw_math(struct brw_compile *p,
1190
	      struct brw_reg dest,
1191
	      unsigned function,
1192
	      unsigned saturate,
1193
	      unsigned msg_reg_nr,
1194
	      struct brw_reg src,
1195
	      unsigned data_type,
1196
	      unsigned precision)
1197
{
1198
	if (p->gen >= 060) {
1199
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1200
 
1201
		assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1202
		assert(src.file == BRW_GENERAL_REGISTER_FILE);
1203
 
1204
		assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1205
		assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
1206
 
1207
		/* Source modifiers are ignored for extended math instructions. */
1208
		assert(!src.negate);
1209
		assert(!src.abs);
1210
 
1211
		if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1212
		    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1213
			assert(src.type == BRW_REGISTER_TYPE_F);
1214
		}
1215
 
1216
		/* Math is the same ISA format as other opcodes, except that CondModifier
1217
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1218
		 */
1219
		insn->header.destreg__conditionalmod = function;
1220
		insn->header.saturate = saturate;
1221
 
1222
		brw_set_dest(p, insn, dest);
1223
		brw_set_src0(p, insn, src);
1224
		brw_set_src1(p, insn, brw_null_reg());
1225
	} else {
1226
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1227
		/* Example code doesn't set predicate_control for send
1228
		 * instructions.
1229
		 */
1230
		insn->header.predicate_control = 0;
1231
		insn->header.destreg__conditionalmod = msg_reg_nr;
1232
 
1233
		brw_set_dest(p, insn, dest);
1234
		brw_set_src0(p, insn, src);
1235
		brw_set_math_message(p, insn, function,
1236
				     src.type == BRW_REGISTER_TYPE_D,
1237
				     precision,
1238
				     saturate,
1239
				     data_type);
1240
	}
1241
}
1242
 
1243
/** Extended math function, float[8].
1244
 */
1245
void brw_math2(struct brw_compile *p,
1246
	       struct brw_reg dest,
1247
	       unsigned function,
1248
	       struct brw_reg src0,
1249
	       struct brw_reg src1)
1250
{
1251
	struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_MATH);
1252
 
1253
	assert(dest.file == BRW_GENERAL_REGISTER_FILE);
1254
	assert(src0.file == BRW_GENERAL_REGISTER_FILE);
1255
	assert(src1.file == BRW_GENERAL_REGISTER_FILE);
1256
 
1257
	assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
1258
	assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
1259
	assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
1260
 
1261
	if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
1262
	    function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
1263
		assert(src0.type == BRW_REGISTER_TYPE_F);
1264
		assert(src1.type == BRW_REGISTER_TYPE_F);
1265
	}
1266
 
1267
	/* Source modifiers are ignored for extended math instructions. */
1268
	assert(!src0.negate);
1269
	assert(!src0.abs);
1270
	assert(!src1.negate);
1271
	assert(!src1.abs);
1272
 
1273
	/* Math is the same ISA format as other opcodes, except that CondModifier
1274
	 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1275
	 */
1276
	insn->header.destreg__conditionalmod = function;
1277
 
1278
	brw_set_dest(p, insn, dest);
1279
	brw_set_src0(p, insn, src0);
1280
	brw_set_src1(p, insn, src1);
1281
}
1282
 
1283
/**
1284
 * Extended math function, float[16].
1285
 * Use 2 send instructions.
1286
 */
1287
void brw_math_16(struct brw_compile *p,
1288
		 struct brw_reg dest,
1289
		 unsigned function,
1290
		 unsigned saturate,
1291
		 unsigned msg_reg_nr,
1292
		 struct brw_reg src,
1293
		 unsigned precision)
1294
{
1295
	struct brw_instruction *insn;
1296
 
1297
	if (p->gen >= 060) {
1298
		insn = brw_next_insn(p, BRW_OPCODE_MATH);
1299
 
1300
		/* Math is the same ISA format as other opcodes, except that CondModifier
1301
		 * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
1302
		 */
1303
		insn->header.destreg__conditionalmod = function;
1304
		insn->header.saturate = saturate;
1305
 
1306
		/* Source modifiers are ignored for extended math instructions. */
1307
		assert(!src.negate);
1308
		assert(!src.abs);
1309
 
1310
		brw_set_dest(p, insn, dest);
1311
		brw_set_src0(p, insn, src);
1312
		brw_set_src1(p, insn, brw_null_reg());
1313
		return;
1314
	}
1315
 
1316
	/* First instruction:
1317
	*/
1318
	brw_push_insn_state(p);
1319
	brw_set_predicate_control_flag_value(p, 0xff);
1320
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1321
 
1322
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1323
	insn->header.destreg__conditionalmod = msg_reg_nr;
1324
 
1325
	brw_set_dest(p, insn, dest);
1326
	brw_set_src0(p, insn, src);
1327
	brw_set_math_message(p, insn, function,
1328
			     BRW_MATH_INTEGER_UNSIGNED,
1329
			     precision,
1330
			     saturate,
1331
			     BRW_MATH_DATA_VECTOR);
1332
 
1333
	/* Second instruction:
1334
	*/
1335
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1336
	insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
1337
	insn->header.destreg__conditionalmod = msg_reg_nr+1;
1338
 
1339
	brw_set_dest(p, insn, __offset(dest,1));
1340
	brw_set_src0(p, insn, src);
1341
	brw_set_math_message(p, insn, function,
1342
			     BRW_MATH_INTEGER_UNSIGNED,
1343
			     precision,
1344
			     saturate,
1345
			     BRW_MATH_DATA_VECTOR);
1346
 
1347
	brw_pop_insn_state(p);
1348
}
1349
 
1350
/**
1351
 * Write a block of OWORDs (half a GRF each) from the scratch buffer,
1352
 * using a constant offset per channel.
1353
 *
1354
 * The offset must be aligned to oword size (16 bytes).  Used for
1355
 * register spilling.
1356
 */
1357
void brw_oword_block_write_scratch(struct brw_compile *p,
1358
				   struct brw_reg mrf,
1359
				   int num_regs,
1360
				   unsigned offset)
1361
{
1362
	uint32_t msg_control, msg_type;
1363
	int mlen;
1364
 
1365
	if (p->gen >= 060)
1366
		offset /= 16;
1367
 
1368
	mrf = __retype_ud(mrf);
1369
 
1370
	if (num_regs == 1) {
1371
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1372
		mlen = 2;
1373
	} else {
1374
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1375
		mlen = 3;
1376
	}
1377
 
1378
	/* Set up the message header.  This is g0, with g0.2 filled with
1379
	 * the offset.  We don't want to leave our offset around in g0 or
1380
	 * it'll screw up texture samples, so set it up inside the message
1381
	 * reg.
1382
	 */
1383
	{
1384
		brw_push_insn_state(p);
1385
		brw_set_mask_control(p, BRW_MASK_DISABLE);
1386
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1387
 
1388
		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1389
 
1390
		/* set message header global offset field (reg 0, element 2) */
1391
		brw_MOV(p,
1392
			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1393
			brw_imm_ud(offset));
1394
 
1395
		brw_pop_insn_state(p);
1396
	}
1397
 
1398
	{
1399
		struct brw_reg dest;
1400
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1401
		int send_commit_msg;
1402
		struct brw_reg src_header = __retype_uw(brw_vec8_grf(0, 0));
1403
 
1404
		if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
1405
			insn->header.compression_control = BRW_COMPRESSION_NONE;
1406
			src_header = vec16(src_header);
1407
		}
1408
		assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
1409
		insn->header.destreg__conditionalmod = mrf.nr;
1410
 
1411
		/* Until gen6, writes followed by reads from the same location
1412
		 * are not guaranteed to be ordered unless write_commit is set.
1413
		 * If set, then a no-op write is issued to the destination
1414
		 * register to set a dependency, and a read from the destination
1415
		 * can be used to ensure the ordering.
1416
		 *
1417
		 * For gen6, only writes between different threads need ordering
1418
		 * protection.  Our use of DP writes is all about register
1419
		 * spilling within a thread.
1420
		 */
1421
		if (p->gen >= 060) {
1422
			dest = __retype_uw(vec16(brw_null_reg()));
1423
			send_commit_msg = 0;
1424
		} else {
1425
			dest = src_header;
1426
			send_commit_msg = 1;
1427
		}
1428
 
1429
		brw_set_dest(p, insn, dest);
1430
		if (p->gen >= 060) {
1431
			brw_set_src0(p, insn, mrf);
1432
		} else {
1433
			brw_set_src0(p, insn, brw_null_reg());
1434
		}
1435
 
1436
		if (p->gen >= 060)
1437
			msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1438
		else
1439
			msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
1440
 
1441
		brw_set_dp_write_message(p,
1442
					 insn,
1443
					 255, /* binding table index (255=stateless) */
1444
					 msg_control,
1445
					 msg_type,
1446
					 mlen,
1447
					 true, /* header_present */
1448
					 0, /* pixel scoreboard */
1449
					 send_commit_msg, /* response_length */
1450
					 0, /* eot */
1451
					 send_commit_msg);
1452
	}
1453
}
1454
 
1455
 
1456
/**
1457
 * Read a block of owords (half a GRF each) from the scratch buffer
1458
 * using a constant index per channel.
1459
 *
1460
 * Offset must be aligned to oword size (16 bytes).  Used for register
1461
 * spilling.
1462
 */
1463
void
1464
brw_oword_block_read_scratch(struct brw_compile *p,
1465
			     struct brw_reg dest,
1466
			     struct brw_reg mrf,
1467
			     int num_regs,
1468
			     unsigned offset)
1469
{
1470
	uint32_t msg_control;
1471
	int rlen;
1472
 
1473
	if (p->gen >= 060)
1474
		offset /= 16;
1475
 
1476
	mrf = __retype_ud(mrf);
1477
	dest = __retype_uw(dest);
1478
 
1479
	if (num_regs == 1) {
1480
		msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
1481
		rlen = 1;
1482
	} else {
1483
		msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
1484
		rlen = 2;
1485
	}
1486
 
1487
	{
1488
		brw_push_insn_state(p);
1489
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1490
		brw_set_mask_control(p, BRW_MASK_DISABLE);
1491
 
1492
		brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1493
 
1494
		/* set message header global offset field (reg 0, element 2) */
1495
		brw_MOV(p,
1496
			__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1497
			brw_imm_ud(offset));
1498
 
1499
		brw_pop_insn_state(p);
1500
	}
1501
 
1502
	{
1503
		struct brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
1504
 
1505
		assert(insn->header.predicate_control == 0);
1506
		insn->header.compression_control = BRW_COMPRESSION_NONE;
1507
		insn->header.destreg__conditionalmod = mrf.nr;
1508
 
1509
		brw_set_dest(p, insn, dest); /* UW? */
1510
		if (p->gen >= 060) {
1511
			brw_set_src0(p, insn, mrf);
1512
		} else {
1513
			brw_set_src0(p, insn, brw_null_reg());
1514
		}
1515
 
1516
		brw_set_dp_read_message(p,
1517
					insn,
1518
					255, /* binding table index (255=stateless) */
1519
					msg_control,
1520
					BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1521
					BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1522
					1, /* msg_length */
1523
					rlen);
1524
	}
1525
}
1526
 
1527
/**
1528
 * Read a float[4] vector from the data port Data Cache (const buffer).
1529
 * Location (in buffer) should be a multiple of 16.
1530
 * Used for fetching shader constants.
1531
 */
1532
void brw_oword_block_read(struct brw_compile *p,
1533
			  struct brw_reg dest,
1534
			  struct brw_reg mrf,
1535
			  uint32_t offset,
1536
			  uint32_t bind_table_index)
1537
{
1538
	struct brw_instruction *insn;
1539
 
1540
	/* On newer hardware, offset is in units of owords. */
1541
	if (p->gen >= 060)
1542
		offset /= 16;
1543
 
1544
	mrf = __retype_ud(mrf);
1545
 
1546
	brw_push_insn_state(p);
1547
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1548
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1549
	brw_set_mask_control(p, BRW_MASK_DISABLE);
1550
 
1551
	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1552
 
1553
	/* set message header global offset field (reg 0, element 2) */
1554
	brw_MOV(p,
1555
		__retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, mrf.nr, 2)),
1556
		brw_imm_ud(offset));
1557
 
1558
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1559
	insn->header.destreg__conditionalmod = mrf.nr;
1560
 
1561
	/* cast dest to a uword[8] vector */
1562
	dest = __retype_uw(vec8(dest));
1563
 
1564
	brw_set_dest(p, insn, dest);
1565
	if (p->gen >= 060) {
1566
		brw_set_src0(p, insn, mrf);
1567
	} else {
1568
		brw_set_src0(p, insn, brw_null_reg());
1569
	}
1570
 
1571
	brw_set_dp_read_message(p,
1572
				insn,
1573
				bind_table_index,
1574
				BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
1575
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
1576
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1577
				1, /* msg_length */
1578
				1); /* response_length (1 reg, 2 owords!) */
1579
 
1580
	brw_pop_insn_state(p);
1581
}
1582
 
1583
/**
1584
 * Read a set of dwords from the data port Data Cache (const buffer).
1585
 *
1586
 * Location (in buffer) appears as UD offsets in the register after
1587
 * the provided mrf header reg.
1588
 */
1589
void brw_dword_scattered_read(struct brw_compile *p,
1590
			      struct brw_reg dest,
1591
			      struct brw_reg mrf,
1592
			      uint32_t bind_table_index)
1593
{
1594
	struct brw_instruction *insn;
1595
 
1596
	mrf = __retype_ud(mrf);
1597
 
1598
	brw_push_insn_state(p);
1599
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1600
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1601
	brw_set_mask_control(p, BRW_MASK_DISABLE);
1602
	brw_MOV(p, mrf, __retype_ud(brw_vec8_grf(0, 0)));
1603
	brw_pop_insn_state(p);
1604
 
1605
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1606
	insn->header.destreg__conditionalmod = mrf.nr;
1607
 
1608
	/* cast dest to a uword[8] vector */
1609
	dest = __retype_uw(vec8(dest));
1610
 
1611
	brw_set_dest(p, insn, dest);
1612
	brw_set_src0(p, insn, brw_null_reg());
1613
 
1614
	brw_set_dp_read_message(p,
1615
				insn,
1616
				bind_table_index,
1617
				BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
1618
				BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
1619
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1620
				2, /* msg_length */
1621
				1); /* response_length */
1622
}
1623
 
1624
/**
1625
 * Read float[4] constant(s) from VS constant buffer.
1626
 * For relative addressing, two float[4] constants will be read into 'dest'.
1627
 * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
1628
 */
1629
void brw_dp_READ_4_vs(struct brw_compile *p,
1630
                      struct brw_reg dest,
1631
                      unsigned location,
1632
                      unsigned bind_table_index)
1633
{
1634
	struct brw_instruction *insn;
1635
	unsigned msg_reg_nr = 1;
1636
 
1637
	if (p->gen >= 060)
1638
		location /= 16;
1639
 
1640
	/* Setup MRF[1] with location/offset into const buffer */
1641
	brw_push_insn_state(p);
1642
	brw_set_access_mode(p, BRW_ALIGN_1);
1643
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1644
	brw_set_mask_control(p, BRW_MASK_DISABLE);
1645
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1646
	brw_MOV(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2)),
1647
		brw_imm_ud(location));
1648
	brw_pop_insn_state(p);
1649
 
1650
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1651
 
1652
	insn->header.predicate_control = BRW_PREDICATE_NONE;
1653
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1654
	insn->header.destreg__conditionalmod = msg_reg_nr;
1655
	insn->header.mask_control = BRW_MASK_DISABLE;
1656
 
1657
	brw_set_dest(p, insn, dest);
1658
	if (p->gen >= 060) {
1659
		brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
1660
	} else {
1661
		brw_set_src0(p, insn, brw_null_reg());
1662
	}
1663
 
1664
	brw_set_dp_read_message(p,
1665
				insn,
1666
				bind_table_index,
1667
				0,
1668
				BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
1669
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1670
				1, /* msg_length */
1671
				1); /* response_length (1 Oword) */
1672
}
1673
 
1674
/**
1675
 * Read a float[4] constant per vertex from VS constant buffer, with
1676
 * relative addressing.
1677
 */
1678
void brw_dp_READ_4_vs_relative(struct brw_compile *p,
1679
			       struct brw_reg dest,
1680
			       struct brw_reg addr_reg,
1681
			       unsigned offset,
1682
			       unsigned bind_table_index)
1683
{
1684
	struct brw_reg src = brw_vec8_grf(0, 0);
1685
	struct brw_instruction *insn;
1686
	int msg_type;
1687
 
1688
	/* Setup MRF[1] with offset into const buffer */
1689
	brw_push_insn_state(p);
1690
	brw_set_access_mode(p, BRW_ALIGN_1);
1691
	brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1692
	brw_set_mask_control(p, BRW_MASK_DISABLE);
1693
	brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1694
 
1695
	/* M1.0 is block offset 0, M1.4 is block offset 1, all other
1696
	 * fields ignored.
1697
	 */
1698
	brw_ADD(p, __retype_d(brw_message_reg(1)),
1699
		addr_reg, brw_imm_d(offset));
1700
	brw_pop_insn_state(p);
1701
 
1702
	gen6_resolve_implied_move(p, &src, 0);
1703
 
1704
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1705
	insn->header.predicate_control = BRW_PREDICATE_NONE;
1706
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1707
	insn->header.destreg__conditionalmod = 0;
1708
	insn->header.mask_control = BRW_MASK_DISABLE;
1709
 
1710
	brw_set_dest(p, insn, dest);
1711
	brw_set_src0(p, insn, src);
1712
 
1713
	if (p->gen >= 060)
1714
		msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1715
	else if (p->gen >= 045)
1716
		msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1717
	else
1718
		msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
1719
 
1720
	brw_set_dp_read_message(p,
1721
				insn,
1722
				bind_table_index,
1723
				BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
1724
				msg_type,
1725
				BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1726
				2, /* msg_length */
1727
				1); /* response_length */
1728
}
1729
 
1730
void brw_fb_WRITE(struct brw_compile *p,
1731
		  int dispatch_width,
1732
                  unsigned msg_reg_nr,
1733
                  struct brw_reg src0,
1734
                  unsigned msg_control,
1735
                  unsigned binding_table_index,
1736
                  unsigned msg_length,
1737
                  unsigned response_length,
1738
                  bool eot,
1739
                  bool header_present)
1740
{
1741
	struct brw_instruction *insn;
1742
	unsigned msg_type;
1743
	struct brw_reg dest;
1744
 
1745
	if (dispatch_width == 16)
1746
		dest = __retype_uw(vec16(brw_null_reg()));
1747
	else
1748
		dest = __retype_uw(vec8(brw_null_reg()));
1749
 
1750
	if (p->gen >= 060 && binding_table_index == 0) {
1751
		insn = brw_next_insn(p, BRW_OPCODE_SENDC);
1752
	} else {
1753
		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1754
	}
1755
	/* The execution mask is ignored for render target writes. */
1756
	insn->header.predicate_control = 0;
1757
	insn->header.compression_control = BRW_COMPRESSION_NONE;
1758
 
1759
	if (p->gen >= 060) {
1760
		/* headerless version, just submit color payload */
1761
		src0 = brw_message_reg(msg_reg_nr);
1762
 
1763
		msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1764
	} else {
1765
		insn->header.destreg__conditionalmod = msg_reg_nr;
1766
 
1767
		msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
1768
	}
1769
 
1770
	brw_set_dest(p, insn, dest);
1771
	brw_set_src0(p, insn, src0);
1772
	brw_set_dp_write_message(p,
1773
				 insn,
1774
				 binding_table_index,
1775
				 msg_control,
1776
				 msg_type,
1777
				 msg_length,
1778
				 header_present,
1779
				 eot,
1780
				 response_length,
1781
				 eot,
1782
 
1783
}
1784
 
1785
/**
1786
 * Texture sample instruction.
1787
 * Note: the msg_type plus msg_length values determine exactly what kind
1788
 * of sampling operation is performed.  See volume 4, page 161 of docs.
1789
 */
1790
void brw_SAMPLE(struct brw_compile *p,
1791
		struct brw_reg dest,
1792
		unsigned msg_reg_nr,
1793
		struct brw_reg src0,
1794
		unsigned binding_table_index,
1795
		unsigned sampler,
1796
		unsigned writemask,
1797
		unsigned msg_type,
1798
		unsigned response_length,
1799
		unsigned msg_length,
1800
		bool header_present,
1801
		unsigned simd_mode)
1802
{
1803
	assert(writemask);
1804
 
1805
	if (p->gen < 050 || writemask != WRITEMASK_XYZW) {
1806
		struct brw_reg m1 = brw_message_reg(msg_reg_nr);
1807
 
1808
		writemask = ~writemask & WRITEMASK_XYZW;
1809
 
1810
		brw_push_insn_state(p);
1811
 
1812
		brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1813
		brw_set_mask_control(p, BRW_MASK_DISABLE);
1814
 
1815
		brw_MOV(p, __retype_ud(m1), __retype_ud(brw_vec8_grf(0,0)));
1816
		brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(writemask << 12));
1817
 
1818
		brw_pop_insn_state(p);
1819
 
1820
		src0 = __retype_uw(brw_null_reg());
1821
	}
1822
 
1823
	{
1824
		struct brw_instruction *insn;
1825
 
1826
		gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1827
 
1828
		insn = brw_next_insn(p, BRW_OPCODE_SEND);
1829
		insn->header.predicate_control = 0; /* XXX */
1830
		insn->header.compression_control = BRW_COMPRESSION_NONE;
1831
		if (p->gen < 060)
1832
			insn->header.destreg__conditionalmod = msg_reg_nr;
1833
 
1834
		brw_set_dest(p, insn, dest);
1835
		brw_set_src0(p, insn, src0);
1836
		brw_set_sampler_message(p, insn,
1837
					binding_table_index,
1838
					sampler,
1839
					msg_type,
1840
					response_length,
1841
					msg_length,
1842
					header_present,
1843
					simd_mode);
1844
	}
1845
}
1846
 
1847
/* All these variables are pretty confusing - we might be better off
1848
 * using bitmasks and macros for this, in the old style.  Or perhaps
1849
 * just having the caller instantiate the fields in dword3 itself.
1850
 */
1851
void brw_urb_WRITE(struct brw_compile *p,
1852
		   struct brw_reg dest,
1853
		   unsigned msg_reg_nr,
1854
		   struct brw_reg src0,
1855
		   bool allocate,
1856
		   bool used,
1857
		   unsigned msg_length,
1858
		   unsigned response_length,
1859
		   bool eot,
1860
		   bool writes_complete,
1861
		   unsigned offset,
1862
		   unsigned swizzle)
1863
{
1864
	struct brw_instruction *insn;
1865
 
1866
	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1867
 
1868
	if (p->gen >= 070) {
1869
		/* Enable Channel Masks in the URB_WRITE_HWORD message header */
1870
		brw_push_insn_state(p);
1871
		brw_set_access_mode(p, BRW_ALIGN_1);
1872
		brw_OR(p, __retype_ud(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5)),
1873
		       __retype_ud(brw_vec1_grf(0, 5)),
1874
		       brw_imm_ud(0xff00));
1875
		brw_pop_insn_state(p);
1876
	}
1877
 
1878
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1879
 
1880
	assert(msg_length < BRW_MAX_MRF);
1881
 
1882
	brw_set_dest(p, insn, dest);
1883
	brw_set_src0(p, insn, src0);
1884
	brw_set_src1(p, insn, brw_imm_d(0));
1885
 
1886
	if (p->gen <= 060)
1887
		insn->header.destreg__conditionalmod = msg_reg_nr;
1888
 
1889
	brw_set_urb_message(p,
1890
			    insn,
1891
			    allocate,
1892
			    used,
1893
			    msg_length,
1894
			    response_length,
1895
			    eot,
1896
			    writes_complete,
1897
			    offset,
1898
			    swizzle);
1899
}
1900
 
1901
static int
1902
brw_find_next_block_end(struct brw_compile *p, int start)
1903
{
1904
	int ip;
1905
 
1906
	for (ip = start + 1; ip < p->nr_insn; ip++) {
1907
		struct brw_instruction *insn = &p->store[ip];
1908
 
1909
		switch (insn->header.opcode) {
1910
		case BRW_OPCODE_ENDIF:
1911
		case BRW_OPCODE_ELSE:
1912
		case BRW_OPCODE_WHILE:
1913
			return ip;
1914
		}
1915
	}
1916
	assert(!"not reached");
1917
	return start + 1;
1918
}
1919
 
1920
/* There is no DO instruction on gen6, so to find the end of the loop
1921
 * we have to see if the loop is jumping back before our start
1922
 * instruction.
1923
 */
1924
static int
1925
brw_find_loop_end(struct brw_compile *p, int start)
1926
{
1927
	int ip;
1928
	int br = 2;
1929
 
1930
	for (ip = start + 1; ip < p->nr_insn; ip++) {
1931
		struct brw_instruction *insn = &p->store[ip];
1932
 
1933
		if (insn->header.opcode == BRW_OPCODE_WHILE) {
1934
			int jip = p->gen <= 070 ? insn->bits1.branch_gen6.jump_count
1935
				: insn->bits3.break_cont.jip;
1936
			if (ip + jip / br <= start)
1937
				return ip;
1938
		}
1939
	}
1940
	assert(!"not reached");
1941
	return start + 1;
1942
}
1943
 
1944
/* After program generation, go back and update the UIP and JIP of
1945
 * BREAK and CONT instructions to their correct locations.
1946
 */
1947
void
1948
brw_set_uip_jip(struct brw_compile *p)
1949
{
1950
	int ip;
1951
	int br = 2;
1952
 
1953
	if (p->gen <= 060)
1954
		return;
1955
 
1956
	for (ip = 0; ip < p->nr_insn; ip++) {
1957
		struct brw_instruction *insn = &p->store[ip];
1958
 
1959
		switch (insn->header.opcode) {
1960
		case BRW_OPCODE_BREAK:
1961
			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1962
			/* Gen7 UIP points to WHILE; Gen6 points just after it */
1963
			insn->bits3.break_cont.uip =
1964
				br * (brw_find_loop_end(p, ip) - ip + (p->gen <= 070 ? 1 : 0));
1965
			break;
1966
		case BRW_OPCODE_CONTINUE:
1967
			insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
1968
			insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
1969
 
1970
			assert(insn->bits3.break_cont.uip != 0);
1971
			assert(insn->bits3.break_cont.jip != 0);
1972
			break;
1973
		}
1974
	}
1975
}
1976
 
1977
void brw_ff_sync(struct brw_compile *p,
1978
		   struct brw_reg dest,
1979
		   unsigned msg_reg_nr,
1980
		   struct brw_reg src0,
1981
		   bool allocate,
1982
		   unsigned response_length,
1983
		   bool eot)
1984
{
1985
	struct brw_instruction *insn;
1986
 
1987
	gen6_resolve_implied_move(p, &src0, msg_reg_nr);
1988
 
1989
	insn = brw_next_insn(p, BRW_OPCODE_SEND);
1990
	brw_set_dest(p, insn, dest);
1991
	brw_set_src0(p, insn, src0);
1992
	brw_set_src1(p, insn, brw_imm_d(0));
1993
 
1994
	if (p->gen < 060)
1995
		insn->header.destreg__conditionalmod = msg_reg_nr;
1996
 
1997
	brw_set_ff_sync_message(p,
1998
				insn,
1999
				allocate,
2000
				response_length,
2001
				eot);
2002
}