Subversion Repositories Kolibri OS

Rev

Rev 3254 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3254 Serge 1
/*
2
   Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3
   Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4
   develop this 3D driver.
5
 
6
   Permission is hereby granted, free of charge, to any person obtaining
7
   a copy of this software and associated documentation files (the
8
   "Software"), to deal in the Software without restriction, including
9
   without limitation the rights to use, copy, modify, merge, publish,
10
   distribute, sublicense, and/or sell copies of the Software, and to
11
   permit persons to whom the Software is furnished to do so, subject to
12
   the following conditions:
13
 
14
   The above copyright notice and this permission notice (including the
15
   next paragraph) shall be included in all copies or substantial
16
   portions of the Software.
17
 
18
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
   IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 
26
 **********************************************************************/
27
/*
28
 * Authors:
29
 *   Keith Whitwell 
30
 */
31
 
32
 
33
#ifndef BRW_EU_H
34
#define BRW_EU_H
35
 
36
/*
 * Standard headers.
 *
 * NOTE(review): in this copy the header names were lost (the angle-bracket
 * text was stripped), leaving bare `#include` directives that cannot be
 * preprocessed.  The four names below are the ones the upstream header is
 * believed to use -- confirm against the repository before committing.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
40
 
41
/**
 * Pack four 2-bit channel selectors into a single swizzle value:
 * a lands in bits 1:0, b in bits 3:2, c in bits 5:4 and d in bits 7:6.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/** Extract the 2-bit selector stored at position idx of a packed swizzle. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

/* Predefined swizzles; selector values are 0 = X, 1 = Y, 2 = Z, 3 = W. */
#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_YYYY      BRW_SWIZZLE4(1,1,1,1)
#define BRW_SWIZZLE_ZZZZ      BRW_SWIZZLE4(2,2,2,2)
#define BRW_SWIZZLE_WWWW      BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
51
 
52
/* Per-channel write-mask bits: one bit per channel, combined with OR. */
#define WRITEMASK_X 0x1
#define WRITEMASK_Y 0x2
#define WRITEMASK_Z 0x4
#define WRITEMASK_W 0x8

/* Convenience combinations of the single-channel bits above. */
#define WRITEMASK_XY (WRITEMASK_X | WRITEMASK_Y)
#define WRITEMASK_XYZ (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z)
#define WRITEMASK_XYZW (WRITEMASK_X | WRITEMASK_Y | WRITEMASK_Z | WRITEMASK_W)
60
 
61
/** Number of general purpose registers (VS, WM, etc) */
#define BRW_MAX_GRF 128

/** Number of message register file registers */
#define BRW_MAX_MRF 16

/* Access-mode selectors. */
#define BRW_ALIGN_1   0
#define BRW_ALIGN_16  1

/* Addressing-mode selectors. */
#define BRW_ADDRESS_DIRECT                        0
#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1

/* Channel indices, X through W numbered 0..3. */
#define BRW_CHANNEL_X     0
#define BRW_CHANNEL_Y     1
#define BRW_CHANNEL_Z     2
#define BRW_CHANNEL_W     3
78
 
79
/* Compression control values; enumerators count up from 0 in order. */
enum brw_compression {
	BRW_COMPRESSION_NONE,
	BRW_COMPRESSION_2NDHALF,
	BRW_COMPRESSION_COMPRESSED,
};

/*
 * Gen6 encodings.  The 1H/2H names reuse the values of 1Q/3Q (0 and 2).
 * NOTE(review): presumably Q = quarter and H = half -- confirm against the PRM.
 */
#define GEN6_COMPRESSION_1Q		0
#define GEN6_COMPRESSION_2Q		1
#define GEN6_COMPRESSION_3Q		2
#define GEN6_COMPRESSION_4Q		3
#define GEN6_COMPRESSION_1H		0
#define GEN6_COMPRESSION_2H		2
91
 
92
/* Conditional-modifier codes.  EQ and NEQ are aliases of Z and NZ. */
#define BRW_CONDITIONAL_NONE  0
#define BRW_CONDITIONAL_Z     1
#define BRW_CONDITIONAL_NZ    2
#define BRW_CONDITIONAL_EQ    1	/* Z */
#define BRW_CONDITIONAL_NEQ   2	/* NZ */
#define BRW_CONDITIONAL_G     3
#define BRW_CONDITIONAL_GE    4
#define BRW_CONDITIONAL_L     5
#define BRW_CONDITIONAL_LE    6
#define BRW_CONDITIONAL_R     7
#define BRW_CONDITIONAL_O     8
#define BRW_CONDITIONAL_U     9
104
 
105
/* Debug control. */
#define BRW_DEBUG_NONE        0
#define BRW_DEBUG_BREAKPOINT  1

/* Dependency control. */
#define BRW_DEPENDENCY_NORMAL         0
#define BRW_DEPENDENCY_NOTCLEARED     1
#define BRW_DEPENDENCY_NOTCHECKED     2
#define BRW_DEPENDENCY_DISABLE        3

/* Execution size: the encoded value is log2 of the size in the name. */
#define BRW_EXECUTE_1     0
#define BRW_EXECUTE_2     1
#define BRW_EXECUTE_4     2
#define BRW_EXECUTE_8     3
#define BRW_EXECUTE_16    4
#define BRW_EXECUTE_32    5

/* Horizontal stride: 0 encodes stride 0, otherwise value = log2(stride) + 1. */
#define BRW_HORIZONTAL_STRIDE_0   0
#define BRW_HORIZONTAL_STRIDE_1   1
#define BRW_HORIZONTAL_STRIDE_2   2
#define BRW_HORIZONTAL_STRIDE_4   3

/* Saturate control. */
#define BRW_INSTRUCTION_NORMAL    0
#define BRW_INSTRUCTION_SATURATE  1

/* Mask control. */
#define BRW_MASK_ENABLE   0
#define BRW_MASK_DISABLE  1
130
 
131
/** @{
 *
 * Gen6 has replaced "mask enable/disable" with WECtrl, which is
 * effectively the same but much simpler to think about.  Now, two
 * contributors are ANDed together to decide whether a channel is
 * executed: the predication on the instruction, and the channel write
 * enable.
 */
/**
 * This is the default value.  It means that a channel's write enable is set
 * if the per-channel IP is pointing at this instruction.
 */
#define BRW_WE_NORMAL		0
/**
 * This is used like BRW_MASK_DISABLE, and causes all channels to have
 * their write enable set.  Note that predication still contributes to
 * whether the channel actually gets written.
 */
#define BRW_WE_ALL		1
/** @} */
151
 
152
/**
 * Instruction opcodes.
 *
 * Values below 128 are hardware opcodes with explicit encodings.  The
 * compiler backend opcodes start at 128 and are numbered consecutively from
 * there, so their order in this list determines their values.
 */
enum opcode {
	/* These are the actual hardware opcodes. */
	BRW_OPCODE_MOV =	1,
	BRW_OPCODE_SEL =	2,
	BRW_OPCODE_NOT =	4,
	BRW_OPCODE_AND =	5,
	BRW_OPCODE_OR =	6,
	BRW_OPCODE_XOR =	7,
	BRW_OPCODE_SHR =	8,
	BRW_OPCODE_SHL =	9,
	BRW_OPCODE_RSR =	10,
	BRW_OPCODE_RSL =	11,
	BRW_OPCODE_ASR =	12,
	BRW_OPCODE_CMP =	16,
	BRW_OPCODE_CMPN =	17,
	BRW_OPCODE_JMPI =	32,
	BRW_OPCODE_IF =	34,
	BRW_OPCODE_IFF =	35,
	BRW_OPCODE_ELSE =	36,
	BRW_OPCODE_ENDIF =	37,
	BRW_OPCODE_DO =	38,
	BRW_OPCODE_WHILE =	39,
	BRW_OPCODE_BREAK =	40,
	BRW_OPCODE_CONTINUE = 41,
	BRW_OPCODE_HALT =	42,
	BRW_OPCODE_MSAVE =	44,
	BRW_OPCODE_MRESTORE = 45,
	BRW_OPCODE_PUSH =	46,
	BRW_OPCODE_POP =	47,
	BRW_OPCODE_WAIT =	48,
	BRW_OPCODE_SEND =	49,
	BRW_OPCODE_SENDC =	50,
	BRW_OPCODE_MATH =	56,
	BRW_OPCODE_ADD =	64,
	BRW_OPCODE_MUL =	65,
	BRW_OPCODE_AVG =	66,
	BRW_OPCODE_FRC =	67,
	BRW_OPCODE_RNDU =	68,
	BRW_OPCODE_RNDD =	69,
	BRW_OPCODE_RNDE =	70,
	BRW_OPCODE_RNDZ =	71,
	BRW_OPCODE_MAC =	72,
	BRW_OPCODE_MACH =	73,
	BRW_OPCODE_LZD =	74,
	BRW_OPCODE_SAD2 =	80,
	BRW_OPCODE_SADA2 =	81,
	BRW_OPCODE_DP4 =	84,
	BRW_OPCODE_DPH =	85,
	BRW_OPCODE_DP3 =	86,
	BRW_OPCODE_DP2 =	87,
	BRW_OPCODE_DPA2 =	88,
	BRW_OPCODE_LINE =	89,
	BRW_OPCODE_PLN =	90,
	BRW_OPCODE_NOP =	126,

	/* These are compiler backend opcodes that get translated into other
	 * instructions.
	 */
	FS_OPCODE_FB_WRITE = 128,
	SHADER_OPCODE_RCP,
	SHADER_OPCODE_RSQ,
	SHADER_OPCODE_SQRT,
	SHADER_OPCODE_EXP2,
	SHADER_OPCODE_LOG2,
	SHADER_OPCODE_POW,
	SHADER_OPCODE_SIN,
	SHADER_OPCODE_COS,
	FS_OPCODE_DDX,
	FS_OPCODE_DDY,
	FS_OPCODE_PIXEL_X,
	FS_OPCODE_PIXEL_Y,
	FS_OPCODE_CINTERP,
	FS_OPCODE_LINTERP,
	FS_OPCODE_TEX,
	FS_OPCODE_TXB,
	FS_OPCODE_TXD,
	FS_OPCODE_TXF,
	FS_OPCODE_TXL,
	FS_OPCODE_TXS,
	FS_OPCODE_DISCARD,
	FS_OPCODE_SPILL,
	FS_OPCODE_UNSPILL,
	FS_OPCODE_PULL_CONSTANT_LOAD,

	VS_OPCODE_URB_WRITE,
	VS_OPCODE_SCRATCH_READ,
	VS_OPCODE_SCRATCH_WRITE,
	VS_OPCODE_PULL_CONSTANT_LOAD,
};
241
 
242
/*
 * Predicate control values.  NONE and NORMAL are shared; from 2 upward the
 * ALIGN1 and ALIGN16 names reuse the same numbers with different meanings.
 */
#define BRW_PREDICATE_NONE             0
#define BRW_PREDICATE_NORMAL           1
#define BRW_PREDICATE_ALIGN1_ANYV             2
#define BRW_PREDICATE_ALIGN1_ALLV             3
#define BRW_PREDICATE_ALIGN1_ANY2H            4
#define BRW_PREDICATE_ALIGN1_ALL2H            5
#define BRW_PREDICATE_ALIGN1_ANY4H            6
#define BRW_PREDICATE_ALIGN1_ALL4H            7
#define BRW_PREDICATE_ALIGN1_ANY8H            8
#define BRW_PREDICATE_ALIGN1_ALL8H            9
#define BRW_PREDICATE_ALIGN1_ANY16H           10
#define BRW_PREDICATE_ALIGN1_ALL16H           11
#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
#define BRW_PREDICATE_ALIGN16_ANY4H           6
#define BRW_PREDICATE_ALIGN16_ALL4H           7
260
 
261
/* Register file selectors. */
#define BRW_ARCHITECTURE_REGISTER_FILE    0
#define BRW_GENERAL_REGISTER_FILE         1
#define BRW_MESSAGE_REGISTER_FILE         2
#define BRW_IMMEDIATE_VALUE               3

/*
 * Register data types.  Values 5 and 6 each carry two names (B/VF and HF/V);
 * the per-line comments say when the vector forms apply.
 */
#define BRW_REGISTER_TYPE_UD  0
#define BRW_REGISTER_TYPE_D   1
#define BRW_REGISTER_TYPE_UW  2
#define BRW_REGISTER_TYPE_W   3
#define BRW_REGISTER_TYPE_UB  4
#define BRW_REGISTER_TYPE_B   5
#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
#define BRW_REGISTER_TYPE_HF  6
#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
#define BRW_REGISTER_TYPE_F   7
276
 
277
/* Architecture register identifiers; each one occupies the high nibble. */
#define BRW_ARF_NULL                  0x00
#define BRW_ARF_ADDRESS               0x10
#define BRW_ARF_ACCUMULATOR           0x20
#define BRW_ARF_FLAG                  0x30
#define BRW_ARF_MASK                  0x40
#define BRW_ARF_MASK_STACK            0x50
#define BRW_ARF_MASK_STACK_DEPTH      0x60
#define BRW_ARF_STATE                 0x70
#define BRW_ARF_CONTROL               0x80
#define BRW_ARF_NOTIFICATION_COUNT    0x90
#define BRW_ARF_IP                    0xA0

/* Flag bit (bit 7) for message register numbers. */
#define BRW_MRF_COMPR4			(1 << 7)

/* Mask register selectors. */
#define BRW_AMASK   0
#define BRW_IMASK   1
#define BRW_LMASK   2
#define BRW_CMASK   3

/* Thread control. */
#define BRW_THREAD_NORMAL     0
#define BRW_THREAD_ATOMIC     1
#define BRW_THREAD_SWITCH     2
299
 
300
/*
 * Vertical stride: 0 encodes stride 0, otherwise value = log2(stride) + 1.
 * ONE_DIMENSIONAL is the separate value 0xF.
 */
#define BRW_VERTICAL_STRIDE_0                 0
#define BRW_VERTICAL_STRIDE_1                 1
#define BRW_VERTICAL_STRIDE_2                 2
#define BRW_VERTICAL_STRIDE_4                 3
#define BRW_VERTICAL_STRIDE_8                 4
#define BRW_VERTICAL_STRIDE_16                5
#define BRW_VERTICAL_STRIDE_32                6
#define BRW_VERTICAL_STRIDE_64                7
#define BRW_VERTICAL_STRIDE_128               8
#define BRW_VERTICAL_STRIDE_256               9
#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF

/* Region width: the encoded value is log2 of the width in the name. */
#define BRW_WIDTH_1       0
#define BRW_WIDTH_2       1
#define BRW_WIDTH_4       2
#define BRW_WIDTH_8       3
#define BRW_WIDTH_16      4
317
 
318
/* Stateless buffer boundary: value n corresponds to the name (1K << n). */
#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11

/* Polygon facing. */
#define BRW_POLYGON_FACING_FRONT      0
#define BRW_POLYGON_FACING_BACK       1
333
 
334
/* Message target identifiers. */
#define BRW_MESSAGE_TARGET_NULL               0
#define BRW_MESSAGE_TARGET_MATH               1 /* reserved on GEN6 */
#define BRW_MESSAGE_TARGET_SAMPLER            2
#define BRW_MESSAGE_TARGET_GATEWAY            3
#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
#define BRW_MESSAGE_TARGET_URB                6
#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7

/* GEN6 names; values 4 and 5 coincide with DATAPORT_READ/WRITE above. */
#define GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE  4
#define GEN6_MESSAGE_TARGET_DP_RENDER_CACHE   5
#define GEN6_MESSAGE_TARGET_DP_CONST_CACHE    9
346
 
347
/* Sampler return formats (value 1 is not defined here). */
#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3

/*
 * Sampler message types.  Several names deliberately share one number; the
 * SIMD mode in each name distinguishes them.
 */
#define BRW_SAMPLER_MESSAGE_SAMPLE	              0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0
#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE  1
#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3

/* GEN5 sampler message types: one number per operation, no sharing. */
#define GEN5_SAMPLER_MESSAGE_SAMPLE              0
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS         1
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD          2
#define GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE      3
#define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS       4
#define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE  6
#define GEN5_SAMPLER_MESSAGE_SAMPLE_LD           7
#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO      10

/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
385
 
386
/* OWord block sizes. */
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4

/* OWord dual block sizes. */
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2

/* DWord scattered block sizes. */
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3

/* This one stays the same across generations. */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
/* GEN4 */
#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
#define BRW_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          2
#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
/* G45, GEN5 */
#define G45_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define G45_DATAPORT_READ_MESSAGE_AVC_LOOP_FILTER_READ	    3
#define G45_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6
/* GEN6 */
#define GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ	    1
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     2
#define GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ          4
#define GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ  5
#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      6

/* Dataport read target caches. */
#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2

/* Render target write message variants. */
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
426
 
427
/**
 * Message target: Shared Function ID for where to SEND a message.
 *
 * These are enumerated in the ISA reference under "send - Send Message".
 * In particular, see the following tables:
 * - G45 PRM, Volume 4, Table 14-15 "Message Descriptor Definition"
 * - Sandybridge PRM, Volume 4 Part 2, Table 8-16 "Extended Message Descriptor"
 * - BSpec, Volume 1a (GPU Overview) / Graphics Processing Engine (GPE) /
 *   Overview / GPE Function IDs
 *
 * The GEN6 sampler/render cache IDs reuse the values of the earlier
 * DATAPORT_READ/DATAPORT_WRITE IDs (4 and 5).
 */
enum brw_message_target {
   BRW_SFID_NULL                     = 0,
   BRW_SFID_MATH                     = 1, /* Only valid on Gen4-5 */
   BRW_SFID_SAMPLER                  = 2,
   BRW_SFID_MESSAGE_GATEWAY          = 3,
   BRW_SFID_DATAPORT_READ            = 4,
   BRW_SFID_DATAPORT_WRITE           = 5,
   BRW_SFID_URB                      = 6,
   BRW_SFID_THREAD_SPAWNER           = 7,

   GEN6_SFID_DATAPORT_SAMPLER_CACHE  = 4,
   GEN6_SFID_DATAPORT_RENDER_CACHE   = 5,
   GEN6_SFID_DATAPORT_CONSTANT_CACHE = 9,

   GEN7_SFID_DATAPORT_DATA_CACHE     = 10,
};

/* Macro form of GEN7_SFID_DATAPORT_DATA_CACHE (same value). */
#define GEN7_MESSAGE_TARGET_DP_DATA_CACHE     10
455
 
456
/* Dataport write message types (value 6 is not defined here). */
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
#define BRW_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE                2
#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7

/* GEN6 */
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE              7
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE               8
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE          9
#define GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE               10
#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE           11
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE             12
#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE               13
#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE       14
473
 
474
/*
 * Math function selectors.  Value 9 has two names: TAN on gen4 and FDIV on
 * gen6+, as the per-line comments note.
 */
#define BRW_MATH_FUNCTION_INV                              1
#define BRW_MATH_FUNCTION_LOG                              2
#define BRW_MATH_FUNCTION_EXP                              3
#define BRW_MATH_FUNCTION_SQRT                             4
#define BRW_MATH_FUNCTION_RSQ                              5
#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
#define BRW_MATH_FUNCTION_TAN                              9 /* gen4 */
#define BRW_MATH_FUNCTION_FDIV                             9 /* gen6+ */
#define BRW_MATH_FUNCTION_POW                              10
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13

/* Integer signedness. */
#define BRW_MATH_INTEGER_UNSIGNED     0
#define BRW_MATH_INTEGER_SIGNED       1

/* Precision. */
#define BRW_MATH_PRECISION_FULL        0
#define BRW_MATH_PRECISION_PARTIAL     1

/* Saturation. */
#define BRW_MATH_SATURATE_NONE         0
#define BRW_MATH_SATURATE_SATURATE     1

/* Data layout. */
#define BRW_MATH_DATA_VECTOR  0
#define BRW_MATH_DATA_SCALAR  1
500
 
501
/* URB opcode. */
#define BRW_URB_OPCODE_WRITE  0

/* URB swizzle control. */
#define BRW_URB_SWIZZLE_NONE          0
#define BRW_URB_SWIZZLE_INTERLEAVE    1
#define BRW_URB_SWIZZLE_TRANSPOSE     2

/* Scratch space size: value n corresponds to the name (1K << n). */
#define BRW_SCRATCH_SPACE_SIZE_1K     0
#define BRW_SCRATCH_SPACE_SIZE_2K     1
#define BRW_SCRATCH_SPACE_SIZE_4K     2
#define BRW_SCRATCH_SPACE_SIZE_8K     3
#define BRW_SCRATCH_SPACE_SIZE_16K    4
#define BRW_SCRATCH_SPACE_SIZE_32K    5
#define BRW_SCRATCH_SPACE_SIZE_64K    6
#define BRW_SCRATCH_SPACE_SIZE_128K   7
#define BRW_SCRATCH_SPACE_SIZE_256K   8
#define BRW_SCRATCH_SPACE_SIZE_512K   9
#define BRW_SCRATCH_SPACE_SIZE_1M     10
#define BRW_SCRATCH_SPACE_SIZE_2M     11

/* Register size: 8 * 4 = 32 (NOTE(review): presumably bytes -- confirm). */
#define REG_SIZE (8*4)
521
 
522
struct brw_instruction {
523
	struct {
524
		unsigned opcode:7;
525
		unsigned pad:1;
526
		unsigned access_mode:1;
527
		unsigned mask_control:1;
528
		unsigned dependency_control:2;
529
		unsigned compression_control:2; /* gen6: quater control */
530
		unsigned thread_control:2;
531
		unsigned predicate_control:4;
532
		unsigned predicate_inverse:1;
533
		unsigned execution_size:3;
534
		/**
535
		 * Conditional Modifier for most instructions.  On Gen6+, this is also
536
		 * used for the SEND instruction's Message Target/SFID.
537
		 */
538
		unsigned destreg__conditionalmod:4;
539
		unsigned acc_wr_control:1;
540
		unsigned cmpt_control:1;
541
		unsigned debug_control:1;
542
		unsigned saturate:1;
543
	} header;
544
 
545
	union {
546
		struct {
547
			unsigned dest_reg_file:2;
548
			unsigned dest_reg_type:3;
549
			unsigned src0_reg_file:2;
550
			unsigned src0_reg_type:3;
551
			unsigned src1_reg_file:2;
552
			unsigned src1_reg_type:3;
553
			unsigned pad:1;
554
			unsigned dest_subreg_nr:5;
555
			unsigned dest_reg_nr:8;
556
			unsigned dest_horiz_stride:2;
557
			unsigned dest_address_mode:1;
558
		} da1;
559
 
560
		struct {
561
			unsigned dest_reg_file:2;
562
			unsigned dest_reg_type:3;
563
			unsigned src0_reg_file:2;
564
			unsigned src0_reg_type:3;
565
			unsigned src1_reg_file:2;        /* 0x00000c00 */
566
			unsigned src1_reg_type:3;        /* 0x00007000 */
567
			unsigned pad:1;
568
			int dest_indirect_offset:10;	/* offset against the deref'd address reg */
569
			unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */
570
			unsigned dest_horiz_stride:2;
571
			unsigned dest_address_mode:1;
572
		} ia1;
573
 
574
		struct {
575
			unsigned dest_reg_file:2;
576
			unsigned dest_reg_type:3;
577
			unsigned src0_reg_file:2;
578
			unsigned src0_reg_type:3;
579
			unsigned src1_reg_file:2;
580
			unsigned src1_reg_type:3;
581
			unsigned pad:1;
582
			unsigned dest_writemask:4;
583
			unsigned dest_subreg_nr:1;
584
			unsigned dest_reg_nr:8;
585
			unsigned dest_horiz_stride:2;
586
			unsigned dest_address_mode:1;
587
		} da16;
588
 
589
		struct {
590
			unsigned dest_reg_file:2;
591
			unsigned dest_reg_type:3;
592
			unsigned src0_reg_file:2;
593
			unsigned src0_reg_type:3;
594
			unsigned pad0:6;
595
			unsigned dest_writemask:4;
596
			int dest_indirect_offset:6;
597
			unsigned dest_subreg_nr:3;
598
			unsigned dest_horiz_stride:2;
599
			unsigned dest_address_mode:1;
600
		} ia16;
601
 
602
		struct {
603
			unsigned dest_reg_file:2;
604
			unsigned dest_reg_type:3;
605
			unsigned src0_reg_file:2;
606
			unsigned src0_reg_type:3;
607
			unsigned src1_reg_file:2;
608
			unsigned src1_reg_type:3;
609
			unsigned pad:1;
610
 
611
			int jump_count:16;
612
		} branch_gen6;
613
 
614
		struct {
615
			unsigned dest_reg_file:1;
616
			unsigned flag_subreg_num:1;
617
			unsigned pad0:2;
618
			unsigned src0_abs:1;
619
			unsigned src0_negate:1;
620
			unsigned src1_abs:1;
621
			unsigned src1_negate:1;
622
			unsigned src2_abs:1;
623
			unsigned src2_negate:1;
624
			unsigned pad1:7;
625
			unsigned dest_writemask:4;
626
			unsigned dest_subreg_nr:3;
627
			unsigned dest_reg_nr:8;
628
		} da3src;
629
	} bits1;
630
 
631
 
632
	union {
633
		struct {
634
			unsigned src0_subreg_nr:5;
635
			unsigned src0_reg_nr:8;
636
			unsigned src0_abs:1;
637
			unsigned src0_negate:1;
638
			unsigned src0_address_mode:1;
639
			unsigned src0_horiz_stride:2;
640
			unsigned src0_width:3;
641
			unsigned src0_vert_stride:4;
642
			unsigned flag_subreg_nr:1;
643
			unsigned flag_reg_nr:1;
644
			unsigned pad:5;
645
		} da1;
646
 
647
		struct {
648
			int src0_indirect_offset:10;
649
			unsigned src0_subreg_nr:3;
650
			unsigned src0_abs:1;
651
			unsigned src0_negate:1;
652
			unsigned src0_address_mode:1;
653
			unsigned src0_horiz_stride:2;
654
			unsigned src0_width:3;
655
			unsigned src0_vert_stride:4;
656
			unsigned flag_subreg_nr:1;
657
			unsigned flag_reg_nr:1;
658
			unsigned pad:5;
659
		} ia1;
660
 
661
		struct {
662
			unsigned src0_swz_x:2;
663
			unsigned src0_swz_y:2;
664
			unsigned src0_subreg_nr:1;
665
			unsigned src0_reg_nr:8;
666
			unsigned src0_abs:1;
667
			unsigned src0_negate:1;
668
			unsigned src0_address_mode:1;
669
			unsigned src0_swz_z:2;
670
			unsigned src0_swz_w:2;
671
			unsigned pad0:1;
672
			unsigned src0_vert_stride:4;
673
			unsigned flag_subreg_nr:1;
674
			unsigned flag_reg_nr:1;
675
			unsigned pad1:5;
676
		} da16;
677
 
678
		struct {
679
			unsigned src0_swz_x:2;
680
			unsigned src0_swz_y:2;
681
			int src0_indirect_offset:6;
682
			unsigned src0_subreg_nr:3;
683
			unsigned src0_abs:1;
684
			unsigned src0_negate:1;
685
			unsigned src0_address_mode:1;
686
			unsigned src0_swz_z:2;
687
			unsigned src0_swz_w:2;
688
			unsigned pad0:1;
689
			unsigned src0_vert_stride:4;
690
			unsigned flag_subreg_nr:1;
691
			unsigned flag_reg_nr:1;
692
			unsigned pad1:5;
693
		} ia16;
694
 
695
		/* Extended Message Descriptor for Ironlake (Gen5) SEND instruction.
696
		 *
697
		 * Does not apply to Gen6+.  The SFID/message target moved to bits
698
		 * 27:24 of the header (destreg__conditionalmod); EOT is in bits3.
699
		 */
700
		struct {
701
			unsigned pad:26;
702
			unsigned end_of_thread:1;
703
			unsigned pad1:1;
704
			unsigned sfid:4;
705
		} send_gen5;  /* for Ironlake only */
706
 
707
		struct {
708
			unsigned src0_rep_ctrl:1;
709
			unsigned src0_swizzle:8;
710
			unsigned src0_subreg_nr:3;
711
			unsigned src0_reg_nr:8;
712
			unsigned pad0:1;
713
			unsigned src1_rep_ctrl:1;
714
			unsigned src1_swizzle:8;
715
			unsigned src1_subreg_nr_low:2;
716
		} da3src;
717
	} bits2;
718
 
719
	union {
720
		struct {
721
			unsigned src1_subreg_nr:5;
722
			unsigned src1_reg_nr:8;
723
			unsigned src1_abs:1;
724
			unsigned src1_negate:1;
725
			unsigned src1_address_mode:1;
726
			unsigned src1_horiz_stride:2;
727
			unsigned src1_width:3;
728
			unsigned src1_vert_stride:4;
729
			unsigned pad0:7;
730
		} da1;
731
 
732
		struct {
733
			unsigned src1_swz_x:2;
734
			unsigned src1_swz_y:2;
735
			unsigned src1_subreg_nr:1;
736
			unsigned src1_reg_nr:8;
737
			unsigned src1_abs:1;
738
			unsigned src1_negate:1;
739
			unsigned src1_address_mode:1;
740
			unsigned src1_swz_z:2;
741
			unsigned src1_swz_w:2;
742
			unsigned pad1:1;
743
			unsigned src1_vert_stride:4;
744
			unsigned pad2:7;
745
		} da16;
746
 
747
		struct {
748
			int src1_indirect_offset:10;
749
			unsigned src1_subreg_nr:3;
750
			unsigned src1_abs:1;
751
			unsigned src1_negate:1;
752
			unsigned src1_address_mode:1;
753
			unsigned src1_horiz_stride:2;
754
			unsigned src1_width:3;
755
			unsigned src1_vert_stride:4;
756
			unsigned flag_subreg_nr:1;
757
			unsigned flag_reg_nr:1;
758
			unsigned pad1:5;
759
		} ia1;
760
 
761
		struct {
762
			unsigned src1_swz_x:2;
763
			unsigned src1_swz_y:2;
764
			int  src1_indirect_offset:6;
765
			unsigned src1_subreg_nr:3;
766
			unsigned src1_abs:1;
767
			unsigned src1_negate:1;
768
			unsigned pad0:1;
769
			unsigned src1_swz_z:2;
770
			unsigned src1_swz_w:2;
771
			unsigned pad1:1;
772
			unsigned src1_vert_stride:4;
773
			unsigned flag_subreg_nr:1;
774
			unsigned flag_reg_nr:1;
775
			unsigned pad2:5;
776
		} ia16;
777
 
778
		struct {
779
			int jump_count:16;	/* note: signed */
780
			unsigned pop_count:4;
781
			unsigned pad0:12;
782
		} if_else;
783
 
784
		/* This is also used for gen7 IF/ELSE instructions */
785
		struct {
786
			/* Signed jump distance to the ip to jump to if all channels
787
			 * are disabled after the break or continue.  It should point
788
			 * to the end of the innermost control flow block, as that's
789
			 * where some channel could get re-enabled.
790
			 */
791
			int jip:16;
792
 
793
			/* Signed jump distance to the location to resume execution
794
			 * of this channel if it's enabled for the break or continue.
795
			 */
796
			int uip:16;
797
		} break_cont;
798
 
799
		/**
800
		 * \defgroup SEND instructions / Message Descriptors
801
		 *
802
		 * @{
803
		 */
804
 
805
		/**
806
		 * Generic Message Descriptor for Gen4 SEND instructions.  The structs
807
		 * below expand function_control to something specific for their
808
		 * message.  Due to struct packing issues, they duplicate these bits.
809
		 *
810
		 * See the G45 PRM, Volume 4, Table 14-15.
811
		 */
812
		struct {
813
			unsigned function_control:16;
814
			unsigned response_length:4;
815
			unsigned msg_length:4;
816
			unsigned msg_target:4;
817
			unsigned pad1:3;
818
			unsigned end_of_thread:1;
819
		} generic;
820
 
821
		/**
822
		 * Generic Message Descriptor for Gen5-7 SEND instructions.
823
		 *
824
		 * See the Sandybridge PRM, Volume 2 Part 2, Table 8-15.  (Sadly, most
825
		 * of the information on the SEND instruction is missing from the public
826
		 * Ironlake PRM.)
827
		 *
828
		 * The table claims that bit 31 is reserved/MBZ on Gen6+, but it lies.
829
		 * According to the SEND instruction description:
830
		 * "The MSb of the message description, the EOT field, always comes from
831
		 *  bit 127 of the instruction word"...which is bit 31 of this field.
832
		 */
833
		struct {
834
			unsigned function_control:19;
835
			unsigned header_present:1;
836
			unsigned response_length:5;
837
			unsigned msg_length:4;
838
			unsigned pad1:2;
839
			unsigned end_of_thread:1;
840
		} generic_gen5;
841
 
842
		/** G45 PRM, Volume 4, Section 6.1.1.1 */
843
		struct {
844
			unsigned function:4;
845
			unsigned int_type:1;
846
			unsigned precision:1;
847
			unsigned saturate:1;
848
			unsigned data_type:1;
849
			unsigned pad0:8;
850
			unsigned response_length:4;
851
			unsigned msg_length:4;
852
			unsigned msg_target:4;
853
			unsigned pad1:3;
854
			unsigned end_of_thread:1;
855
		} math;
856
 
857
		/** Ironlake PRM, Volume 4 Part 1, Section 6.1.1.1 */
858
		struct {
859
			unsigned function:4;
860
			unsigned int_type:1;
861
			unsigned precision:1;
862
			unsigned saturate:1;
863
			unsigned data_type:1;
864
			unsigned snapshot:1;
865
			unsigned pad0:10;
866
			unsigned header_present:1;
867
			unsigned response_length:5;
868
			unsigned msg_length:4;
869
			unsigned pad1:2;
870
			unsigned end_of_thread:1;
871
		} math_gen5;
872
 
873
		/** G45 PRM, Volume 4, Section 4.8.1.1.1 [DevBW] and [DevCL] */
874
		struct {
875
			unsigned binding_table_index:8;
876
			unsigned sampler:4;
877
			unsigned return_format:2;
878
			unsigned msg_type:2;
879
			unsigned response_length:4;
880
			unsigned msg_length:4;
881
			unsigned msg_target:4;
882
			unsigned pad1:3;
883
			unsigned end_of_thread:1;
884
		} sampler;
885
 
886
		/** G45 PRM, Volume 4, Section 4.8.1.1.2 [DevCTG] */
887
		struct {
888
			unsigned binding_table_index:8;
889
			unsigned sampler:4;
890
			unsigned msg_type:4;
891
			unsigned response_length:4;
892
			unsigned msg_length:4;
893
			unsigned msg_target:4;
894
			unsigned pad1:3;
895
			unsigned end_of_thread:1;
896
		} sampler_g4x;
897
 
898
		/** Ironlake PRM, Volume 4 Part 1, Section 4.11.1.1.3 */
899
		struct {
900
			unsigned binding_table_index:8;
901
			unsigned sampler:4;
902
			unsigned msg_type:4;
903
			unsigned simd_mode:2;
904
			unsigned pad0:1;
905
			unsigned header_present:1;
906
			unsigned response_length:5;
907
			unsigned msg_length:4;
908
			unsigned pad1:2;
909
			unsigned end_of_thread:1;
910
		} sampler_gen5;
911
 
912
		struct {
913
			unsigned binding_table_index:8;
914
			unsigned sampler:4;
915
			unsigned msg_type:5;
916
			unsigned simd_mode:2;
917
			unsigned header_present:1;
918
			unsigned response_length:5;
919
			unsigned msg_length:4;
920
			unsigned pad1:2;
921
			unsigned end_of_thread:1;
922
		} sampler_gen7;
923
 
924
		struct brw_urb_immediate {
925
			unsigned opcode:4;
926
			unsigned offset:6;
927
			unsigned swizzle_control:2;
928
			unsigned pad:1;
929
			unsigned allocate:1;
930
			unsigned used:1;
931
			unsigned complete:1;
932
			unsigned response_length:4;
933
			unsigned msg_length:4;
934
			unsigned msg_target:4;
935
			unsigned pad1:3;
936
			unsigned end_of_thread:1;
937
		} urb;
938
 
939
		struct {
940
			unsigned opcode:4;
941
			unsigned offset:6;
942
			unsigned swizzle_control:2;
943
			unsigned pad:1;
944
			unsigned allocate:1;
945
			unsigned used:1;
946
			unsigned complete:1;
947
			unsigned pad0:3;
948
			unsigned header_present:1;
949
			unsigned response_length:5;
950
			unsigned msg_length:4;
951
			unsigned pad1:2;
952
			unsigned end_of_thread:1;
953
		} urb_gen5;
954
 
955
		struct {
956
			unsigned opcode:3;
957
			unsigned offset:11;
958
			unsigned swizzle_control:1;
959
			unsigned complete:1;
960
			unsigned per_slot_offset:1;
961
			unsigned pad0:2;
962
			unsigned header_present:1;
963
			unsigned response_length:5;
964
			unsigned msg_length:4;
965
			unsigned pad1:2;
966
			unsigned end_of_thread:1;
967
		} urb_gen7;
968
 
969
		/** 965 PRM, Volume 4, Section 5.10.1.1: Message Descriptor */
970
		struct {
971
			unsigned binding_table_index:8;
972
			unsigned msg_control:4;
973
			unsigned msg_type:2;
974
			unsigned target_cache:2;
975
			unsigned response_length:4;
976
			unsigned msg_length:4;
977
			unsigned msg_target:4;
978
			unsigned pad1:3;
979
			unsigned end_of_thread:1;
980
		} dp_read;
981
 
982
		/** G45 PRM, Volume 4, Section 5.10.1.1.2 */
983
		struct {
984
			unsigned binding_table_index:8;
985
			unsigned msg_control:3;
986
			unsigned msg_type:3;
987
			unsigned target_cache:2;
988
			unsigned response_length:4;
989
			unsigned msg_length:4;
990
			unsigned msg_target:4;
991
			unsigned pad1:3;
992
			unsigned end_of_thread:1;
993
		} dp_read_g4x;
994
 
995
		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
996
		struct {
997
			unsigned binding_table_index:8;
998
			unsigned msg_control:3;
999
			unsigned msg_type:3;
1000
			unsigned target_cache:2;
1001
			unsigned pad0:3;
1002
			unsigned header_present:1;
1003
			unsigned response_length:5;
1004
			unsigned msg_length:4;
1005
			unsigned pad1:2;
1006
			unsigned end_of_thread:1;
1007
		} dp_read_gen5;
1008
 
1009
		/** G45 PRM, Volume 4, Section 5.10.1.1.2.  For both Gen4 and G45. */
1010
		struct {
1011
			unsigned binding_table_index:8;
1012
			unsigned msg_control:3;
1013
			unsigned last_render_target:1;
1014
			unsigned msg_type:3;
1015
			unsigned send_commit_msg:1;
1016
			unsigned response_length:4;
1017
			unsigned msg_length:4;
1018
			unsigned msg_target:4;
1019
			unsigned pad1:3;
1020
			unsigned end_of_thread:1;
1021
		} dp_write;
1022
 
1023
		/** Ironlake PRM, Volume 4 Part 1, Section 5.10.2.1.2. */
1024
		struct {
1025
			unsigned binding_table_index:8;
1026
			unsigned msg_control:3;
1027
			unsigned last_render_target:1;
1028
			unsigned msg_type:3;
1029
			unsigned send_commit_msg:1;
1030
			unsigned pad0:3;
1031
			unsigned header_present:1;
1032
			unsigned response_length:5;
1033
			unsigned msg_length:4;
1034
			unsigned pad1:2;
1035
			unsigned end_of_thread:1;
1036
		} dp_write_gen5;
1037
 
1038
		/**
1039
		 * Message for the Sandybridge Sampler Cache or Constant Cache Data Port.
1040
		 *
1041
		 * See the Sandybridge PRM, Volume 4 Part 1, Section 3.9.2.1.1.
1042
		 **/
1043
		struct {
1044
			unsigned binding_table_index:8;
1045
			unsigned msg_control:5;
1046
			unsigned msg_type:3;
1047
			unsigned pad0:3;
1048
			unsigned header_present:1;
1049
			unsigned response_length:5;
1050
			unsigned msg_length:4;
1051
			unsigned pad1:2;
1052
			unsigned end_of_thread:1;
1053
		} gen6_dp_sampler_const_cache;
1054
 
1055
		/**
1056
		 * Message for the Sandybridge Render Cache Data Port.
1057
		 *
1058
		 * Most fields are defined in the Sandybridge PRM, Volume 4 Part 1,
1059
		 * Section 3.9.2.1.1: Message Descriptor.
1060
		 *
1061
		 * "Slot Group Select" and "Last Render Target" are part of the
1062
		 * 5-bit message control for Render Target Write messages.  See
1063
		 * Section 3.9.9.2.1 of the same volume.
1064
		 */
1065
		struct {
1066
			unsigned binding_table_index:8;
1067
			unsigned msg_control:3;
1068
			unsigned slot_group_select:1;
1069
			unsigned last_render_target:1;
1070
			unsigned msg_type:4;
1071
			unsigned send_commit_msg:1;
1072
			unsigned pad0:1;
1073
			unsigned header_present:1;
1074
			unsigned response_length:5;
1075
			unsigned msg_length:4;
1076
			unsigned pad1:2;
1077
			unsigned end_of_thread:1;
1078
		} gen6_dp;
1079
 
1080
		/**
1081
		 * Message for any of the Gen7 Data Port caches.
1082
		 *
1083
		 * Most fields are defined in BSpec volume 5c.2 Data Port / Messages /
1084
		 * Data Port Messages / Message Descriptor.  Once again, "Slot Group
1085
		 * Select" and "Last Render Target" are part of the 6-bit message
1086
		 * control for Render Target Writes.
1087
		 */
1088
		struct {
1089
			unsigned binding_table_index:8;
1090
			unsigned msg_control:3;
1091
			unsigned slot_group_select:1;
1092
			unsigned last_render_target:1;
1093
			unsigned msg_control_pad:1;
1094
			unsigned msg_type:4;
1095
			unsigned pad1:1;
1096
			unsigned header_present:1;
1097
			unsigned response_length:5;
1098
			unsigned msg_length:4;
1099
			unsigned pad2:2;
1100
			unsigned end_of_thread:1;
1101
		} gen7_dp;
1102
		/** @} */
1103
 
1104
		struct {
1105
			unsigned src1_subreg_nr_high:1;
1106
			unsigned src1_reg_nr:8;
1107
			unsigned pad0:1;
1108
			unsigned src2_rep_ctrl:1;
1109
			unsigned src2_swizzle:8;
1110
			unsigned src2_subreg_nr:3;
1111
			unsigned src2_reg_nr:8;
1112
			unsigned pad1:2;
1113
		} da3src;
1114
 
1115
		int d;
1116
		unsigned ud;
1117
		float f;
1118
	} bits3;
1119
};
1120
 
1121
 
1122
/* Software-side register descriptor -- not a hardware structure, just
 * something convenient to pass around inside the code generator.
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form.
 */
struct brw_reg {
	/* First dword: register identity and region description. */
	unsigned int type:4;
	unsigned int file:2;
	unsigned int nr:8;
	unsigned int subnr:5;		/* :1 in align16 */
	unsigned int negate:1;		/* source only */
	unsigned int abs:1;		/* source only */
	unsigned int vstride:4;		/* source only */
	unsigned int width:3;		/* src only, align1 only */
	unsigned int hstride:2;		/* align1 only */
	unsigned int address_mode:1;	/* relative addressing, hopefully! */
	unsigned int pad0:1;

	/* Second dword: either align16/indirect bits or an immediate value. */
	union {
		struct {
			unsigned int swizzle:8;		/* src only, align16 only */
			unsigned int writemask:4;	/* dest only, align16 only */
			int indirect_offset:10;		/* relative addressing offset */
			unsigned int pad1:10;		/* two dwords total */
		} bits;

		float f;
		int d;
		unsigned int ud;
	} dw1;
};
1154
 
1155
/* Handle for register-indirect addressing: an address sub-register
 * number plus a signed offset (see deref_*() and brw_indirect()).
 */
struct brw_indirect {
	unsigned int addr_subnr:4;
	int addr_offset:10;
	unsigned int pad:18;
};
1160
 
1161
/* Depth of the push/pop instruction-state stack in struct brw_compile. */
#define BRW_EU_MAX_INSN_STACK 5
1162
/* Upper bound on emitted instructions; asserted in brw_next_insn(). */
#define BRW_EU_MAX_INSN 10000
1163
 
1164
struct brw_compile {
1165
	struct brw_instruction *store;
1166
	unsigned nr_insn;
1167
 
1168
	int gen;
1169
 
1170
	/* Allow clients to push/pop instruction state:
1171
	*/
1172
	struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
1173
	bool compressed_stack[BRW_EU_MAX_INSN_STACK];
1174
	struct brw_instruction *current;
1175
 
1176
	unsigned flag_value;
1177
	bool single_program_flow;
1178
	bool compressed;
1179
 
1180
	/* Control flow stacks:
1181
	 * - if_stack contains IF and ELSE instructions which must be patched
1182
	 *   (and popped) once the matching ENDIF instruction is encountered.
1183
	 */
1184
	struct brw_instruction **if_stack;
1185
	int if_stack_depth;
1186
	int if_stack_array_size;
1187
};
1188
 
1189
static inline int type_sz(unsigned type)
1190
{
1191
	switch (type) {
1192
	case BRW_REGISTER_TYPE_UD:
1193
	case BRW_REGISTER_TYPE_D:
1194
	case BRW_REGISTER_TYPE_F:
1195
		return 4;
1196
	case BRW_REGISTER_TYPE_HF:
1197
	case BRW_REGISTER_TYPE_UW:
1198
	case BRW_REGISTER_TYPE_W:
1199
		return 2;
1200
	case BRW_REGISTER_TYPE_UB:
1201
	case BRW_REGISTER_TYPE_B:
1202
		return 1;
1203
	default:
1204
		return 0;
1205
	}
1206
}
1207
 
1208
/**
1209
 * Construct a brw_reg.
1210
 * \param file  one of the BRW_x_REGISTER_FILE values
1211
 * \param nr  register number/index
1212
 * \param subnr  register sub number
1213
 * \param type  one of BRW_REGISTER_TYPE_x
1214
 * \param vstride  one of BRW_VERTICAL_STRIDE_x
1215
 * \param width  one of BRW_WIDTH_x
1216
 * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
1217
 * \param swizzle  one of BRW_SWIZZLE_x
1218
 * \param writemask  WRITEMASK_X/Y/Z/W bitfield
1219
 */
1220
static inline struct brw_reg brw_reg(unsigned file,
1221
				     unsigned nr,
1222
				     unsigned subnr,
1223
				     unsigned type,
1224
				     unsigned vstride,
1225
				     unsigned width,
1226
				     unsigned hstride,
1227
				     unsigned swizzle,
1228
				     unsigned writemask)
1229
{
1230
	struct brw_reg reg;
1231
	if (file == BRW_GENERAL_REGISTER_FILE)
1232
		assert(nr < BRW_MAX_GRF);
1233
	else if (file == BRW_MESSAGE_REGISTER_FILE)
1234
		assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1235
	else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
1236
		assert(nr <= BRW_ARF_IP);
1237
 
1238
	reg.type = type;
1239
	reg.file = file;
1240
	reg.nr = nr;
1241
	reg.subnr = subnr * type_sz(type);
1242
	reg.negate = 0;
1243
	reg.abs = 0;
1244
	reg.vstride = vstride;
1245
	reg.width = width;
1246
	reg.hstride = hstride;
1247
	reg.address_mode = BRW_ADDRESS_DIRECT;
1248
	reg.pad0 = 0;
1249
 
1250
	/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
1251
	 * set swizzle and writemask to W, as the lower bits of subnr will
1252
	 * be lost when converted to align16.  This is probably too much to
1253
	 * keep track of as you'd want it adjusted by suboffset(), etc.
1254
	 * Perhaps fix up when converting to align16?
1255
	 */
1256
	reg.dw1.bits.swizzle = swizzle;
1257
	reg.dw1.bits.writemask = writemask;
1258
	reg.dw1.bits.indirect_offset = 0;
1259
	reg.dw1.bits.pad1 = 0;
1260
	return reg;
1261
}
1262
 
1263
/** Construct float[16] register */
1264
static inline struct brw_reg brw_vec16_reg(unsigned file,
1265
					   unsigned nr,
1266
					   unsigned subnr)
1267
{
1268
	return brw_reg(file,
1269
		       nr,
1270
		       subnr,
1271
		       BRW_REGISTER_TYPE_F,
1272
		       BRW_VERTICAL_STRIDE_16,
1273
		       BRW_WIDTH_16,
1274
		       BRW_HORIZONTAL_STRIDE_1,
1275
		       BRW_SWIZZLE_XYZW,
1276
		       WRITEMASK_XYZW);
1277
}
1278
 
1279
/** Construct float[8] register */
1280
static inline struct brw_reg brw_vec8_reg(unsigned file,
1281
					  unsigned nr,
1282
					  unsigned subnr)
1283
{
1284
	return brw_reg(file,
1285
		       nr,
1286
		       subnr,
1287
		       BRW_REGISTER_TYPE_F,
1288
		       BRW_VERTICAL_STRIDE_8,
1289
		       BRW_WIDTH_8,
1290
		       BRW_HORIZONTAL_STRIDE_1,
1291
		       BRW_SWIZZLE_XYZW,
1292
		       WRITEMASK_XYZW);
1293
}
1294
 
1295
/** Construct float[4] register */
1296
static inline struct brw_reg brw_vec4_reg(unsigned file,
1297
					  unsigned nr,
1298
					  unsigned subnr)
1299
{
1300
	return brw_reg(file,
1301
		       nr,
1302
		       subnr,
1303
		       BRW_REGISTER_TYPE_F,
1304
		       BRW_VERTICAL_STRIDE_4,
1305
		       BRW_WIDTH_4,
1306
		       BRW_HORIZONTAL_STRIDE_1,
1307
		       BRW_SWIZZLE_XYZW,
1308
		       WRITEMASK_XYZW);
1309
}
1310
 
1311
/** Construct float[2] register */
1312
static inline struct brw_reg brw_vec2_reg(unsigned file,
1313
					  unsigned nr,
1314
					  unsigned subnr)
1315
{
1316
	return brw_reg(file,
1317
		       nr,
1318
		       subnr,
1319
		       BRW_REGISTER_TYPE_F,
1320
		       BRW_VERTICAL_STRIDE_2,
1321
		       BRW_WIDTH_2,
1322
		       BRW_HORIZONTAL_STRIDE_1,
1323
		       BRW_SWIZZLE_XYXY,
1324
		       WRITEMASK_XY);
1325
}
1326
 
1327
/** Construct float[1] register */
1328
static inline struct brw_reg brw_vec1_reg(unsigned file,
1329
					  unsigned nr,
1330
					  unsigned subnr)
1331
{
1332
	return brw_reg(file,
1333
		       nr,
1334
		       subnr,
1335
		       BRW_REGISTER_TYPE_F,
1336
		       BRW_VERTICAL_STRIDE_0,
1337
		       BRW_WIDTH_1,
1338
		       BRW_HORIZONTAL_STRIDE_0,
1339
		       BRW_SWIZZLE_XXXX,
1340
		       WRITEMASK_X);
1341
}
1342
 
1343
 
1344
static inline struct brw_reg __retype(struct brw_reg reg,
1345
				      unsigned type)
1346
{
1347
	reg.type = type;
1348
	return reg;
1349
}
1350
 
1351
static inline struct brw_reg __retype_d(struct brw_reg reg)
1352
{
1353
	return __retype(reg, BRW_REGISTER_TYPE_D);
1354
}
1355
 
1356
static inline struct brw_reg __retype_ud(struct brw_reg reg)
1357
{
1358
	return __retype(reg, BRW_REGISTER_TYPE_UD);
1359
}
1360
 
1361
static inline struct brw_reg __retype_uw(struct brw_reg reg)
1362
{
1363
	return __retype(reg, BRW_REGISTER_TYPE_UW);
1364
}
1365
 
1366
/**
 * Return \p reg advanced by one register number, unless its vertical
 * stride is zero, in which case it is returned unchanged.
 */
static inline struct brw_reg __sechalf(struct brw_reg reg)
{
	if (reg.vstride != 0) {
		reg.nr += 1;
	}

	return reg;
}
1372
 
1373
static inline struct brw_reg __suboffset(struct brw_reg reg,
1374
					 unsigned delta)
1375
{
1376
	reg.subnr += delta * type_sz(reg.type);
1377
	return reg;
1378
}
1379
 
1380
static inline struct brw_reg __offset(struct brw_reg reg,
1381
				      unsigned delta)
1382
{
1383
	reg.nr += delta;
1384
	return reg;
1385
}
1386
 
1387
static inline struct brw_reg byte_offset(struct brw_reg reg,
1388
					 unsigned bytes)
1389
{
1390
	unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
1391
	reg.nr = newoffset / REG_SIZE;
1392
	reg.subnr = newoffset % REG_SIZE;
1393
	return reg;
1394
}
1395
 
1396
 
1397
/** Construct unsigned word[16] register */
1398
static inline struct brw_reg brw_uw16_reg(unsigned file,
1399
					  unsigned nr,
1400
					  unsigned subnr)
1401
{
1402
	return __suboffset(__retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1403
}
1404
 
1405
/** Construct unsigned word[8] register */
1406
static inline struct brw_reg brw_uw8_reg(unsigned file,
1407
					 unsigned nr,
1408
					 unsigned subnr)
1409
{
1410
	return __suboffset(__retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1411
}
1412
 
1413
/** Construct unsigned word[1] register */
1414
static inline struct brw_reg brw_uw1_reg(unsigned file,
1415
					 unsigned nr,
1416
					 unsigned subnr)
1417
{
1418
	return __suboffset(__retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
1419
}
1420
 
1421
static inline struct brw_reg brw_imm_reg(unsigned type)
1422
{
1423
	return brw_reg( BRW_IMMEDIATE_VALUE,
1424
			0,
1425
			0,
1426
			type,
1427
			BRW_VERTICAL_STRIDE_0,
1428
			BRW_WIDTH_1,
1429
			BRW_HORIZONTAL_STRIDE_0,
1430
			0,
1431
			0);
1432
}
1433
 
1434
/** Construct float immediate register */
1435
static inline struct brw_reg brw_imm_f(float f)
1436
{
1437
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
1438
	imm.dw1.f = f;
1439
	return imm;
1440
}
1441
 
1442
/** Construct integer immediate register */
1443
static inline struct brw_reg brw_imm_d(int d)
1444
{
1445
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
1446
	imm.dw1.d = d;
1447
	return imm;
1448
}
1449
 
1450
/** Construct uint immediate register */
1451
static inline struct brw_reg brw_imm_ud(unsigned ud)
1452
{
1453
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
1454
	imm.dw1.ud = ud;
1455
	return imm;
1456
}
1457
 
1458
/** Construct ushort immediate register */
1459
static inline struct brw_reg brw_imm_uw(uint16_t uw)
1460
{
1461
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
1462
	imm.dw1.ud = uw | (uw << 16);
1463
	return imm;
1464
}
1465
 
1466
/** Construct short immediate register */
1467
static inline struct brw_reg brw_imm_w(int16_t w)
1468
{
1469
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
1470
	imm.dw1.d = w | (w << 16);
1471
	return imm;
1472
}
1473
 
1474
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
1475
 * numbers alias with _V and _VF below:
1476
 */
1477
 
1478
/** Construct vector of eight signed half-byte values */
1479
static inline struct brw_reg brw_imm_v(unsigned v)
1480
{
1481
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
1482
	imm.vstride = BRW_VERTICAL_STRIDE_0;
1483
	imm.width = BRW_WIDTH_8;
1484
	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1485
	imm.dw1.ud = v;
1486
	return imm;
1487
}
1488
 
1489
/** Construct vector of four 8-bit float values */
1490
static inline struct brw_reg brw_imm_vf(unsigned v)
1491
{
1492
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1493
	imm.vstride = BRW_VERTICAL_STRIDE_0;
1494
	imm.width = BRW_WIDTH_4;
1495
	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1496
	imm.dw1.ud = v;
1497
	return imm;
1498
}
1499
 
1500
/* NOTE(review): presumably the 8-bit VF encoding of 0.0, for brw_imm_vf4() -- confirm. */
#define VF_ZERO 0x0
1501
/* NOTE(review): presumably the 8-bit VF encoding of 1.0 -- confirm. */
#define VF_ONE  0x30
1502
/* NOTE(review): presumably the sign bit of an 8-bit VF value, to be OR'ed in -- confirm. */
#define VF_NEG  (1<<7)
1503
 
1504
static inline struct brw_reg brw_imm_vf4(unsigned v0,
1505
					 unsigned v1,
1506
					 unsigned v2,
1507
					 unsigned v3)
1508
{
1509
	struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
1510
	imm.vstride = BRW_VERTICAL_STRIDE_0;
1511
	imm.width = BRW_WIDTH_4;
1512
	imm.hstride = BRW_HORIZONTAL_STRIDE_1;
1513
	imm.dw1.ud = ((v0 << 0) |
1514
		      (v1 << 8) |
1515
		      (v2 << 16) |
1516
		      (v3 << 24));
1517
	return imm;
1518
}
1519
 
1520
static inline struct brw_reg brw_address(struct brw_reg reg)
1521
{
1522
	return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
1523
}
1524
 
1525
/** Construct float[1] general-purpose register */
1526
static inline struct brw_reg brw_vec1_grf(unsigned nr, unsigned subnr)
1527
{
1528
	return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1529
}
1530
 
1531
/** Construct float[2] general-purpose register */
1532
static inline struct brw_reg brw_vec2_grf(unsigned nr, unsigned subnr)
1533
{
1534
	return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1535
}
1536
 
1537
/** Construct float[4] general-purpose register */
1538
static inline struct brw_reg brw_vec4_grf(unsigned nr, unsigned subnr)
1539
{
1540
	return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1541
}
1542
 
1543
/** Construct float[8] general-purpose register */
1544
static inline struct brw_reg brw_vec8_grf(unsigned nr, unsigned subnr)
1545
{
1546
	return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1547
}
1548
 
1549
static inline struct brw_reg brw_uw8_grf(unsigned nr, unsigned subnr)
1550
{
1551
	return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1552
}
1553
 
1554
static inline struct brw_reg brw_uw16_grf(unsigned nr, unsigned subnr)
1555
{
1556
	return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
1557
}
1558
 
1559
/** Construct null register (usually used for setting condition codes) */
1560
static inline struct brw_reg brw_null_reg(void)
1561
{
1562
	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1563
			    BRW_ARF_NULL,
1564
			    0);
1565
}
1566
 
1567
static inline struct brw_reg brw_address_reg(unsigned subnr)
1568
{
1569
	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1570
			   BRW_ARF_ADDRESS,
1571
			   subnr);
1572
}
1573
 
1574
/* If/else instructions break in align16 mode if writemask & swizzle
1575
 * aren't xyzw.  This goes against the convention for other scalar
1576
 * regs:
1577
 */
1578
static inline struct brw_reg brw_ip_reg(void)
1579
{
1580
	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1581
		       BRW_ARF_IP,
1582
		       0,
1583
		       BRW_REGISTER_TYPE_UD,
1584
		       BRW_VERTICAL_STRIDE_4, /* ? */
1585
		       BRW_WIDTH_1,
1586
		       BRW_HORIZONTAL_STRIDE_0,
1587
		       BRW_SWIZZLE_XYZW, /* NOTE! */
1588
		       WRITEMASK_XYZW); /* NOTE! */
1589
}
1590
 
1591
static inline struct brw_reg brw_acc_reg(void)
1592
{
1593
	return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1594
			    BRW_ARF_ACCUMULATOR,
1595
			    0);
1596
}
1597
 
1598
static inline struct brw_reg brw_notification_1_reg(void)
1599
{
1600
	return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1601
		       BRW_ARF_NOTIFICATION_COUNT,
1602
		       1,
1603
		       BRW_REGISTER_TYPE_UD,
1604
		       BRW_VERTICAL_STRIDE_0,
1605
		       BRW_WIDTH_1,
1606
		       BRW_HORIZONTAL_STRIDE_0,
1607
		       BRW_SWIZZLE_XXXX,
1608
		       WRITEMASK_X);
1609
}
1610
 
1611
static inline struct brw_reg brw_flag_reg(void)
1612
{
1613
	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1614
			   BRW_ARF_FLAG,
1615
			   0);
1616
}
1617
 
1618
static inline struct brw_reg brw_mask_reg(unsigned subnr)
1619
{
1620
	return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
1621
			   BRW_ARF_MASK,
1622
			   subnr);
1623
}
1624
 
1625
static inline struct brw_reg brw_message_reg(unsigned nr)
1626
{
1627
	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1628
	return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
1629
}
1630
 
1631
static inline struct brw_reg brw_message4_reg(unsigned nr,
1632
					      int subnr)
1633
{
1634
	assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
1635
	return brw_vec4_reg(BRW_MESSAGE_REGISTER_FILE, nr, subnr);
1636
}
1637
 
1638
/* Map an element count (0, 1, 2, 4, 8, 16, 32) to the encoding used by
 * __stride(): 0 -> 0, and a power of two 2^k -> k + 1.  Any other input
 * yields 0.
 *
 * This is almost always called with a numeric constant argument, so it
 * is kept as a plain switch to stay easy to evaluate at compile time.
 */
static inline unsigned cvt(unsigned val)
{
	unsigned code;

	switch (val) {
	case 1:
		code = 1;
		break;
	case 2:
		code = 2;
		break;
	case 4:
		code = 3;
		break;
	case 8:
		code = 4;
		break;
	case 16:
		code = 5;
		break;
	case 32:
		code = 6;
		break;
	case 0:
	default:
		code = 0;
		break;
	}

	return code;
}
1654
 
1655
static inline struct brw_reg __stride(struct brw_reg reg,
1656
				    unsigned vstride,
1657
				    unsigned width,
1658
				    unsigned hstride)
1659
{
1660
	reg.vstride = cvt(vstride);
1661
	reg.width = cvt(width) - 1;
1662
	reg.hstride = cvt(hstride);
1663
	return reg;
1664
}
1665
 
1666
/** Return \p reg with a 16-wide region: vstride 16, width 16, hstride 1. */
static inline struct brw_reg vec16(struct brw_reg reg)
{
	const unsigned n = 16;

	return __stride(reg, n, n, 1);
}
1670
 
1671
/** Return \p reg with an 8-wide region: vstride 8, width 8, hstride 1. */
static inline struct brw_reg vec8(struct brw_reg reg)
{
	const unsigned n = 8;

	return __stride(reg, n, n, 1);
}
1675
 
1676
/** Return \p reg with a 4-wide region: vstride 4, width 4, hstride 1. */
static inline struct brw_reg vec4(struct brw_reg reg)
{
	const unsigned n = 4;

	return __stride(reg, n, n, 1);
}
1680
 
1681
/** Return \p reg with a 2-wide region: vstride 2, width 2, hstride 1. */
static inline struct brw_reg vec2(struct brw_reg reg)
{
	const unsigned n = 2;

	return __stride(reg, n, n, 1);
}
1685
 
1686
static inline struct brw_reg vec1(struct brw_reg reg)
1687
{
1688
	return __stride(reg, 0,1,0);
1689
}
1690
 
1691
static inline struct brw_reg get_element(struct brw_reg reg, unsigned elt)
1692
{
1693
	return vec1(__suboffset(reg, elt));
1694
}
1695
 
1696
static inline struct brw_reg get_element_ud(struct brw_reg reg, unsigned elt)
1697
{
1698
	return vec1(__suboffset(__retype(reg, BRW_REGISTER_TYPE_UD), elt));
1699
}
1700
 
1701
static inline struct brw_reg brw_swizzle(struct brw_reg reg,
1702
					 unsigned x,
1703
					 unsigned y,
1704
					 unsigned z,
1705
					 unsigned w)
1706
{
1707
	assert(reg.file != BRW_IMMEDIATE_VALUE);
1708
 
1709
	reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
1710
					    BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
1711
					    BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
1712
					    BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
1713
	return reg;
1714
}
1715
 
1716
static inline struct brw_reg brw_swizzle1(struct brw_reg reg,
1717
					  unsigned x)
1718
{
1719
	return brw_swizzle(reg, x, x, x, x);
1720
}
1721
 
1722
static inline struct brw_reg brw_writemask(struct brw_reg reg,
1723
					   unsigned mask)
1724
{
1725
	assert(reg.file != BRW_IMMEDIATE_VALUE);
1726
	reg.dw1.bits.writemask &= mask;
1727
	return reg;
1728
}
1729
 
1730
static inline struct brw_reg brw_set_writemask(struct brw_reg reg,
1731
					       unsigned mask)
1732
{
1733
	assert(reg.file != BRW_IMMEDIATE_VALUE);
1734
	reg.dw1.bits.writemask = mask;
1735
	return reg;
1736
}
1737
 
1738
/** Toggle the negate modifier of \p reg (negating twice cancels out). */
static inline struct brw_reg brw_negate(struct brw_reg reg)
{
	reg.negate = !reg.negate;
	return reg;
}
1743
 
1744
static inline struct brw_reg brw_abs(struct brw_reg reg)
1745
{
1746
	reg.abs = 1;
1747
	return reg;
1748
}
1749
 
1750
/***********************************************************************
1751
*/
1752
static inline struct brw_reg brw_vec4_indirect(unsigned subnr,
1753
					       int offset)
1754
{
1755
	struct brw_reg reg =  brw_vec4_grf(0, 0);
1756
	reg.subnr = subnr;
1757
	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1758
	reg.dw1.bits.indirect_offset = offset;
1759
	return reg;
1760
}
1761
 
1762
static inline struct brw_reg brw_vec1_indirect(unsigned subnr,
1763
					       int offset)
1764
{
1765
	struct brw_reg reg =  brw_vec1_grf(0, 0);
1766
	reg.subnr = subnr;
1767
	reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
1768
	reg.dw1.bits.indirect_offset = offset;
1769
	return reg;
1770
}
1771
 
1772
/** float[4] indirect operand at \p ptr, displaced by a further \p offset. */
static inline struct brw_reg deref_4f(struct brw_indirect ptr, int offset)
{
	const int total = ptr.addr_offset + offset;

	return brw_vec4_indirect(ptr.addr_subnr, total);
}
1776
 
1777
/** float[1] indirect operand at \p ptr, displaced by a further \p offset. */
static inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset)
{
	const int total = ptr.addr_offset + offset;

	return brw_vec1_indirect(ptr.addr_subnr, total);
}
1781
 
1782
static inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset)
1783
{
1784
	return __retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
1785
}
1786
 
1787
static inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset)
1788
{
1789
	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
1790
}
1791
 
1792
static inline struct brw_reg deref_1d(struct brw_indirect ptr, int offset)
1793
{
1794
	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
1795
}
1796
 
1797
static inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset)
1798
{
1799
	return __retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
1800
}
1801
 
1802
/** Return the address sub-register that \p ptr refers to (its offset is ignored). */
static inline struct brw_reg get_addr_reg(struct brw_indirect ptr)
{
	const unsigned subnr = ptr.addr_subnr;

	return brw_address_reg(subnr);
}
1806
 
1807
/** Return \p ptr with \p offset added to its addr_offset. */
static inline struct brw_indirect brw_indirect_offset(struct brw_indirect ptr, int offset)
{
	ptr.addr_offset = ptr.addr_offset + offset;
	return ptr;
}
1812
 
1813
static inline struct brw_indirect brw_indirect(unsigned addr_subnr, int offset)
1814
{
1815
	struct brw_indirect ptr;
1816
	ptr.addr_subnr = addr_subnr;
1817
	ptr.addr_offset = offset;
1818
	ptr.pad = 0;
1819
	return ptr;
1820
}
1821
 
1822
/** Do two brw_regs refer to the same register? */
1823
static inline bool brw_same_reg(struct brw_reg r1, struct brw_reg r2)
1824
{
1825
	return r1.file == r2.file && r1.nr == r2.nr;
1826
}
1827
 
1828
static inline struct brw_instruction *current_insn( struct brw_compile *p)
1829
{
1830
	return &p->store[p->nr_insn];
1831
}
1832
 
1833
static inline void brw_set_predicate_control( struct brw_compile *p, unsigned pc )
1834
{
1835
	p->current->header.predicate_control = pc;
1836
}
1837
 
1838
static inline void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse)
1839
{
1840
	p->current->header.predicate_inverse = predicate_inverse;
1841
}
1842
 
1843
static inline void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional )
1844
{
1845
	p->current->header.destreg__conditionalmod = conditional;
1846
}
1847
 
1848
static inline void brw_set_access_mode(struct brw_compile *p, unsigned access_mode)
1849
{
1850
	p->current->header.access_mode = access_mode;
1851
}
1852
 
1853
static inline void brw_set_mask_control(struct brw_compile *p, unsigned value)
1854
{
1855
	p->current->header.mask_control = value;
1856
}
1857
 
1858
static inline void brw_set_saturate(struct brw_compile *p, unsigned value)
1859
{
1860
	p->current->header.saturate = value;
1861
}
1862
 
1863
static inline void brw_set_acc_write_control(struct brw_compile *p, unsigned value)
1864
{
1865
	if (p->gen >= 060)
1866
		p->current->header.acc_wr_control = value;
1867
}
1868
 
1869
void brw_pop_insn_state(struct brw_compile *p);
1870
void brw_push_insn_state(struct brw_compile *p);
1871
void brw_set_compression_control(struct brw_compile *p, enum brw_compression control);
1872
void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value );
1873
 
1874
void brw_compile_init(struct brw_compile *p, int gen, void *store);
1875
 
1876
void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
1877
		  struct brw_reg dest);
1878
void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
1879
		  struct brw_reg reg);
1880
void brw_set_src1(struct brw_compile *p,
1881
		  struct brw_instruction *insn,
1882
		  struct brw_reg reg);
1883
 
1884
void gen6_resolve_implied_move(struct brw_compile *p,
1885
			       struct brw_reg *src,
1886
			       unsigned msg_reg_nr);
1887
 
1888
static inline struct brw_instruction *
1889
brw_next_insn(struct brw_compile *p, unsigned opcode)
1890
{
1891
	struct brw_instruction *insn;
1892
 
1893
	assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
1894
 
1895
	insn = &p->store[p->nr_insn++];
1896
	*insn = *p->current;
1897
 
1898
	if (p->current->header.destreg__conditionalmod) {
1899
		p->current->header.destreg__conditionalmod = 0;
1900
		p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
1901
	}
1902
 
1903
	insn->header.opcode = opcode;
1904
	return insn;
1905
}
1906
 
1907
/* Helpers for regular instructions:
 *
 * ALU1(OP) defines brw_OP(p, dest, src0), which emits BRW_OPCODE_OP
 * through brw_alu1().
 */
#define ALU1(OP) \
static inline struct brw_instruction * \
brw_##OP(struct brw_compile *p, \
	 struct brw_reg dest, \
	 struct brw_reg src0) \
{ \
	return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
1915
 
1916
/* ALU2(OP) defines brw_OP(p, dest, src0, src1), which emits
 * BRW_OPCODE_OP through brw_alu2().
 */
#define ALU2(OP) \
static inline struct brw_instruction * \
brw_##OP(struct brw_compile *p, \
	 struct brw_reg dest, \
	 struct brw_reg src0, \
	 struct brw_reg src1) \
{ \
	return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
1924
 
1925
/* ROUND(OP) defines brw_OP(p, dest, src).
 *
 * Rounding operations (other than RNDD) require two instructions - the
 * first stores a rounded value (possibly the wrong way) in the dest
 * register, but also sets a per-channel "increment bit" in the flag
 * register.  A predicated add of 1.0 fixes dest to contain the desired
 * result.
 *
 * Sandybridge and later appear to round correctly without an ADD, so
 * the fix-up is only emitted when p->gen < 060.
 */
#define ROUND(OP) \
static inline void brw_##OP(struct brw_compile *p, \
			    struct brw_reg dest, \
			    struct brw_reg src) \
{ \
	struct brw_instruction *rnd = brw_next_insn(p, BRW_OPCODE_##OP); \
	brw_set_dest(p, rnd, dest); \
	brw_set_src0(p, rnd, src); \
	if (p->gen < 060) { \
		struct brw_instruction *add; \
		/* turn on round-increments */ \
		rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
		add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
		add->header.predicate_control = BRW_PREDICATE_NORMAL; \
	} \
}
1948
 
1949
static inline struct brw_instruction *brw_alu1(struct brw_compile *p,
1950
					       unsigned opcode,
1951
					       struct brw_reg dest,
1952
					       struct brw_reg src)
1953
{
1954
	struct brw_instruction *insn = brw_next_insn(p, opcode);
1955
	brw_set_dest(p, insn, dest);
1956
	brw_set_src0(p, insn, src);
1957
	return insn;
1958
}
1959
 
1960
static inline struct brw_instruction *brw_alu2(struct brw_compile *p,
1961
					       unsigned opcode,
1962
					       struct brw_reg dest,
1963
					       struct brw_reg src0,
1964
					       struct brw_reg src1 )
1965
{
1966
	struct brw_instruction *insn = brw_next_insn(p, opcode);
1967
	brw_set_dest(p, insn, dest);
1968
	brw_set_src0(p, insn, src0);
1969
	brw_set_src1(p, insn, src1);
1970
	return insn;
1971
}
1972
 
1973
static inline struct brw_instruction *brw_ADD(struct brw_compile *p,
1974
					      struct brw_reg dest,
1975
					      struct brw_reg src0,
1976
					      struct brw_reg src1)
1977
{
1978
	/* 6.2.2: add */
1979
	if (src0.type == BRW_REGISTER_TYPE_F ||
1980
	    (src0.file == BRW_IMMEDIATE_VALUE &&
1981
	     src0.type == BRW_REGISTER_TYPE_VF)) {
1982
		assert(src1.type != BRW_REGISTER_TYPE_UD);
1983
		assert(src1.type != BRW_REGISTER_TYPE_D);
1984
	}
1985
 
1986
	if (src1.type == BRW_REGISTER_TYPE_F ||
1987
	    (src1.file == BRW_IMMEDIATE_VALUE &&
1988
	     src1.type == BRW_REGISTER_TYPE_VF)) {
1989
		assert(src0.type != BRW_REGISTER_TYPE_UD);
1990
		assert(src0.type != BRW_REGISTER_TYPE_D);
1991
	}
1992
 
1993
	return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
1994
}
1995
 
1996
static inline struct brw_instruction *brw_MUL(struct brw_compile *p,
1997
					      struct brw_reg dest,
1998
					      struct brw_reg src0,
1999
					      struct brw_reg src1)
2000
{
2001
	/* 6.32.38: mul */
2002
	if (src0.type == BRW_REGISTER_TYPE_D ||
2003
	    src0.type == BRW_REGISTER_TYPE_UD ||
2004
	    src1.type == BRW_REGISTER_TYPE_D ||
2005
	    src1.type == BRW_REGISTER_TYPE_UD) {
2006
		assert(dest.type != BRW_REGISTER_TYPE_F);
2007
	}
2008
 
2009
	if (src0.type == BRW_REGISTER_TYPE_F ||
2010
	    (src0.file == BRW_IMMEDIATE_VALUE &&
2011
	     src0.type == BRW_REGISTER_TYPE_VF)) {
2012
		assert(src1.type != BRW_REGISTER_TYPE_UD);
2013
		assert(src1.type != BRW_REGISTER_TYPE_D);
2014
	}
2015
 
2016
	if (src1.type == BRW_REGISTER_TYPE_F ||
2017
	    (src1.file == BRW_IMMEDIATE_VALUE &&
2018
	     src1.type == BRW_REGISTER_TYPE_VF)) {
2019
		assert(src0.type != BRW_REGISTER_TYPE_UD);
2020
		assert(src0.type != BRW_REGISTER_TYPE_D);
2021
	}
2022
 
2023
	assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2024
	       src0.nr != BRW_ARF_ACCUMULATOR);
2025
	assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
2026
	       src1.nr != BRW_ARF_ACCUMULATOR);
2027
 
2028
	return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
2029
}
2030
 
2031
static inline struct brw_instruction *brw_JMPI(struct brw_compile *p,
2032
					       struct brw_reg dest,
2033
					       struct brw_reg src0,
2034
					       struct brw_reg src1)
2035
{
2036
	struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
2037
 
2038
	insn->header.execution_size = 1;
2039
	insn->header.compression_control = BRW_COMPRESSION_NONE;
2040
	insn->header.mask_control = BRW_MASK_DISABLE;
2041
 
2042
	p->current->header.predicate_control = BRW_PREDICATE_NONE;
2043
 
2044
	return insn;
2045
}
2046
 
2047
 
2048
/* Instantiate the simple emitters: each line defines an inline brw_<OP>()
 * via the ALU1/ALU2 generator macros (one- and two-source forms).
 */
ALU1(MOV);
ALU2(SEL);
ALU1(NOT);
ALU2(AND);
ALU2(OR);
ALU2(XOR);
ALU2(SHR);
ALU2(SHL);
ALU2(RSR);
ALU2(RSL);
ALU2(ASR);
ALU1(FRC);
ALU1(RNDD);
ALU2(MAC);
ALU2(MACH);
ALU1(LZD);
ALU2(DP4);
ALU2(DPH);
ALU2(DP3);
ALU2(DP2);
ALU2(LINE);
ALU2(PLN);

/* Rounding emitters that may need the extra predicated ADD (see ROUND). */
ROUND(RNDZ);
ROUND(RNDE);

/* The generator macros are private to this header. */
#undef ALU1
#undef ALU2
#undef ROUND
2077
 
2078
/* Helpers for SEND instruction */
void brw_set_dp_read_message(struct brw_compile *p,
			     struct brw_instruction *insn,
			     unsigned binding_table_index,
			     unsigned msg_control,
			     unsigned msg_type,
			     unsigned target_cache,
			     unsigned msg_length,
			     unsigned response_length);

void brw_set_dp_write_message(struct brw_compile *p,
			      struct brw_instruction *insn,
			      unsigned binding_table_index,
			      unsigned msg_control,
			      unsigned msg_type,
			      unsigned msg_length,
			      bool header_present,
			      bool last_render_target,
			      unsigned response_length,
			      bool end_of_thread,
			      bool send_commit_msg);

void brw_urb_WRITE(struct brw_compile *p,
		   struct brw_reg dest,
		   unsigned msg_reg_nr,
		   struct brw_reg src0,
		   bool allocate,
		   bool used,
		   unsigned msg_length,
		   unsigned response_length,
		   bool eot,
		   bool writes_complete,
		   unsigned offset,
		   unsigned swizzle);

void brw_ff_sync(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned msg_reg_nr,
		 struct brw_reg src0,
		 bool allocate,
		 unsigned response_length,
		 bool eot);

void brw_fb_WRITE(struct brw_compile *p,
		  int dispatch_width,
		  unsigned msg_reg_nr,
		  struct brw_reg src0,
		  unsigned msg_control,
		  unsigned binding_table_index,
		  unsigned msg_length,
		  unsigned response_length,
		  bool eot,
		  bool header_present);

void brw_SAMPLE(struct brw_compile *p,
		struct brw_reg dest,
		unsigned msg_reg_nr,
		struct brw_reg src0,
		unsigned binding_table_index,
		unsigned sampler,
		unsigned writemask,
		unsigned msg_type,
		unsigned response_length,
		unsigned msg_length,
		bool header_present,
		unsigned simd_mode);

/* Math helpers.  Note the trailing argument order of brw_math():
 * data_type comes before precision.
 */
void brw_math_16(struct brw_compile *p,
		 struct brw_reg dest,
		 unsigned function,
		 unsigned saturate,
		 unsigned msg_reg_nr,
		 struct brw_reg src,
		 unsigned precision);

void brw_math(struct brw_compile *p,
	      struct brw_reg dest,
	      unsigned function,
	      unsigned saturate,
	      unsigned msg_reg_nr,
	      struct brw_reg src,
	      unsigned data_type,
	      unsigned precision);

void brw_math2(struct brw_compile *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1);

/* Block, scratch and scattered reads/writes. */
void brw_oword_block_read(struct brw_compile *p,
			  struct brw_reg dest,
			  struct brw_reg mrf,
			  uint32_t offset,
			  uint32_t bind_table_index);

void brw_oword_block_read_scratch(struct brw_compile *p,
				  struct brw_reg dest,
				  struct brw_reg mrf,
				  int num_regs,
				  unsigned offset);

void brw_oword_block_write_scratch(struct brw_compile *p,
				   struct brw_reg mrf,
				   int num_regs,
				   unsigned offset);

void brw_dword_scattered_read(struct brw_compile *p,
			      struct brw_reg dest,
			      struct brw_reg mrf,
			      uint32_t bind_table_index);

void brw_dp_READ_4_vs(struct brw_compile *p,
		      struct brw_reg dest,
		      unsigned location,
		      unsigned bind_table_index);

void brw_dp_READ_4_vs_relative(struct brw_compile *p,
			       struct brw_reg dest,
			       struct brw_reg addrReg,
			       unsigned offset,
			       unsigned bind_table_index);
2200
 
2201
/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
struct brw_instruction *brw_IF(struct brw_compile *p,
			       unsigned execute_size);
struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
				struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_compile *p);
void brw_ENDIF(struct brw_compile *p);

/* DO/WHILE loops:
 */
struct brw_instruction *brw_DO(struct brw_compile *p,
			       unsigned execute_size);

struct brw_instruction *brw_WHILE(struct brw_compile *p,
				  struct brw_instruction *patch_insn);

struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
struct brw_instruction *gen6_CONT(struct brw_compile *p,
				  struct brw_instruction *do_insn);

/* Forward jumps:
 */
void brw_land_fwd_jump(struct brw_compile *p,
		       struct brw_instruction *jmp_insn);

void brw_NOP(struct brw_compile *p);

void brw_WAIT(struct brw_compile *p);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 *
 * NOTE(review): the prototype below does take a dest register, so the
 * "never a destination" remark may be stale - confirm against the
 * implementation.
 */
void brw_CMP(struct brw_compile *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1);
2241
 
2242
static inline void brw_math_invert(struct brw_compile *p,
2243
				   struct brw_reg dst,
2244
				   struct brw_reg src)
2245
{
2246
	brw_math(p,
2247
		 dst,
2248
		 BRW_MATH_FUNCTION_INV,
2249
		 BRW_MATH_SATURATE_NONE,
2250
		 0,
2251
		 src,
2252
		 BRW_MATH_PRECISION_FULL,
2253
		 BRW_MATH_DATA_VECTOR);
2254
}
2255
 
2256
void brw_set_uip_jip(struct brw_compile *p);

/* Maps a conditional modifier to another one; judging by the name, the one
 * to use when the two compared operands are exchanged.
 * NOTE(review): confirm against the implementation.
 */
uint32_t brw_swap_cmod(uint32_t cmod);

/* Disassembler entry point: takes an output stream, one instruction and
 * the hardware generation.
 */
void brw_disasm(FILE *file,
		const struct brw_instruction *inst,
		int gen);
2263
 
2264
#endif