Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 2012-2013 LunarG, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
 * DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Chia-I Wu 
26
 */
27
 
28
#include "toy_compiler.h"
29
 
30
#define CG_REG_SHIFT 5
31
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
32
 
33
/*
 * Destination operand in the form consumed by the translate_*() helpers.
 */
struct codegen_dst {
   unsigned file;
   unsigned type;

   /* register-indirect addressing and the sub-register used for it */
   bool indirect;
   unsigned indirect_subreg;

   /* (RegNum << 5 | SubRegNumInBytes) */
   unsigned origin;

   unsigned horz_stride;

   unsigned writemask;
};

/*
 * Source operand in the form consumed by the translate_*() helpers.
 */
struct codegen_src {
   unsigned file;
   unsigned type;

   /* register-indirect addressing and the sub-register used for it */
   bool indirect;
   unsigned indirect_subreg;

   /* (RegNum << 5 | SubRegNumInBytes) */
   unsigned origin;

   /* region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   /* per-channel swizzle followed by the source modifiers */
   unsigned swizzle[4];
   bool absolute;
   bool negate;
};

/*
 * Everything needed to encode one instruction: the device, the instruction
 * itself, the flag register selection, one destination and up to three
 * sources.
 */
struct codegen {
   const struct ilo_dev *dev;
   const struct toy_inst *inst;
   int pc;

   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
69
 
70
/*
71
 * From the Sandy Bridge PRM, volume 4 part 2, page 107-108:
72
 *
73
 *     "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up
74
 *      result forms bits [88:77], the source 0 register region fields, of the
75
 *      128-bit instruction word."
76
 *
77
 *     "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit
78
 *      table-look-up result forms bits [100:96], [68,64] and [52,48] of the
79
 *      128-bit instruction word."
80
 *
81
 *     "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit
82
 *      table-look-up result forms bits [63:61] and [46, 32] of the 128-bit
83
 *      instruction word."
84
 *
85
 *     "(ControlIndex) The 5-bit index for data type fields. The 17-bit
86
 *      table-look-up result forms bits[31], and [23, 8] of the 128-bit
87
 *      instruction word."
88
 */
89
static const struct toy_compaction_table toy_compaction_table_gen6 = {
90
   .control = {
91
      [0]   = 0x00000,  /* 00000000000000000 */
92
      [1]   = 0x08000,  /* 01000000000000000 */
93
      [2]   = 0x06000,  /* 00110000000000000 */
94
      [3]   = 0x00100,  /* 00000000100000000 */
95
      [4]   = 0x02000,  /* 00010000000000000 */
96
      [5]   = 0x01100,  /* 00001000100000000 */
97
      [6]   = 0x00102,  /* 00000000100000010 */
98
      [7]   = 0x00002,  /* 00000000000000010 */
99
      [8]   = 0x08100,  /* 01000000100000000 */
100
      [9]   = 0x0a000,  /* 01010000000000000 */
101
      [10]  = 0x16000,  /* 10110000000000000 */
102
      [11]  = 0x04000,  /* 00100000000000000 */
103
      [12]  = 0x1a000,  /* 11010000000000000 */
104
      [13]  = 0x18000,  /* 11000000000000000 */
105
      [14]  = 0x09100,  /* 01001000100000000 */
106
      [15]  = 0x08008,  /* 01000000000001000 */
107
      [16]  = 0x08004,  /* 01000000000000100 */
108
      [17]  = 0x00008,  /* 00000000000001000 */
109
      [18]  = 0x00004,  /* 00000000000000100 */
110
      [19]  = 0x01100,  /* 00111000100000000 */
111
      [20]  = 0x01102,  /* 00001000100000010 */
112
      [21]  = 0x06100,  /* 00110000100000000 */
113
      [22]  = 0x06001,  /* 00110000000000001 */
114
      [23]  = 0x04001,  /* 00100000000000001 */
115
      [24]  = 0x06002,  /* 00110000000000010 */
116
      [25]  = 0x06005,  /* 00110000000000101 */
117
      [26]  = 0x06009,  /* 00110000000001001 */
118
      [27]  = 0x06010,  /* 00110000000010000 */
119
      [28]  = 0x06003,  /* 00110000000000011 */
120
      [29]  = 0x06004,  /* 00110000000000100 */
121
      [30]  = 0x06108,  /* 00110000100001000 */
122
      [31]  = 0x04009,  /* 00100000000001001 */
123
   },
124
   .datatype = {
125
      [0]   = 0x09c00,  /* 001001110000000000 */
126
      [1]   = 0x08c20,  /* 001000110000100000 */
127
      [2]   = 0x09c01,  /* 001001110000000001 */
128
      [3]   = 0x08060,  /* 001000000001100000 */
129
      [4]   = 0x0ad29,  /* 001010110100101001 */
130
      [5]   = 0x081ad,  /* 001000000110101101 */
131
      [6]   = 0x0c62c,  /* 001100011000101100 */
132
      [7]   = 0x0bdad,  /* 001011110110101101 */
133
      [8]   = 0x081ec,  /* 001000000111101100 */
134
      [9]   = 0x08061,  /* 001000000001100001 */
135
      [10]  = 0x08ca5,  /* 001000110010100101 */
136
      [11]  = 0x08041,  /* 001000000001000001 */
137
      [12]  = 0x08231,  /* 001000001000110001 */
138
      [13]  = 0x08229,  /* 001000001000101001 */
139
      [14]  = 0x08020,  /* 001000000000100000 */
140
      [15]  = 0x08232,  /* 001000001000110010 */
141
      [16]  = 0x0a529,  /* 001010010100101001 */
142
      [17]  = 0x0b4a5,  /* 001011010010100101 */
143
      [18]  = 0x081a5,  /* 001000000110100101 */
144
      [19]  = 0x0c629,  /* 001100011000101001 */
145
      [20]  = 0x0b62c,  /* 001011011000101100 */
146
      [21]  = 0x0b5a5,  /* 001011010110100101 */
147
      [22]  = 0x0bda5,  /* 001011110110100101 */
148
      [23]  = 0x0f1bd,  /* 001111011110111101 */
149
      [24]  = 0x0f1bc,  /* 001111011110111100 */
150
      [25]  = 0x0f1bd,  /* 001111011110111101 */
151
      [26]  = 0x0f19d,  /* 001111011110011101 */
152
      [27]  = 0x0f1be,  /* 001111011110111110 */
153
      [28]  = 0x08021,  /* 001000000000100001 */
154
      [29]  = 0x08022,  /* 001000000000100010 */
155
      [30]  = 0x09fdd,  /* 001001111111011101 */
156
      [31]  = 0x083be,  /* 001000001110111110 */
157
   },
158
   .subreg = {
159
      [0]   = 0x0000,   /* 000000000000000 */
160
      [1]   = 0x0004,   /* 000000000000100 */
161
      [2]   = 0x0180,   /* 000000110000000 */
162
      [3]   = 0x1000,   /* 111000000000000 */
163
      [4]   = 0x3c08,   /* 011110000001000 */
164
      [5]   = 0x0400,   /* 000010000000000 */
165
      [6]   = 0x0010,   /* 000000000010000 */
166
      [7]   = 0x0c0c,   /* 000110000001100 */
167
      [8]   = 0x1000,   /* 001000000000000 */
168
      [9]   = 0x0200,   /* 000001000000000 */
169
      [10]  = 0x0294,   /* 000001010010100 */
170
      [11]  = 0x0056,   /* 000000001010110 */
171
      [12]  = 0x2000,   /* 010000000000000 */
172
      [13]  = 0x6000,   /* 110000000000000 */
173
      [14]  = 0x0800,   /* 000100000000000 */
174
      [15]  = 0x0080,   /* 000000010000000 */
175
      [16]  = 0x0008,   /* 000000000001000 */
176
      [17]  = 0x4000,   /* 100000000000000 */
177
      [18]  = 0x0280,   /* 000001010000000 */
178
      [19]  = 0x1400,   /* 001010000000000 */
179
      [20]  = 0x1800,   /* 001100000000000 */
180
      [21]  = 0x0054,   /* 000000001010100 */
181
      [22]  = 0x5a94,   /* 101101010010100 */
182
      [23]  = 0x2800,   /* 010100000000000 */
183
      [24]  = 0x008f,   /* 000000010001111 */
184
      [25]  = 0x3000,   /* 011000000000000 */
185
      [26]  = 0x1c00,   /* 111110000000000 */
186
      [27]  = 0x5000,   /* 101000000000000 */
187
      [28]  = 0x000f,   /* 000000000001111 */
188
      [29]  = 0x088f,   /* 000100010001111 */
189
      [30]  = 0x108f,   /* 001000010001111 */
190
      [31]  = 0x0c00,   /* 000110000000000 */
191
   },
192
   .src = {
193
      [0]   = 0x000,    /* 000000000000 */
194
      [1]   = 0x588,    /* 010110001000 */
195
      [2]   = 0x468,    /* 010001101000 */
196
      [3]   = 0x228,    /* 001000101000 */
197
      [4]   = 0x690,    /* 011010010000 */
198
      [5]   = 0x120,    /* 000100100000 */
199
      [6]   = 0x46c,    /* 010001101100 */
200
      [7]   = 0x510,    /* 010101110000 */
201
      [8]   = 0x618,    /* 011001111000 */
202
      [9]   = 0x328,    /* 001100101000 */
203
      [10]  = 0x58c,    /* 010110001100 */
204
      [11]  = 0x220,    /* 001000100000 */
205
      [12]  = 0x58a,    /* 010110001010 */
206
      [13]  = 0x002,    /* 000000000010 */
207
      [14]  = 0x550,    /* 010101010000 */
208
      [15]  = 0x568,    /* 010101101000 */
209
      [16]  = 0xf4c,    /* 111101001100 */
210
      [17]  = 0xf2c,    /* 111100101100 */
211
      [18]  = 0x610,    /* 011001110000 */
212
      [19]  = 0x589,    /* 010110001001 */
213
      [20]  = 0x558,    /* 010101011000 */
214
      [21]  = 0x348,    /* 001101001000 */
215
      [22]  = 0x42c,    /* 010000101100 */
216
      [23]  = 0x400,    /* 010000000000 */
217
      [24]  = 0x310,    /* 001101110000 */
218
      [25]  = 0x310,    /* 001100010000 */
219
      [26]  = 0x300,    /* 001100000000 */
220
      [27]  = 0x46a,    /* 010001101010 */
221
      [28]  = 0x318,    /* 001101111000 */
222
      [29]  = 0x010,    /* 000001110000 */
223
      [30]  = 0x320,    /* 001100100000 */
224
      [31]  = 0x350,    /* 001101010000 */
225
   },
226
};
227
 
228
/*
229
 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
230
 *
231
 *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
232
 *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
233
 *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
234
 *
235
 *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
236
 *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
237
 *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
238
 *      on AddrMode and AccessMode.
239
 *
240
 *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
241
 *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
242
 *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
243
 *      Src1.RegType bit fields."
244
 *
245
 *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
246
 *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
247
 *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
248
 *      and AccessMode bit fields."
249
 */
250
static const struct toy_compaction_table toy_compaction_table_gen7 = {
251
   .control = {
252
      [0]   = 0x00002,  /* 0000000000000000010 */
253
      [1]   = 0x04000,  /* 0000100000000000000 */
254
      [2]   = 0x04001,  /* 0000100000000000001 */
255
      [3]   = 0x04002,  /* 0000100000000000010 */
256
      [4]   = 0x04003,  /* 0000100000000000011 */
257
      [5]   = 0x04004,  /* 0000100000000000100 */
258
      [6]   = 0x04005,  /* 0000100000000000101 */
259
      [7]   = 0x04007,  /* 0000100000000000111 */
260
      [8]   = 0x04008,  /* 0000100000000001000 */
261
      [9]   = 0x04009,  /* 0000100000000001001 */
262
      [10]  = 0x0400d,  /* 0000100000000001101 */
263
      [11]  = 0x06000,  /* 0000110000000000000 */
264
      [12]  = 0x06001,  /* 0000110000000000001 */
265
      [13]  = 0x06002,  /* 0000110000000000010 */
266
      [14]  = 0x06003,  /* 0000110000000000011 */
267
      [15]  = 0x06004,  /* 0000110000000000100 */
268
      [16]  = 0x06005,  /* 0000110000000000101 */
269
      [17]  = 0x06007,  /* 0000110000000000111 */
270
      [18]  = 0x06009,  /* 0000110000000001001 */
271
      [19]  = 0x0600d,  /* 0000110000000001101 */
272
      [20]  = 0x06010,  /* 0000110000000010000 */
273
      [21]  = 0x06100,  /* 0000110000100000000 */
274
      [22]  = 0x08000,  /* 0001000000000000000 */
275
      [23]  = 0x08002,  /* 0001000000000000010 */
276
      [24]  = 0x08004,  /* 0001000000000000100 */
277
      [25]  = 0x08100,  /* 0001000000100000000 */
278
      [26]  = 0x16000,  /* 0010110000000000000 */
279
      [27]  = 0x16010,  /* 0010110000000010000 */
280
      [28]  = 0x18000,  /* 0011000000000000000 */
281
      [29]  = 0x18100,  /* 0011000000100000000 */
282
      [30]  = 0x28000,  /* 0101000000000000000 */
283
      [31]  = 0x28100,  /* 0101000000100000000 */
284
   },
285
   .datatype = {
286
      [0]   = 0x08001,  /* 001000000000000001 */
287
      [1]   = 0x08020,  /* 001000000000100000 */
288
      [2]   = 0x08021,  /* 001000000000100001 */
289
      [3]   = 0x08061,  /* 001000000001100001 */
290
      [4]   = 0x080bd,  /* 001000000010111101 */
291
      [5]   = 0x082fd,  /* 001000001011111101 */
292
      [6]   = 0x083a1,  /* 001000001110100001 */
293
      [7]   = 0x083a5,  /* 001000001110100101 */
294
      [8]   = 0x083bd,  /* 001000001110111101 */
295
      [9]   = 0x08421,  /* 001000010000100001 */
296
      [10]  = 0x08c20,  /* 001000110000100000 */
297
      [11]  = 0x08c21,  /* 001000110000100001 */
298
      [12]  = 0x094a5,  /* 001001010010100101 */
299
      [13]  = 0x09ca4,  /* 001001110010100100 */
300
      [14]  = 0x09ca5,  /* 001001110010100101 */
301
      [15]  = 0x0f3bd,  /* 001111001110111101 */
302
      [16]  = 0x0f79d,  /* 001111011110011101 */
303
      [17]  = 0x0f7bc,  /* 001111011110111100 */
304
      [18]  = 0x0f7bd,  /* 001111011110111101 */
305
      [19]  = 0x0ffbc,  /* 001111111110111100 */
306
      [20]  = 0x0020c,  /* 000000001000001100 */
307
      [21]  = 0x0803d,  /* 001000000000111101 */
308
      [22]  = 0x080a5,  /* 001000000010100101 */
309
      [23]  = 0x08420,  /* 001000010000100000 */
310
      [24]  = 0x094a4,  /* 001001010010100100 */
311
      [25]  = 0x09c84,  /* 001001110010000100 */
312
      [26]  = 0x0a509,  /* 001010010100001001 */
313
      [27]  = 0x0dfbd,  /* 001101111110111101 */
314
      [28]  = 0x0ffbd,  /* 001111111110111101 */
315
      [29]  = 0x0bdac,  /* 001011110110101100 */
316
      [30]  = 0x0a528,  /* 001010010100101000 */
317
      [31]  = 0x0ad28,  /* 001010110100101000 */
318
   },
319
   .subreg = {
320
      [0]   = 0x0000,   /* 000000000000000 */
321
      [1]   = 0x0001,   /* 000000000000001 */
322
      [2]   = 0x0008,   /* 000000000001000 */
323
      [3]   = 0x000f,   /* 000000000001111 */
324
      [4]   = 0x0010,   /* 000000000010000 */
325
      [5]   = 0x0080,   /* 000000010000000 */
326
      [6]   = 0x0100,   /* 000000100000000 */
327
      [7]   = 0x0180,   /* 000000110000000 */
328
      [8]   = 0x0200,   /* 000001000000000 */
329
      [9]   = 0x0210,   /* 000001000010000 */
330
      [10]  = 0x0280,   /* 000001010000000 */
331
      [11]  = 0x1000,   /* 001000000000000 */
332
      [12]  = 0x1001,   /* 001000000000001 */
333
      [13]  = 0x1081,   /* 001000010000001 */
334
      [14]  = 0x1082,   /* 001000010000010 */
335
      [15]  = 0x1083,   /* 001000010000011 */
336
      [16]  = 0x1084,   /* 001000010000100 */
337
      [17]  = 0x1087,   /* 001000010000111 */
338
      [18]  = 0x1088,   /* 001000010001000 */
339
      [19]  = 0x108e,   /* 001000010001110 */
340
      [20]  = 0x108f,   /* 001000010001111 */
341
      [21]  = 0x1180,   /* 001000110000000 */
342
      [22]  = 0x11e8,   /* 001000111101000 */
343
      [23]  = 0x2000,   /* 010000000000000 */
344
      [24]  = 0x2180,   /* 010000110000000 */
345
      [25]  = 0x3000,   /* 011000000000000 */
346
      [26]  = 0x3c87,   /* 011110010000111 */
347
      [27]  = 0x4000,   /* 100000000000000 */
348
      [28]  = 0x5000,   /* 101000000000000 */
349
      [29]  = 0x6000,   /* 110000000000000 */
350
      [30]  = 0x7000,   /* 111000000000000 */
351
      [31]  = 0x701c,   /* 111000000011100 */
352
   },
353
   .src = {
354
      [0]   = 0x000,    /* 000000000000 */
355
      [1]   = 0x002,    /* 000000000010 */
356
      [2]   = 0x010,    /* 000000010000 */
357
      [3]   = 0x012,    /* 000000010010 */
358
      [4]   = 0x018,    /* 000000011000 */
359
      [5]   = 0x020,    /* 000000100000 */
360
      [6]   = 0x028,    /* 000000101000 */
361
      [7]   = 0x048,    /* 000001001000 */
362
      [8]   = 0x050,    /* 000001010000 */
363
      [9]   = 0x070,    /* 000001110000 */
364
      [10]  = 0x078,    /* 000001111000 */
365
      [11]  = 0x300,    /* 001100000000 */
366
      [12]  = 0x302,    /* 001100000010 */
367
      [13]  = 0x308,    /* 001100001000 */
368
      [14]  = 0x310,    /* 001100010000 */
369
      [15]  = 0x312,    /* 001100010010 */
370
      [16]  = 0x320,    /* 001100100000 */
371
      [17]  = 0x328,    /* 001100101000 */
372
      [18]  = 0x338,    /* 001100111000 */
373
      [19]  = 0x340,    /* 001101000000 */
374
      [20]  = 0x342,    /* 001101000010 */
375
      [21]  = 0x348,    /* 001101001000 */
376
      [22]  = 0x350,    /* 001101010000 */
377
      [23]  = 0x360,    /* 001101100000 */
378
      [24]  = 0x368,    /* 001101101000 */
379
      [25]  = 0x370,    /* 001101110000 */
380
      [26]  = 0x371,    /* 001101110001 */
381
      [27]  = 0x378,    /* 001101111000 */
382
      [28]  = 0x468,    /* 010001101000 */
383
      [29]  = 0x469,    /* 010001101001 */
384
      [30]  = 0x46a,    /* 010001101010 */
385
      [31]  = 0x588,    /* 010110001000 */
386
   },
387
};
388
 
389
static const struct toy_compaction_table toy_compaction_table_gen8 = {
390
   .control = {
391
   },
392
   .datatype = {
393
   },
394
   .subreg = {
395
   },
396
   .src = {
397
   },
398
   .control_3src = {
399
   },
400
   .source_3src = {
401
   },
402
};
403
 
404
const struct toy_compaction_table *
405
toy_compiler_get_compaction_table(const struct ilo_dev *dev)
406
{
407
   switch (ilo_dev_gen(dev)) {
408
   case ILO_GEN(8):
409
      return &toy_compaction_table_gen8;
410
   case ILO_GEN(7.5):
411
   case ILO_GEN(7):
412
      return &toy_compaction_table_gen7;
413
   case ILO_GEN(6):
414
      return &toy_compaction_table_gen6;
415
   default:
416
      assert(!"unsupported gen");
417
      return NULL;
418
   }
419
}
420
 
421
/**
422
 * Return true if the source operand is null.
423
 */
424
static bool
425
src_is_null(const struct codegen *cg, int idx)
426
{
427
   const struct codegen_src *src = &cg->src[idx];
428
 
429
   return (src->file == GEN6_FILE_ARF &&
430
           src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
431
}
432
 
433
/**
434
 * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
435
 */
436
static uint32_t
437
translate_src_gen6(const struct codegen *cg, int idx)
438
{
439
   const struct codegen_src *src = &cg->src[idx];
440
   uint32_t dw;
441
 
442
   ILO_DEV_ASSERT(cg->dev, 6, 8);
443
 
444
   /* special treatment may be needed if any of the operand is immediate */
445
   if (cg->src[0].file == GEN6_FILE_IMM) {
446
      assert(!cg->src[0].absolute && !cg->src[0].negate);
447
 
448
      /* only the last src operand can be an immediate unless it is Gen8+ */
449
      assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1));
450
 
451
      if (!src_is_null(cg, 1))
452
         return cg->src[idx].origin;
453
 
454
      if (idx == 0) {
455
         if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
456
            return cg->src[1].type << 27 |
457
                   cg->src[1].file << 25;
458
         } else {
459
            return cg->flag_sub_reg_num << 25;
460
         }
461
      } else {
462
         return cg->src[0].origin;
463
      }
464
   }
465
   else if (idx && cg->src[1].file == GEN6_FILE_IMM) {
466
      assert(!cg->src[1].absolute && !cg->src[1].negate);
467
      return cg->src[1].origin;
468
   }
469
 
470
   assert(src->file != GEN6_FILE_IMM);
471
 
472
   if (src->indirect) {
473
      const int offset = (int) src->origin;
474
 
475
      assert(src->file == GEN6_FILE_GRF);
476
      assert(offset < 512 && offset >= -512);
477
 
478
      if (cg->inst->access_mode == GEN6_ALIGN_16) {
479
         assert(src->width == GEN6_WIDTH_4);
480
         assert(src->horz_stride == GEN6_HORZSTRIDE_1);
481
 
482
         /* the lower 4 bits are reserved for the swizzle_[xy] */
483
         assert(!(src->origin & 0xf));
484
 
485
         dw = src->vert_stride << 21 |
486
              src->swizzle[3] << 18 |
487
              src->swizzle[2] << 16 |
488
              GEN6_ADDRMODE_INDIRECT << 15 |
489
              src->negate << 14 |
490
              src->absolute << 13 |
491
              src->swizzle[1] << 2 |
492
              src->swizzle[0];
493
         if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
494
            dw |= src->indirect_subreg << 9 |
495
                  (src->origin & 0x1f0);
496
         } else {
497
            dw |= src->indirect_subreg << 10 |
498
                  (src->origin & 0x3f0);
499
         }
500
      }
501
      else {
502
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
503
                src->swizzle[1] == TOY_SWIZZLE_Y &&
504
                src->swizzle[2] == TOY_SWIZZLE_Z &&
505
                src->swizzle[3] == TOY_SWIZZLE_W);
506
 
507
         dw = src->vert_stride << 21 |
508
              src->width << 18 |
509
              src->horz_stride << 16 |
510
              GEN6_ADDRMODE_INDIRECT << 15 |
511
              src->negate << 14 |
512
              src->absolute << 13;
513
         if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
514
            dw |= src->indirect_subreg << 9 |
515
                  (src->origin & 0x1ff);
516
         } else {
517
            dw |= src->indirect_subreg << 10 |
518
                  (src->origin & 0x3ff);
519
         }
520
      }
521
   }
522
   else {
523
      switch (src->file) {
524
      case GEN6_FILE_ARF:
525
         break;
526
      case GEN6_FILE_GRF:
527
         assert(CG_REG_NUM(src->origin) < 128);
528
         break;
529
      case GEN6_FILE_MRF:
530
         assert(cg->inst->opcode == GEN6_OPCODE_SEND ||
531
                cg->inst->opcode == GEN6_OPCODE_SENDC);
532
         assert(CG_REG_NUM(src->origin) < 16);
533
         break;
534
      case GEN6_FILE_IMM:
535
      default:
536
         assert(!"invalid src file");
537
         break;
538
      }
539
 
540
      if (cg->inst->access_mode == GEN6_ALIGN_16) {
541
         assert(src->width == GEN6_WIDTH_4);
542
         assert(src->horz_stride == GEN6_HORZSTRIDE_1);
543
 
544
         /* the lower 4 bits are reserved for the swizzle_[xy] */
545
         assert(!(src->origin & 0xf));
546
 
547
         dw = src->vert_stride << 21 |
548
              src->swizzle[3] << 18 |
549
              src->swizzle[2] << 16 |
550
              GEN6_ADDRMODE_DIRECT << 15 |
551
              src->negate << 14 |
552
              src->absolute << 13 |
553
              src->origin |
554
              src->swizzle[1] << 2 |
555
              src->swizzle[0];
556
      }
557
      else {
558
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
559
                src->swizzle[1] == TOY_SWIZZLE_Y &&
560
                src->swizzle[2] == TOY_SWIZZLE_Z &&
561
                src->swizzle[3] == TOY_SWIZZLE_W);
562
 
563
         dw = src->vert_stride << 21 |
564
              src->width << 18 |
565
              src->horz_stride << 16 |
566
              GEN6_ADDRMODE_DIRECT << 15 |
567
              src->negate << 14 |
568
              src->absolute << 13 |
569
              src->origin;
570
      }
571
   }
572
 
573
   if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
574
      const bool indirect_origin_bit9 = (cg->dst.indirect) ?
575
         (src->origin & 0x200) : 0;
576
 
577
      if (idx == 0) {
578
         dw |= indirect_origin_bit9 << 31 |
579
               cg->src[1].type << 27 |
580
               cg->src[1].file << 25;
581
      } else {
582
         dw |= indirect_origin_bit9 << 25;
583
      }
584
   } else {
585
      if (idx == 0)
586
         dw |= cg->flag_sub_reg_num << 25;
587
   }
588
 
589
   return dw;
590
}
591
 
592
/**
593
 * Translate the destination operand to the higher 16 bits of DW1 of the
594
 * 1-src/2-src format.
595
 */
596
static uint16_t
597
translate_dst_region_gen6(const struct codegen *cg)
598
{
599
   const struct codegen_dst *dst = &cg->dst;
600
   uint16_t dw1_region;
601
 
602
   ILO_DEV_ASSERT(cg->dev, 6, 8);
603
 
604
   if (dst->file == GEN6_FILE_IMM) {
605
      /* dst is immediate (JIP) when the opcode is a conditional branch */
606
      switch (cg->inst->opcode) {
607
      case GEN6_OPCODE_IF:
608
      case GEN6_OPCODE_ELSE:
609
      case GEN6_OPCODE_ENDIF:
610
      case GEN6_OPCODE_WHILE:
611
         assert(dst->type == GEN6_TYPE_W);
612
         dw1_region = (dst->origin & 0xffff);
613
         break;
614
      default:
615
         assert(!"dst cannot be immediate");
616
         dw1_region = 0;
617
         break;
618
      }
619
 
620
      return dw1_region;
621
   }
622
 
623
   if (dst->indirect) {
624
      const int offset = (int) dst->origin;
625
 
626
      assert(dst->file == GEN6_FILE_GRF);
627
      assert(offset < 512 && offset >= -512);
628
 
629
      if (cg->inst->access_mode == GEN6_ALIGN_16) {
630
         /*
631
          * From the Sandy Bridge PRM, volume 4 part 2, page 144:
632
          *
633
          *     "Allthough Dst.HorzStride is a don't care for Align16, HW
634
          *      needs this to be programmed as 01."
635
          */
636
         assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
637
         /* the lower 4 bits are reserved for the writemask */
638
         assert(!(dst->origin & 0xf));
639
 
640
         dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
641
                      dst->horz_stride << 13 |
642
                      dst->writemask;
643
         if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
644
            dw1_region |= dst->indirect_subreg << 9 |
645
                          (dst->origin & 0x1f0);
646
         } else {
647
            dw1_region |= dst->indirect_subreg << 10 |
648
                          (dst->origin & 0x3f0);
649
         }
650
      }
651
      else {
652
         assert(dst->writemask == TOY_WRITEMASK_XYZW);
653
 
654
         dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
655
                      dst->horz_stride << 13;
656
         if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
657
            dw1_region |= dst->indirect_subreg << 9 |
658
                          (dst->origin & 0x1ff);
659
         } else {
660
            dw1_region |= dst->indirect_subreg << 10 |
661
                          (dst->origin & 0x3ff);
662
         }
663
      }
664
   }
665
   else {
666
      assert((dst->file == GEN6_FILE_GRF &&
667
              CG_REG_NUM(dst->origin) < 128) ||
668
             (dst->file == GEN6_FILE_MRF &&
669
              CG_REG_NUM(dst->origin) < 16) ||
670
             (dst->file == GEN6_FILE_ARF));
671
 
672
      if (cg->inst->access_mode == GEN6_ALIGN_16) {
673
         /* similar to the indirect case */
674
         assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
675
         assert(!(dst->origin & 0xf));
676
 
677
         dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
678
                      dst->horz_stride << 13 |
679
                      dst->origin |
680
                      dst->writemask;
681
      }
682
      else {
683
         assert(dst->writemask == TOY_WRITEMASK_XYZW);
684
 
685
         dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
686
                      dst->horz_stride << 13 |
687
                      dst->origin;
688
      }
689
   }
690
 
691
   return dw1_region;
692
}
693
 
694
/**
695
 * Translate the destination operand to DW1 of the 1-src/2-src format.
696
 */
697
static uint32_t
698
translate_dst_gen6(const struct codegen *cg)
699
{
700
   ILO_DEV_ASSERT(cg->dev, 6, 7.5);
701
 
702
   return translate_dst_region_gen6(cg) << 16 |
703
          cg->src[1].type << 12 |
704
          cg->src[1].file << 10 |
705
          cg->src[0].type << 7 |
706
          cg->src[0].file << 5 |
707
          cg->dst.type << 2 |
708
          cg->dst.file;
709
}
710
 
711
static uint32_t
712
translate_dst_gen8(const struct codegen *cg)
713
{
714
   const bool indirect_origin_bit9 = (cg->dst.indirect) ?
715
      (cg->dst.origin & 0x200) : 0;
716
 
717
   ILO_DEV_ASSERT(cg->dev, 8, 8);
718
 
719
   return translate_dst_region_gen6(cg) << 16 |
720
          indirect_origin_bit9 << 15 |
721
          cg->src[0].type << 11 |
722
          cg->src[0].file << 9 |
723
          cg->dst.type << 5 |
724
          cg->dst.file << 3 |
725
          cg->inst->mask_ctrl << 2 |
726
          cg->flag_reg_num << 1 |
727
          cg->flag_sub_reg_num;
728
}
729
 
730
/**
731
 * Translate the instruction to DW0 of the 1-src/2-src format.
732
 */
733
static uint32_t
734
translate_inst_gen6(const struct codegen *cg)
735
{
736
   const bool debug_ctrl = false;
737
   const bool cmpt_ctrl = false;
738
 
739
   ILO_DEV_ASSERT(cg->dev, 6, 7.5);
740
 
741
   assert(cg->inst->opcode < 128);
742
 
743
   return cg->inst->saturate << 31 |
744
          debug_ctrl << 30 |
745
          cmpt_ctrl << 29 |
746
          cg->inst->acc_wr_ctrl << 28 |
747
          cg->inst->cond_modifier << 24 |
748
          cg->inst->exec_size << 21 |
749
          cg->inst->pred_inv << 20 |
750
          cg->inst->pred_ctrl << 16 |
751
          cg->inst->thread_ctrl << 14 |
752
          cg->inst->qtr_ctrl << 12 |
753
          cg->inst->dep_ctrl << 10 |
754
          cg->inst->mask_ctrl << 9 |
755
          cg->inst->access_mode << 8 |
756
          cg->inst->opcode;
757
}
758
 
759
static uint32_t
760
translate_inst_gen8(const struct codegen *cg)
761
{
762
   const bool debug_ctrl = false;
763
   const bool cmpt_ctrl = false;
764
 
765
   ILO_DEV_ASSERT(cg->dev, 8, 8);
766
 
767
   assert(cg->inst->opcode < 128);
768
 
769
   return cg->inst->saturate << 31 |
770
          debug_ctrl << 30 |
771
          cmpt_ctrl << 29 |
772
          cg->inst->acc_wr_ctrl << 28 |
773
          cg->inst->cond_modifier << 24 |
774
          cg->inst->exec_size << 21 |
775
          cg->inst->pred_inv << 20 |
776
          cg->inst->pred_ctrl << 16 |
777
          cg->inst->thread_ctrl << 14 |
778
          cg->inst->qtr_ctrl << 12 |
779
          cg->inst->dep_ctrl << 9 |
780
          cg->inst->access_mode << 8 |
781
          cg->inst->opcode;
782
}
783
 
784
/**
785
 * Codegen an instruction in 1-src/2-src format.
786
 */
787
static void
788
codegen_inst_gen6(const struct codegen *cg, uint32_t *code)
789
{
790
   ILO_DEV_ASSERT(cg->dev, 6, 8);
791
 
792
   if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
793
      code[0] = translate_inst_gen8(cg);
794
      code[1] = translate_dst_gen8(cg);
795
   } else {
796
      code[0] = translate_inst_gen6(cg);
797
      code[1] = translate_dst_gen6(cg);
798
   }
799
 
800
   code[2] = translate_src_gen6(cg, 0);
801
   code[3] = translate_src_gen6(cg, 1);
802
   assert(src_is_null(cg, 2));
803
}
804
 
805
/**
806
 * Codegen an instruction in 3-src format.
807
 */
808
static void
809
codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code)
810
{
811
   const struct codegen_dst *dst = &cg->dst;
812
   uint32_t dw0, dw1, dw_src[3];
813
   int i;
814
 
815
   ILO_DEV_ASSERT(cg->dev, 6, 8);
816
 
817
   if (ilo_dev_gen(cg->dev) >= ILO_GEN(8))
818
      dw0 = translate_inst_gen8(cg);
819
   else
820
      dw0 = translate_inst_gen6(cg);
821
 
822
   /*
823
    * 3-src instruction restrictions
824
    *
825
    *  - align16 with direct addressing
826
    *  - GRF or MRF dst
827
    *  - GRF src
828
    *  - sub_reg_num is DWORD aligned
829
    *  - no regioning except replication control
830
    *    (vert_stride == 0 && horz_stride == 0)
831
    */
832
   assert(cg->inst->access_mode == GEN6_ALIGN_16);
833
 
834
   assert(!dst->indirect);
835
   assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) ||
836
          (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16));
837
   assert(!(dst->origin & 0x3));
838
   assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
839
 
840
   if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
841
      dw1 = dst->origin << 19 |
842
            dst->writemask << 17 |
843
            cg->src[2].negate << 10 |
844
            cg->src[2].negate << 10 |
845
            cg->src[2].absolute << 9 |
846
            cg->src[1].negate << 8 |
847
            cg->src[1].absolute << 7 |
848
            cg->src[0].negate << 6 |
849
            cg->src[0].absolute << 5 |
850
            cg->inst->mask_ctrl << 2 |
851
            cg->flag_reg_num << 1 |
852
            cg->flag_sub_reg_num;
853
   } else {
854
      dw1 = dst->origin << 19 |
855
            dst->writemask << 17 |
856
            cg->src[2].negate << 9 |
857
            cg->src[2].absolute << 8 |
858
            cg->src[1].negate << 7 |
859
            cg->src[1].absolute << 6 |
860
            cg->src[0].negate << 5 |
861
            cg->src[0].absolute << 4 |
862
            cg->flag_sub_reg_num << 1 |
863
            (dst->file == GEN6_FILE_MRF);
864
   }
865
 
866
   for (i = 0; i < 3; i++) {
867
      const struct codegen_src *src = &cg->src[i];
868
 
869
      assert(!src->indirect);
870
      assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128);
871
      assert(!(src->origin & 0x3));
872
 
873
      assert((src->vert_stride == GEN6_VERTSTRIDE_4 &&
874
              src->horz_stride == GEN6_HORZSTRIDE_1) ||
875
             (src->vert_stride == GEN6_VERTSTRIDE_0 &&
876
              src->horz_stride == GEN6_HORZSTRIDE_0));
877
      assert(src->width == GEN6_WIDTH_4);
878
 
879
      dw_src[i] = src->origin << 7 |
880
                  src->swizzle[3] << 7 |
881
                  src->swizzle[2] << 5 |
882
                  src->swizzle[1] << 3 |
883
                  src->swizzle[0] << 1 |
884
                  (src->vert_stride == GEN6_VERTSTRIDE_0 &&
885
                   src->horz_stride == GEN6_HORZSTRIDE_0);
886
 
887
      /* only the lower 20 bits are used */
888
      assert((dw_src[i] & 0xfffff) == dw_src[i]);
889
   }
890
 
891
   code[0] = dw0;
892
   code[1] = dw1;
893
   /* concatenate the bits of dw_src */
894
   code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
895
   code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
896
}
897
 
898
/**
899
 * Sanity check the region parameters of the operands.
900
 */
901
static void
902
codegen_validate_region_restrictions(const struct codegen *cg)
903
{
904
   const int exec_size_map[] = {
905
      [GEN6_EXECSIZE_1] = 1,
906
      [GEN6_EXECSIZE_2] = 2,
907
      [GEN6_EXECSIZE_4] = 4,
908
      [GEN6_EXECSIZE_8] = 8,
909
      [GEN6_EXECSIZE_16] = 16,
910
      [GEN6_EXECSIZE_32] = 32,
911
   };
912
   const int width_map[] = {
913
      [GEN6_WIDTH_1] = 1,
914
      [GEN6_WIDTH_2] = 2,
915
      [GEN6_WIDTH_4] = 4,
916
      [GEN6_WIDTH_8] = 8,
917
      [GEN6_WIDTH_16] = 16,
918
   };
919
   const int horz_stride_map[] = {
920
      [GEN6_HORZSTRIDE_0] = 0,
921
      [GEN6_HORZSTRIDE_1] = 1,
922
      [GEN6_HORZSTRIDE_2] = 2,
923
      [GEN6_HORZSTRIDE_4] = 4,
924
   };
925
   const int vert_stride_map[] = {
926
      [GEN6_VERTSTRIDE_0] = 0,
927
      [GEN6_VERTSTRIDE_1] = 1,
928
      [GEN6_VERTSTRIDE_2] = 2,
929
      [GEN6_VERTSTRIDE_4] = 4,
930
      [GEN6_VERTSTRIDE_8] = 8,
931
      [GEN6_VERTSTRIDE_16] = 16,
932
      [GEN6_VERTSTRIDE_32] = 32,
933
      [7] = 64,
934
      [8] = 128,
935
      [9] = 256,
936
      [GEN6_VERTSTRIDE_VXH] = 0,
937
   };
938
   const int exec_size = exec_size_map[cg->inst->exec_size];
939
   int i;
940
 
941
   /* Sandy Bridge PRM, volume 4 part 2, page 94 */
942
 
943
   /* 1. (we don't do 32 anyway) */
944
   assert(exec_size <= 16);
945
 
946
   for (i = 0; i < Elements(cg->src); i++) {
947
      const int width = width_map[cg->src[i].width];
948
      const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
949
      const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
950
 
951
      if (src_is_null(cg, i))
952
         break;
953
 
954
      /* 3. */
955
      assert(exec_size >= width);
956
 
957
      if (exec_size == width) {
958
         /* 4. & 5. */
959
         if (horz_stride)
960
            assert(vert_stride == width * horz_stride);
961
      }
962
 
963
      if (width == 1) {
964
         /* 6. */
965
         assert(horz_stride == 0);
966
 
967
         /* 7. */
968
         if (exec_size == 1)
969
            assert(vert_stride == 0);
970
      }
971
 
972
      /* 8. */
973
      if (!vert_stride && !horz_stride)
974
         assert(width == 1);
975
   }
976
 
977
   /* derived from 10.1.2. & 10.2. */
978
   assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0);
979
}
980
 
981
static unsigned
982
translate_vfile(enum toy_file file)
983
{
984
   switch (file) {
985
   case TOY_FILE_ARF:   return GEN6_FILE_ARF;
986
   case TOY_FILE_GRF:   return GEN6_FILE_GRF;
987
   case TOY_FILE_MRF:   return GEN6_FILE_MRF;
988
   case TOY_FILE_IMM:   return GEN6_FILE_IMM;
989
   default:
990
      assert(!"unhandled toy file");
991
      return GEN6_FILE_GRF;
992
   }
993
}
994
 
995
static unsigned
996
translate_vtype(enum toy_type type)
997
{
998
   switch (type) {
999
   case TOY_TYPE_F:     return GEN6_TYPE_F;
1000
   case TOY_TYPE_D:     return GEN6_TYPE_D;
1001
   case TOY_TYPE_UD:    return GEN6_TYPE_UD;
1002
   case TOY_TYPE_W:     return GEN6_TYPE_W;
1003
   case TOY_TYPE_UW:    return GEN6_TYPE_UW;
1004
   case TOY_TYPE_V:     return GEN6_TYPE_V_IMM;
1005
   default:
1006
      assert(!"unhandled toy type");
1007
      return GEN6_TYPE_F;
1008
   }
1009
}
1010
 
1011
static unsigned
1012
translate_writemask(enum toy_writemask writemask)
1013
{
1014
   /* TOY_WRITEMASK_* are compatible with the hardware definitions */
1015
   assert(writemask <= 0xf);
1016
   return writemask;
1017
}
1018
 
1019
static unsigned
1020
translate_swizzle(enum toy_swizzle swizzle)
1021
{
1022
   /* TOY_SWIZZLE_* are compatible with the hardware definitions */
1023
   assert(swizzle <= 3);
1024
   return swizzle;
1025
}
1026
 
1027
/**
1028
 * Prepare for generating an instruction.
1029
 */
1030
static void
1031
codegen_prepare(struct codegen *cg, const struct ilo_dev *dev,
1032
                const struct toy_inst *inst, int pc, int rect_linear_width)
1033
{
1034
   int i;
1035
 
1036
   cg->dev = dev;
1037
   cg->inst = inst;
1038
   cg->pc = pc;
1039
 
1040
   cg->flag_reg_num = 0;
1041
   cg->flag_sub_reg_num = 0;
1042
 
1043
   cg->dst.file = translate_vfile(inst->dst.file);
1044
   cg->dst.type = translate_vtype(inst->dst.type);
1045
   cg->dst.indirect = inst->dst.indirect;
1046
   cg->dst.indirect_subreg = inst->dst.indirect_subreg;
1047
   cg->dst.origin = inst->dst.val32;
1048
 
1049
   /*
1050
    * From the Sandy Bridge PRM, volume 4 part 2, page 81:
1051
    *
1052
    *     "For a word or an unsigned word immediate data, software must
1053
    *      replicate the same 16-bit immediate value to both the lower word
1054
    *      and the high word of the 32-bit immediate field in an instruction."
1055
    */
1056
   if (inst->dst.file == TOY_FILE_IMM) {
1057
      switch (inst->dst.type) {
1058
      case TOY_TYPE_W:
1059
      case TOY_TYPE_UW:
1060
         cg->dst.origin &= 0xffff;
1061
         cg->dst.origin |= cg->dst.origin << 16;
1062
         break;
1063
      default:
1064
         break;
1065
      }
1066
   }
1067
 
1068
   cg->dst.writemask = translate_writemask(inst->dst.writemask);
1069
 
1070
   switch (inst->dst.rect) {
1071
   case TOY_RECT_LINEAR:
1072
      cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1073
      break;
1074
   default:
1075
      assert(!"unsupported dst region");
1076
      cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1077
      break;
1078
   }
1079
 
1080
   for (i = 0; i < Elements(cg->src); i++) {
1081
      struct codegen_src *src = &cg->src[i];
1082
 
1083
      src->file = translate_vfile(inst->src[i].file);
1084
      src->type = translate_vtype(inst->src[i].type);
1085
      src->indirect = inst->src[i].indirect;
1086
      src->indirect_subreg = inst->src[i].indirect_subreg;
1087
      src->origin = inst->src[i].val32;
1088
 
1089
      /* do the same for src */
1090
      if (inst->dst.file == TOY_FILE_IMM) {
1091
         switch (inst->src[i].type) {
1092
         case TOY_TYPE_W:
1093
         case TOY_TYPE_UW:
1094
            src->origin &= 0xffff;
1095
            src->origin |= src->origin << 16;
1096
            break;
1097
         default:
1098
            break;
1099
         }
1100
      }
1101
 
1102
      src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
1103
      src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
1104
      src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
1105
      src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
1106
      src->absolute = inst->src[i].absolute;
1107
      src->negate = inst->src[i].negate;
1108
 
1109
      switch (inst->src[i].rect) {
1110
      case TOY_RECT_LINEAR:
1111
         switch (rect_linear_width) {
1112
         case 1:
1113
            src->vert_stride = GEN6_VERTSTRIDE_1;
1114
            src->width = GEN6_WIDTH_1;
1115
            break;
1116
         case 2:
1117
            src->vert_stride = GEN6_VERTSTRIDE_2;
1118
            src->width = GEN6_WIDTH_2;
1119
            break;
1120
         case 4:
1121
            src->vert_stride = GEN6_VERTSTRIDE_4;
1122
            src->width = GEN6_WIDTH_4;
1123
            break;
1124
         case 8:
1125
            src->vert_stride = GEN6_VERTSTRIDE_8;
1126
            src->width = GEN6_WIDTH_8;
1127
            break;
1128
         case 16:
1129
            src->vert_stride = GEN6_VERTSTRIDE_16;
1130
            src->width = GEN6_WIDTH_16;
1131
            break;
1132
         default:
1133
            assert(!"unsupported TOY_RECT_LINEAR width");
1134
            src->vert_stride = GEN6_VERTSTRIDE_1;
1135
            src->width = GEN6_WIDTH_1;
1136
            break;
1137
         }
1138
         src->horz_stride = GEN6_HORZSTRIDE_1;
1139
         break;
1140
      case TOY_RECT_041:
1141
         src->vert_stride = GEN6_VERTSTRIDE_0;
1142
         src->width = GEN6_WIDTH_4;
1143
         src->horz_stride = GEN6_HORZSTRIDE_1;
1144
         break;
1145
      case TOY_RECT_010:
1146
         src->vert_stride = GEN6_VERTSTRIDE_0;
1147
         src->width = GEN6_WIDTH_1;
1148
         src->horz_stride = GEN6_HORZSTRIDE_0;
1149
         break;
1150
      case TOY_RECT_220:
1151
         src->vert_stride = GEN6_VERTSTRIDE_2;
1152
         src->width = GEN6_WIDTH_2;
1153
         src->horz_stride = GEN6_HORZSTRIDE_0;
1154
         break;
1155
      case TOY_RECT_440:
1156
         src->vert_stride = GEN6_VERTSTRIDE_4;
1157
         src->width = GEN6_WIDTH_4;
1158
         src->horz_stride = GEN6_HORZSTRIDE_0;
1159
         break;
1160
      case TOY_RECT_240:
1161
         src->vert_stride = GEN6_VERTSTRIDE_2;
1162
         src->width = GEN6_WIDTH_4;
1163
         src->horz_stride = GEN6_HORZSTRIDE_0;
1164
         break;
1165
      default:
1166
         assert(!"unsupported src region");
1167
         src->vert_stride = GEN6_VERTSTRIDE_1;
1168
         src->width = GEN6_WIDTH_1;
1169
         src->horz_stride = GEN6_HORZSTRIDE_1;
1170
         break;
1171
      }
1172
   }
1173
}
1174
 
1175
/**
1176
 * Generate HW shader code.  The instructions should have been legalized.
1177
 */
1178
void *
1179
toy_compiler_assemble(struct toy_compiler *tc, int *size)
1180
{
1181
   const struct toy_inst *inst;
1182
   uint32_t *code;
1183
   int pc;
1184
 
1185
   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
1186
   if (!code)
1187
      return NULL;
1188
 
1189
   pc = 0;
1190
   tc_head(tc);
1191
   while ((inst = tc_next(tc)) != NULL) {
1192
      uint32_t *dw = &code[pc * 4];
1193
      struct codegen cg;
1194
 
1195
      if (pc >= tc->num_instructions) {
1196
         tc_fail(tc, "wrong instructoun count");
1197
         break;
1198
      }
1199
 
1200
      codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width);
1201
      codegen_validate_region_restrictions(&cg);
1202
 
1203
      switch (inst->opcode) {
1204
      case GEN6_OPCODE_MAD:
1205
         codegen_inst_3src_gen6(&cg, dw);
1206
         break;
1207
      default:
1208
         codegen_inst_gen6(&cg, dw);
1209
         break;
1210
      }
1211
 
1212
      pc++;
1213
   }
1214
 
1215
   /* never return an invalid kernel */
1216
   if (tc->fail) {
1217
      FREE(code);
1218
      return NULL;
1219
   }
1220
 
1221
   if (size)
1222
      *size = pc * 4 * sizeof(uint32_t);
1223
 
1224
   return code;
1225
}