Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 2012-2013 LunarG, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
 * DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Chia-I Wu 
26
 */
27
 
28
#include "tgsi/tgsi_dump.h"
29
#include "tgsi/tgsi_util.h"
30
#include "toy_compiler.h"
31
#include "toy_tgsi.h"
32
#include "toy_legalize.h"
33
#include "toy_optimize.h"
34
#include "toy_helpers.h"
35
#include "ilo_shader_internal.h"
36
 
37
struct fs_compile_context {
38
   struct ilo_shader *shader;
39
   const struct ilo_shader_variant *variant;
40
 
41
   struct toy_compiler tc;
42
   struct toy_tgsi tgsi;
43
 
44
   int const_cache;
45
   int dispatch_mode;
46
 
47
   struct {
48
      int interp_perspective_pixel;
49
      int interp_perspective_centroid;
50
      int interp_perspective_sample;
51
      int interp_nonperspective_pixel;
52
      int interp_nonperspective_centroid;
53
      int interp_nonperspective_sample;
54
      int source_depth;
55
      int source_w;
56
      int pos_offset;
57
   } payloads[2];
58
 
59
   int first_const_grf;
60
   int first_attr_grf;
61
   int first_free_grf;
62
   int last_free_grf;
63
 
64
   int num_grf_per_vrf;
65
 
66
   int first_free_mrf;
67
   int last_free_mrf;
68
};
69
 
70
static void
71
fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
72
{
73
   struct toy_compiler *tc = &fcc->tc;
74
   const struct toy_src src_z =
75
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
76
   const struct toy_src src_w =
77
      tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
78
   const int fb_height =
79
      (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
80
   const bool origin_upper_left =
81
      (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
82
   const bool pixel_center_integer =
83
      (fcc->tgsi.props.fs_coord_pixel_center ==
84
       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
85
   struct toy_src subspan_x, subspan_y;
86
   struct toy_dst tmp, tmp_uw;
87
   struct toy_dst real_dst[4];
88
 
89
   tdst_transpose(dst, real_dst);
90
 
91
   subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
92
   subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
93
 
94
   subspan_y = tsrc_offset(subspan_x, 0, 1);
95
 
96
   tmp_uw = tdst_uw(tc_alloc_tmp(tc));
97
   tmp = tc_alloc_tmp(tc);
98
 
99
   /* X */
100
   tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
101
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
102
   if (pixel_center_integer)
103
      tc_MOV(tc, real_dst[0], tsrc_from(tmp));
104
   else
105
      tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
106
 
107
   /* Y */
108
   tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
109
   tc_MOV(tc, tmp, tsrc_from(tmp_uw));
110
   if (origin_upper_left && pixel_center_integer) {
111
      tc_MOV(tc, real_dst[1], tsrc_from(tmp));
112
   }
113
   else {
114
      struct toy_src y = tsrc_from(tmp);
115
      float offset = 0.0f;
116
 
117
      if (!pixel_center_integer)
118
         offset += 0.5f;
119
 
120
      if (!origin_upper_left) {
121
         offset += (float) (fb_height - 1);
122
         y = tsrc_negate(y);
123
      }
124
 
125
      tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
126
   }
127
 
128
   /* Z and W */
129
   tc_MOV(tc, real_dst[2], src_z);
130
   tc_INV(tc, real_dst[3], src_w);
131
}
132
 
133
static void
134
fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
135
{
136
   struct toy_compiler *tc = &fcc->tc;
137
   const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
138
   struct toy_dst tmp_f, tmp;
139
   struct toy_dst real_dst[4];
140
 
141
   tdst_transpose(dst, real_dst);
142
 
143
   tmp_f = tc_alloc_tmp(tc);
144
   tmp = tdst_d(tmp_f);
145
   tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
146
   tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
147
   tc_MOV(tc, tmp_f, tsrc_from(tmp));
148
 
149
   /* convert to 1.0 and -1.0 */
150
   tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
151
   tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
152
 
153
   tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
154
   tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
155
   tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
156
}
157
 
158
static void
159
fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
160
{
161
   struct toy_compiler *tc = &fcc->tc;
162
   struct toy_dst real_dst[4];
163
   bool is_const = false;
164
   int grf, interp, ch;
165
 
166
   tdst_transpose(dst, real_dst);
167
 
168
   grf = fcc->first_attr_grf + slot * 2;
169
 
170
   switch (fcc->tgsi.inputs[slot].interp) {
171
   case TGSI_INTERPOLATE_CONSTANT:
172
      is_const = true;
173
      break;
174
   case TGSI_INTERPOLATE_LINEAR:
175
      if (fcc->tgsi.inputs[slot].centroid)
176
         interp = fcc->payloads[0].interp_nonperspective_centroid;
177
      else
178
         interp = fcc->payloads[0].interp_nonperspective_pixel;
179
      break;
180
   case TGSI_INTERPOLATE_COLOR:
181
      if (fcc->variant->u.fs.flatshade) {
182
         is_const = true;
183
         break;
184
      }
185
      /* fall through */
186
   case TGSI_INTERPOLATE_PERSPECTIVE:
187
      if (fcc->tgsi.inputs[slot].centroid)
188
         interp = fcc->payloads[0].interp_perspective_centroid;
189
      else
190
         interp = fcc->payloads[0].interp_perspective_pixel;
191
      break;
192
   default:
193
      assert(!"unexpected FS interpolation");
194
      interp = fcc->payloads[0].interp_perspective_pixel;
195
      break;
196
   }
197
 
198
   if (is_const) {
199
      struct toy_src a0[4];
200
 
201
      a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
202
      a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
203
      a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
204
      a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
205
 
206
      for (ch = 0; ch < 4; ch++)
207
         tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
208
   }
209
   else {
210
      struct toy_src attr[4], uv;
211
 
212
      attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
213
      attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
214
      attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
215
      attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
216
 
217
      uv = tsrc(TOY_FILE_GRF, interp, 0);
218
 
219
      for (ch = 0; ch < 4; ch++) {
220
         tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch],
221
               tsrc_rect(attr[ch], TOY_RECT_010), uv);
222
      }
223
   }
224
 
225
   if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
226
      tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
227
      tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
228
      tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
229
   }
230
}
231
 
232
static void
233
fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
234
                        struct toy_dst dst, int dim, int idx)
235
{
236
   int slot;
237
 
238
   assert(!dim);
239
 
240
   slot = toy_tgsi_find_input(&fcc->tgsi, idx);
241
   if (slot < 0)
242
      return;
243
 
244
   switch (fcc->tgsi.inputs[slot].semantic_name) {
245
   case TGSI_SEMANTIC_POSITION:
246
      fetch_position(fcc, dst);
247
      break;
248
   case TGSI_SEMANTIC_FACE:
249
      fetch_face(fcc, dst);
250
      break;
251
   default:
252
      fetch_attr(fcc, dst, slot);
253
      break;
254
   }
255
}
256
 
257
static void
258
fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
259
                                    struct toy_dst dst, int dim,
260
                                    struct toy_src idx)
261
{
262
   const struct toy_dst offset =
263
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
264
   struct toy_compiler *tc = &fcc->tc;
265
   unsigned simd_mode, param_size;
266
   struct toy_inst *inst;
267
   struct toy_src desc, real_src[4];
268
   struct toy_dst tmp, real_dst[4];
269
   int i;
270
 
271
   tsrc_transpose(idx, real_src);
272
 
273
   /* set offset */
274
   inst = tc_MOV(tc, offset, real_src[0]);
275
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
276
 
277
   switch (inst->exec_size) {
278
   case GEN6_EXECSIZE_8:
279
      simd_mode = GEN6_MSG_SAMPLER_SIMD8;
280
      param_size = 1;
281
      break;
282
   case GEN6_EXECSIZE_16:
283
      simd_mode = GEN6_MSG_SAMPLER_SIMD16;
284
      param_size = 2;
285
      break;
286
   default:
287
      assert(!"unsupported execution size");
288
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
289
      return;
290
      break;
291
   }
292
 
293
   desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
294
         simd_mode,
295
         GEN6_MSG_SAMPLER_LD,
296
         0,
297
         fcc->shader->bt.const_base + dim);
298
 
299
   tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
300
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
301
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
302
 
303
   tdst_transpose(dst, real_dst);
304
   for (i = 0; i < 4; i++) {
305
      const struct toy_src src =
306
         tsrc_offset(tsrc_from(tmp), param_size * i, 0);
307
 
308
      /* cast to type D to make sure these are raw moves */
309
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
310
   }
311
}
312
 
313
static bool
314
fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
315
                               struct toy_dst dst, int dim,
316
                               struct toy_src idx)
317
{
318
   const int grf = fcc->first_const_grf + idx.val32 / 2;
319
   const int grf_subreg = (idx.val32 & 1) * 16;
320
   struct toy_src src;
321
   struct toy_dst real_dst[4];
322
   int i;
323
 
324
   if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
325
       grf >= fcc->first_attr_grf)
326
      return false;
327
 
328
   src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
329
 
330
   tdst_transpose(dst, real_dst);
331
   for (i = 0; i < 4; i++) {
332
      /* cast to type D to make sure these are raw moves */
333
      tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
334
   }
335
 
336
   return true;
337
}
338
 
339
static void
340
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
341
                                struct toy_dst dst, int dim, struct toy_src idx)
342
{
343
   const struct toy_dst header =
344
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
345
   const struct toy_dst global_offset =
346
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
347
   const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
348
   struct toy_compiler *tc = &fcc->tc;
349
   unsigned msg_type, msg_ctrl, msg_len;
350
   struct toy_inst *inst;
351
   struct toy_src desc;
352
   struct toy_dst tmp, real_dst[4];
353
   int i;
354
 
355
   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
356
      return;
357
 
358
   /* set message header */
359
   inst = tc_MOV(tc, header, r0);
360
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
361
 
362
   /* set global offset */
363
   inst = tc_MOV(tc, global_offset, idx);
364
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
365
   inst->exec_size = GEN6_EXECSIZE_1;
366
   inst->src[0].rect = TOY_RECT_010;
367
 
368
   msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ;
369
   msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO;
370
   msg_len = 1;
371
 
372
   desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
373
         msg_type, msg_ctrl, fcc->shader->bt.const_base + dim);
374
 
375
   tmp = tc_alloc_tmp(tc);
376
 
377
   tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
378
 
379
   tdst_transpose(dst, real_dst);
380
   for (i = 0; i < 4; i++) {
381
      const struct toy_src src =
382
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
383
 
384
      /* cast to type D to make sure these are raw moves */
385
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
386
   }
387
}
388
 
389
static void
390
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
391
                                struct toy_dst dst, int dim, struct toy_src idx)
392
{
393
   struct toy_compiler *tc = &fcc->tc;
394
   const struct toy_dst offset =
395
      tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
396
   struct toy_src desc;
397
   struct toy_inst *inst;
398
   struct toy_dst tmp, real_dst[4];
399
   int i;
400
 
401
   if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
402
      return;
403
 
404
   /*
405
    * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
406
    * changed from OWord Block Read to ld to increase performance in the
407
    * classic driver.  Since we use the constant cache instead of the data
408
    * cache, I wonder if we still want to follow the classic driver.
409
    */
410
 
411
   /* set offset */
412
   inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
413
   inst->exec_size = GEN6_EXECSIZE_8;
414
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
415
 
416
   desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
417
         GEN6_MSG_SAMPLER_SIMD4X2,
418
         GEN6_MSG_SAMPLER_LD,
419
         0,
420
         fcc->shader->bt.const_base + dim);
421
 
422
   tmp = tc_alloc_tmp(tc);
423
   inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
424
   inst->exec_size = GEN6_EXECSIZE_8;
425
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
426
 
427
   tdst_transpose(dst, real_dst);
428
   for (i = 0; i < 4; i++) {
429
      const struct toy_src src =
430
         tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
431
 
432
      /* cast to type D to make sure these are raw moves */
433
      tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
434
   }
435
}
436
 
437
static void
438
fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
439
                         struct toy_dst dst, int idx)
440
{
441
   const uint32_t *imm;
442
   struct toy_dst real_dst[4];
443
   int ch;
444
 
445
   imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
446
 
447
   tdst_transpose(dst, real_dst);
448
   /* raw moves */
449
   for (ch = 0; ch < 4; ch++)
450
      tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
451
}
452
 
453
static void
454
fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
455
                        struct toy_dst dst, int dim, int idx)
456
{
457
   struct toy_compiler *tc = &fcc->tc;
458
   const struct toy_tgsi *tgsi = &fcc->tgsi;
459
   int slot;
460
 
461
   assert(!dim);
462
 
463
   slot = toy_tgsi_find_system_value(tgsi, idx);
464
   if (slot < 0)
465
      return;
466
 
467
   switch (tgsi->system_values[slot].semantic_name) {
468
   case TGSI_SEMANTIC_PRIMID:
469
   case TGSI_SEMANTIC_INSTANCEID:
470
   case TGSI_SEMANTIC_VERTEXID:
471
   default:
472
      tc_fail(tc, "unhandled system value");
473
      tc_MOV(tc, dst, tsrc_imm_d(0));
474
      break;
475
   }
476
}
477
 
478
static void
479
fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
480
                            struct toy_inst *inst)
481
{
482
   struct toy_compiler *tc = &fcc->tc;
483
   int dim, idx;
484
 
485
   assert(inst->src[0].file == TOY_FILE_IMM);
486
   dim = inst->src[0].val32;
487
 
488
   assert(inst->src[1].file == TOY_FILE_IMM);
489
   idx = inst->src[1].val32;
490
 
491
   switch (inst->opcode) {
492
   case TOY_OPCODE_TGSI_IN:
493
      fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
494
      break;
495
   case TOY_OPCODE_TGSI_CONST:
496
      if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
497
         fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
498
      else
499
         fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
500
      break;
501
   case TOY_OPCODE_TGSI_SV:
502
      fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
503
      break;
504
   case TOY_OPCODE_TGSI_IMM:
505
      assert(!dim);
506
      fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
507
      break;
508
   default:
509
      tc_fail(tc, "unhandled TGSI fetch");
510
      break;
511
   }
512
 
513
   tc_discard_inst(tc, inst);
514
}
515
 
516
static void
517
fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
518
                              struct toy_inst *inst)
519
{
520
   struct toy_compiler *tc = &fcc->tc;
521
   enum tgsi_file_type file;
522
   int dim, idx;
523
   struct toy_src indirect_dim, indirect_idx;
524
 
525
   assert(inst->src[0].file == TOY_FILE_IMM);
526
   file = inst->src[0].val32;
527
 
528
   assert(inst->src[1].file == TOY_FILE_IMM);
529
   dim = inst->src[1].val32;
530
   indirect_dim = inst->src[2];
531
 
532
   assert(inst->src[3].file == TOY_FILE_IMM);
533
   idx = inst->src[3].val32;
534
   indirect_idx = inst->src[4];
535
 
536
   /* no dimension indirection */
537
   assert(indirect_dim.file == TOY_FILE_IMM);
538
   dim += indirect_dim.val32;
539
 
540
   switch (inst->opcode) {
541
   case TOY_OPCODE_TGSI_INDIRECT_FETCH:
542
      if (file == TGSI_FILE_CONSTANT) {
543
         if (idx) {
544
            struct toy_dst tmp = tc_alloc_tmp(tc);
545
 
546
            tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
547
            indirect_idx = tsrc_from(tmp);
548
         }
549
 
550
         fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
551
         break;
552
      }
553
      /* fall through */
554
   case TOY_OPCODE_TGSI_INDIRECT_STORE:
555
   default:
556
      tc_fail(tc, "unhandled TGSI indirection");
557
      break;
558
   }
559
 
560
   tc_discard_inst(tc, inst);
561
}
562
 
563
/**
564
 * Emit instructions to move sampling parameters to the message registers.
565
 */
566
static int
567
fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
568
                           int base_mrf, int param_size,
569
                           struct toy_src *coords, int num_coords,
570
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
571
                           struct toy_src *ddx, struct toy_src *ddy,
572
                           int num_derivs)
573
{
574
   int num_params, i;
575
 
576
   assert(num_coords <= 4);
577
   assert(num_derivs <= 3 && num_derivs <= num_coords);
578
 
579
#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
580
   switch (msg_type) {
581
   case GEN6_MSG_SAMPLER_SAMPLE:
582
      for (i = 0; i < num_coords; i++)
583
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
584
      num_params = num_coords;
585
      break;
586
   case GEN6_MSG_SAMPLER_SAMPLE_B:
587
   case GEN6_MSG_SAMPLER_SAMPLE_L:
588
      for (i = 0; i < num_coords; i++)
589
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
590
      tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
591
      num_params = 5;
592
      break;
593
   case GEN6_MSG_SAMPLER_SAMPLE_C:
594
      for (i = 0; i < num_coords; i++)
595
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
596
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
597
      num_params = 5;
598
      break;
599
   case GEN6_MSG_SAMPLER_SAMPLE_D:
600
      for (i = 0; i < num_coords; i++)
601
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
602
      for (i = 0; i < num_derivs; i++) {
603
         tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
604
         tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
605
      }
606
      num_params = 4 + num_derivs * 2;
607
      break;
608
   case GEN6_MSG_SAMPLER_SAMPLE_B_C:
609
   case GEN6_MSG_SAMPLER_SAMPLE_L_C:
610
      for (i = 0; i < num_coords; i++)
611
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
612
      tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
613
      tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
614
      num_params = 6;
615
      break;
616
   case GEN6_MSG_SAMPLER_LD:
617
      assert(num_coords <= 3);
618
 
619
      for (i = 0; i < num_coords; i++)
620
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
621
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
622
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
623
      num_params = 5;
624
      break;
625
   case GEN6_MSG_SAMPLER_RESINFO:
626
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
627
      num_params = 1;
628
      break;
629
   default:
630
      tc_fail(tc, "unknown sampler opcode");
631
      num_params = 0;
632
      break;
633
   }
634
#undef SAMPLER_PARAM
635
 
636
   return num_params * param_size;
637
}
638
 
639
static int
640
fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
641
                           int base_mrf, int param_size,
642
                           struct toy_src *coords, int num_coords,
643
                           struct toy_src bias_or_lod, struct toy_src ref_or_si,
644
                           struct toy_src *ddx, struct toy_src *ddy,
645
                           int num_derivs)
646
{
647
   int num_params, i;
648
 
649
   assert(num_coords <= 4);
650
   assert(num_derivs <= 3 && num_derivs <= num_coords);
651
 
652
#define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
653
   switch (msg_type) {
654
   case GEN6_MSG_SAMPLER_SAMPLE:
655
      for (i = 0; i < num_coords; i++)
656
         tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
657
      num_params = num_coords;
658
      break;
659
   case GEN6_MSG_SAMPLER_SAMPLE_B:
660
   case GEN6_MSG_SAMPLER_SAMPLE_L:
661
      tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
662
      for (i = 0; i < num_coords; i++)
663
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
664
      num_params = 1 + num_coords;
665
      break;
666
   case GEN6_MSG_SAMPLER_SAMPLE_C:
667
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
668
      for (i = 0; i < num_coords; i++)
669
         tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
670
      num_params = 1 + num_coords;
671
      break;
672
   case GEN6_MSG_SAMPLER_SAMPLE_D:
673
      for (i = 0; i < num_coords; i++) {
674
         tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
675
         if (i < num_derivs) {
676
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
677
            tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
678
         }
679
      }
680
      num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
681
      break;
682
   case GEN6_MSG_SAMPLER_SAMPLE_B_C:
683
   case GEN6_MSG_SAMPLER_SAMPLE_L_C:
684
      tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
685
      tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
686
      for (i = 0; i < num_coords; i++)
687
         tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
688
      num_params = 2 + num_coords;
689
      break;
690
   case GEN6_MSG_SAMPLER_LD:
691
      assert(num_coords >= 1 && num_coords <= 3);
692
 
693
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
694
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
695
      for (i = 1; i < num_coords; i++)
696
         tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
697
      num_params = 1 + num_coords;
698
      break;
699
   case GEN6_MSG_SAMPLER_RESINFO:
700
      tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
701
      num_params = 1;
702
      break;
703
   default:
704
      tc_fail(tc, "unknown sampler opcode");
705
      num_params = 0;
706
      break;
707
   }
708
#undef SAMPLER_PARAM
709
 
710
   return num_params * param_size;
711
}
712
 
713
/**
714
 * Set up message registers and return the message descriptor for sampling.
715
 */
716
static struct toy_src
717
fs_prepare_tgsi_sampling(struct fs_compile_context *fcc,
718
                         const struct toy_inst *inst,
719
                         int base_mrf, const uint32_t *saturate_coords,
720
                         unsigned *ret_sampler_index)
721
{
722
   struct toy_compiler *tc = &fcc->tc;
723
   unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
724
   struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
725
   int num_coords, ref_pos, num_derivs;
726
   int sampler_src, param_size, i;
727
 
728
   switch (inst->exec_size) {
729
   case GEN6_EXECSIZE_8:
730
      simd_mode = GEN6_MSG_SAMPLER_SIMD8;
731
      param_size = 1;
732
      break;
733
   case GEN6_EXECSIZE_16:
734
      simd_mode = GEN6_MSG_SAMPLER_SIMD16;
735
      param_size = 2;
736
      break;
737
   default:
738
      tc_fail(tc, "unsupported execute size for sampling");
739
      return tsrc_null();
740
      break;
741
   }
742
 
743
   num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
744
   tsrc_transpose(inst->src[0], coords);
745
   bias_or_lod = tsrc_null();
746
   ref_or_si = tsrc_null();
747
   num_derivs = 0;
748
   sampler_src = 1;
749
 
750
   /*
751
    * For TXD,
752
    *
753
    *   src0 := (x, y, z, w)
754
    *   src1 := ddx
755
    *   src2 := ddy
756
    *   src3 := sampler
757
    *
758
    * For TEX2, TXB2, and TXL2,
759
    *
760
    *   src0 := (x, y, z, w)
761
    *   src1 := (v or bias or lod, ...)
762
    *   src2 := sampler
763
    *
764
    * For TEX, TXB, TXL, and TXP,
765
    *
766
    *   src0 := (x, y, z, w or bias or lod or projection)
767
    *   src1 := sampler
768
    *
769
    * For TXQ,
770
    *
771
    *   src0 := (lod, ...)
772
    *   src1 := sampler
773
    *
774
    * For TXQ_LZ,
775
    *
776
    *   src0 := sampler
777
    *
778
    * And for TXF,
779
    *
780
    *   src0 := (x, y, z, w or lod)
781
    *   src1 := sampler
782
    *
783
    * State trackers should not generate opcode+texture combinations with
784
    * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
785
    */
786
   switch (inst->opcode) {
787
   case TOY_OPCODE_TGSI_TEX:
788
      if (ref_pos >= 0) {
789
         assert(ref_pos < 4);
790
 
791
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
792
         ref_or_si = coords[ref_pos];
793
      }
794
      else {
795
         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
796
      }
797
      break;
798
   case TOY_OPCODE_TGSI_TXD:
799
      if (ref_pos >= 0) {
800
         assert(ref_pos < 4);
801
 
802
         msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
803
         ref_or_si = coords[ref_pos];
804
 
805
         if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
806
            tc_fail(tc, "TXD with shadow sampler not supported");
807
      }
808
      else {
809
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
810
      }
811
 
812
      tsrc_transpose(inst->src[1], ddx);
813
      tsrc_transpose(inst->src[2], ddy);
814
      num_derivs = num_coords;
815
      sampler_src = 3;
816
      break;
817
   case TOY_OPCODE_TGSI_TXP:
818
      if (ref_pos >= 0) {
819
         assert(ref_pos < 3);
820
 
821
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
822
         ref_or_si = coords[ref_pos];
823
      }
824
      else {
825
         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
826
      }
827
 
828
      /* project the coordinates */
829
      {
830
         struct toy_dst tmp[4];
831
 
832
         tc_alloc_tmp4(tc, tmp);
833
 
834
         tc_INV(tc, tmp[3], coords[3]);
835
         for (i = 0; i < num_coords && i < 3; i++) {
836
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
837
            coords[i] = tsrc_from(tmp[i]);
838
         }
839
 
840
         if (ref_pos >= i) {
841
            tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
842
            ref_or_si = tsrc_from(tmp[ref_pos]);
843
         }
844
      }
845
      break;
846
   case TOY_OPCODE_TGSI_TXB:
847
      if (ref_pos >= 0) {
848
         assert(ref_pos < 3);
849
 
850
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
851
         ref_or_si = coords[ref_pos];
852
      }
853
      else {
854
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
855
      }
856
 
857
      bias_or_lod = coords[3];
858
      break;
859
   case TOY_OPCODE_TGSI_TXL:
860
      if (ref_pos >= 0) {
861
         assert(ref_pos < 3);
862
 
863
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
864
         ref_or_si = coords[ref_pos];
865
      }
866
      else {
867
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
868
      }
869
 
870
      bias_or_lod = coords[3];
871
      break;
872
   case TOY_OPCODE_TGSI_TXF:
873
      msg_type = GEN6_MSG_SAMPLER_LD;
874
 
875
      switch (inst->tex.target) {
876
      case TGSI_TEXTURE_2D_MSAA:
877
      case TGSI_TEXTURE_2D_ARRAY_MSAA:
878
         assert(ref_pos >= 0 && ref_pos < 4);
879
         /* lod is always 0 */
880
         bias_or_lod = tsrc_imm_d(0);
881
         ref_or_si = coords[ref_pos];
882
         break;
883
      default:
884
         bias_or_lod = coords[3];
885
         break;
886
      }
887
 
888
      /* offset the coordinates */
889
      if (!tsrc_is_null(inst->tex.offsets[0])) {
890
         struct toy_dst tmp[4];
891
         struct toy_src offsets[4];
892
 
893
         tc_alloc_tmp4(tc, tmp);
894
         tsrc_transpose(inst->tex.offsets[0], offsets);
895
 
896
         for (i = 0; i < num_coords; i++) {
897
            tc_ADD(tc, tmp[i], coords[i], offsets[i]);
898
            coords[i] = tsrc_from(tmp[i]);
899
         }
900
      }
901
 
902
      sampler_src = 1;
903
      break;
904
   case TOY_OPCODE_TGSI_TXQ:
905
      msg_type = GEN6_MSG_SAMPLER_RESINFO;
906
      num_coords = 0;
907
      bias_or_lod = coords[0];
908
      break;
909
   case TOY_OPCODE_TGSI_TXQ_LZ:
910
      msg_type = GEN6_MSG_SAMPLER_RESINFO;
911
      num_coords = 0;
912
      sampler_src = 0;
913
      break;
914
   case TOY_OPCODE_TGSI_TEX2:
915
      if (ref_pos >= 0) {
916
         assert(ref_pos < 5);
917
 
918
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
919
 
920
         if (ref_pos >= 4) {
921
            struct toy_src src1[4];
922
            tsrc_transpose(inst->src[1], src1);
923
            ref_or_si = src1[ref_pos - 4];
924
         }
925
         else {
926
            ref_or_si = coords[ref_pos];
927
         }
928
      }
929
      else {
930
         msg_type = GEN6_MSG_SAMPLER_SAMPLE;
931
      }
932
 
933
      sampler_src = 2;
934
      break;
935
   case TOY_OPCODE_TGSI_TXB2:
936
      if (ref_pos >= 0) {
937
         assert(ref_pos < 4);
938
 
939
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
940
         ref_or_si = coords[ref_pos];
941
      }
942
      else {
943
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
944
      }
945
 
946
      {
947
         struct toy_src src1[4];
948
         tsrc_transpose(inst->src[1], src1);
949
         bias_or_lod = src1[0];
950
      }
951
 
952
      sampler_src = 2;
953
      break;
954
   case TOY_OPCODE_TGSI_TXL2:
955
      if (ref_pos >= 0) {
956
         assert(ref_pos < 4);
957
 
958
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
959
         ref_or_si = coords[ref_pos];
960
      }
961
      else {
962
         msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
963
      }
964
 
965
      {
966
         struct toy_src src1[4];
967
         tsrc_transpose(inst->src[1], src1);
968
         bias_or_lod = src1[0];
969
      }
970
 
971
      sampler_src = 2;
972
      break;
973
   default:
974
      assert(!"unhandled sampling opcode");
975
      return tsrc_null();
976
      break;
977
   }
978
 
979
   assert(inst->src[sampler_src].file == TOY_FILE_IMM);
980
   sampler_index = inst->src[sampler_src].val32;
981
   binding_table_index = fcc->shader->bt.tex_base + sampler_index;
982
 
983
   /*
984
    * From the Sandy Bridge PRM, volume 4 part 1, page 18:
985
    *
986
    *     "Note that the (cube map) coordinates delivered to the sampling
987
    *      engine must already have been divided by the component with the
988
    *      largest absolute value."
989
    */
990
   switch (inst->tex.target) {
991
   case TGSI_TEXTURE_CUBE:
992
   case TGSI_TEXTURE_SHADOWCUBE:
993
   case TGSI_TEXTURE_CUBE_ARRAY:
994
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
995
      /* TXQ does not need coordinates */
996
      if (num_coords >= 3) {
997
         struct toy_dst tmp[4];
998
 
999
         tc_alloc_tmp4(tc, tmp);
1000
 
1001
         tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
1002
               tsrc_absolute(coords[1]), GEN6_COND_GE);
1003
         tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
1004
               tsrc_absolute(coords[2]), GEN6_COND_GE);
1005
         tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
1006
 
1007
         for (i = 0; i < 3; i++) {
1008
            tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
1009
            coords[i] = tsrc_from(tmp[i]);
1010
         }
1011
      }
1012
      break;
1013
   }
1014
 
1015
   /*
1016
    * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
1017
    * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
1018
    * so that sampling outside the border gets the correct colors.
1019
    */
1020
   for (i = 0; i < MIN2(num_coords, 3); i++) {
1021
      bool is_rect;
1022
 
1023
      if (!(saturate_coords[i] & (1 << sampler_index)))
1024
         continue;
1025
 
1026
      switch (inst->tex.target) {
1027
      case TGSI_TEXTURE_RECT:
1028
      case TGSI_TEXTURE_SHADOWRECT:
1029
         is_rect = true;
1030
         break;
1031
      default:
1032
         is_rect = false;
1033
         break;
1034
      }
1035
 
1036
      if (is_rect) {
1037
         struct toy_src min, max;
1038
         struct toy_dst tmp;
1039
 
1040
         tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1041
         tmp = tc_alloc_tmp(tc);
1042
 
1043
         /* saturate to [0, width] or [0, height] */
1044
         /* TODO TXQ? */
1045
         min = tsrc_imm_f(0.0f);
1046
         max = tsrc_imm_f(2048.0f);
1047
 
1048
         tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G);
1049
         tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L);
1050
 
1051
         coords[i] = tsrc_from(tmp);
1052
      }
1053
      else {
1054
         struct toy_dst tmp;
1055
         struct toy_inst *inst2;
1056
 
1057
         tmp = tc_alloc_tmp(tc);
1058
 
1059
         /* saturate to [0.0f, 1.0f] */
1060
         inst2 = tc_MOV(tc, tmp, coords[i]);
1061
         inst2->saturate = true;
1062
 
1063
         coords[i] = tsrc_from(tmp);
1064
      }
1065
   }
1066
 
1067
   /* set up sampler parameters */
1068
   if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
1069
      msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1070
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1071
   }
1072
   else {
1073
      msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1074
            coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1075
   }
1076
 
1077
   /*
1078
    * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1079
    *
1080
    *     "The maximum message length allowed to the sampler is 11. This would
1081
    *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1082
    *      SIMD16."
1083
    */
1084
   if (msg_len > 11)
1085
      tc_fail(tc, "maximum length for messages to the sampler is 11");
1086
 
1087
   if (ret_sampler_index)
1088
      *ret_sampler_index = sampler_index;
1089
 
1090
   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1091
         false, simd_mode, msg_type, sampler_index, binding_table_index);
1092
}
1093
 
1094
static void
1095
fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1096
                              struct toy_inst *inst)
1097
{
1098
   struct toy_compiler *tc = &fcc->tc;
1099
   struct toy_dst dst[4], tmp[4];
1100
   struct toy_src desc;
1101
   unsigned sampler_index;
1102
   int swizzles[4], i;
1103
   bool need_filter;
1104
 
1105
   desc = fs_prepare_tgsi_sampling(fcc, inst,
1106
         fcc->first_free_mrf,
1107
         fcc->variant->saturate_tex_coords,
1108
         &sampler_index);
1109
 
1110
   switch (inst->opcode) {
1111
   case TOY_OPCODE_TGSI_TXF:
1112
   case TOY_OPCODE_TGSI_TXQ:
1113
   case TOY_OPCODE_TGSI_TXQ_LZ:
1114
      need_filter = false;
1115
      break;
1116
   default:
1117
      need_filter = true;
1118
      break;
1119
   }
1120
 
1121
   toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
1122
   inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1123
   inst->src[1] = desc;
1124
   for (i = 2; i < Elements(inst->src); i++)
1125
      inst->src[i] = tsrc_null();
1126
 
1127
   /* write to temps first */
1128
   tc_alloc_tmp4(tc, tmp);
1129
   for (i = 0; i < 4; i++)
1130
      tmp[i].type = inst->dst.type;
1131
   tdst_transpose(inst->dst, dst);
1132
   inst->dst = tmp[0];
1133
 
1134
   tc_move_inst(tc, inst);
1135
 
1136
   if (need_filter) {
1137
      assert(sampler_index < fcc->variant->num_sampler_views);
1138
      swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1139
      swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1140
      swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1141
      swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1142
   }
1143
   else {
1144
      swizzles[0] = PIPE_SWIZZLE_RED;
1145
      swizzles[1] = PIPE_SWIZZLE_GREEN;
1146
      swizzles[2] = PIPE_SWIZZLE_BLUE;
1147
      swizzles[3] = PIPE_SWIZZLE_ALPHA;
1148
   }
1149
 
1150
   /* swizzle the results */
1151
   for (i = 0; i < 4; i++) {
1152
      switch (swizzles[i]) {
1153
      case PIPE_SWIZZLE_ZERO:
1154
         tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1155
         break;
1156
      case PIPE_SWIZZLE_ONE:
1157
         tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1158
         break;
1159
      default:
1160
         tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1161
         break;
1162
      }
1163
   }
1164
}
1165
 
1166
static void
1167
fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1168
{
1169
   struct toy_dst dst[4];
1170
   struct toy_src src[4];
1171
   int i;
1172
 
1173
   tdst_transpose(inst->dst, dst);
1174
   tsrc_transpose(inst->src[0], src);
1175
 
1176
   /*
1177
    * Every four fragments are from a 2x2 subspan, with
1178
    *
1179
    *   fragment 1 on the top-left,
1180
    *   fragment 2 on the top-right,
1181
    *   fragment 3 on the bottom-left,
1182
    *   fragment 4 on the bottom-right.
1183
    *
1184
    * DDX should thus produce
1185
    *
1186
    *   dst = src.yyww - src.xxzz
1187
    *
1188
    * and DDY should produce
1189
    *
1190
    *   dst = src.zzww - src.xxyy
1191
    *
1192
    * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to
1193
    * play with the region parameters.
1194
    */
1195
   if (inst->opcode == TOY_OPCODE_DDX) {
1196
      for (i = 0; i < 4; i++) {
1197
         struct toy_src left, right;
1198
 
1199
         left = tsrc_rect(src[i], TOY_RECT_220);
1200
         right = tsrc_offset(left, 0, 1);
1201
 
1202
         tc_ADD(tc, dst[i], right, tsrc_negate(left));
1203
      }
1204
   }
1205
   else {
1206
      for (i = 0; i < 4; i++) {
1207
         struct toy_src top, bottom;
1208
 
1209
         /* approximate with dst = src.zzzz - src.xxxx */
1210
         top = tsrc_rect(src[i], TOY_RECT_440);
1211
         bottom = tsrc_offset(top, 0, 2);
1212
 
1213
         tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1214
      }
1215
   }
1216
 
1217
   tc_discard_inst(tc, inst);
1218
}
1219
 
1220
static void
1221
fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1222
{
1223
   /* fs_write_fb() has set up the message registers */
1224
   toy_compiler_lower_to_send(tc, inst, true,
1225
         GEN6_SFID_DP_RC);
1226
}
1227
 
1228
static void
1229
fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1230
{
1231
   struct toy_dst pixel_mask_dst;
1232
   struct toy_src f0, pixel_mask;
1233
   struct toy_inst *tmp;
1234
 
1235
   /* lower half of r1.7:ud */
1236
   pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1237
   pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1238
 
1239
   f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010);
1240
 
1241
   /* KILL or KILL_IF */
1242
   if (tsrc_is_null(inst->src[0])) {
1243
      struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1244
      struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0));
1245
 
1246
      /* create a mask that masks out all pixels */
1247
      tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1248
      tmp->exec_size = GEN6_EXECSIZE_1;
1249
      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1250
 
1251
      tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ);
1252
 
1253
      /* swapping the two src operands breaks glBitmap()!? */
1254
      tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1255
      tmp->exec_size = GEN6_EXECSIZE_1;
1256
      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1257
   }
1258
   else {
1259
      struct toy_src src[4];
1260
      int i;
1261
 
1262
      tsrc_transpose(inst->src[0], src);
1263
      /* mask out killed pixels */
1264
      for (i = 0; i < 4; i++) {
1265
         tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1266
               GEN6_COND_GE);
1267
 
1268
         /* swapping the two src operands breaks glBitmap()!? */
1269
         tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1270
         tmp->exec_size = GEN6_EXECSIZE_1;
1271
         tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1272
      }
1273
   }
1274
 
1275
   tc_discard_inst(tc, inst);
1276
}
1277
 
1278
static void
1279
fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1280
{
1281
   struct toy_compiler *tc = &fcc->tc;
1282
   struct toy_inst *inst;
1283
 
1284
   /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1285
   tc_head(tc);
1286
   while ((inst = tc_next(tc)) != NULL) {
1287
      switch (inst->opcode) {
1288
      case TOY_OPCODE_TGSI_IN:
1289
      case TOY_OPCODE_TGSI_CONST:
1290
      case TOY_OPCODE_TGSI_SV:
1291
      case TOY_OPCODE_TGSI_IMM:
1292
         fs_lower_opcode_tgsi_direct(fcc, inst);
1293
         break;
1294
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1295
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
1296
         fs_lower_opcode_tgsi_indirect(fcc, inst);
1297
         break;
1298
      case TOY_OPCODE_TGSI_TEX:
1299
      case TOY_OPCODE_TGSI_TXB:
1300
      case TOY_OPCODE_TGSI_TXD:
1301
      case TOY_OPCODE_TGSI_TXL:
1302
      case TOY_OPCODE_TGSI_TXP:
1303
      case TOY_OPCODE_TGSI_TXF:
1304
      case TOY_OPCODE_TGSI_TXQ:
1305
      case TOY_OPCODE_TGSI_TXQ_LZ:
1306
      case TOY_OPCODE_TGSI_TEX2:
1307
      case TOY_OPCODE_TGSI_TXB2:
1308
      case TOY_OPCODE_TGSI_TXL2:
1309
      case TOY_OPCODE_TGSI_SAMPLE:
1310
      case TOY_OPCODE_TGSI_SAMPLE_I:
1311
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1312
      case TOY_OPCODE_TGSI_SAMPLE_B:
1313
      case TOY_OPCODE_TGSI_SAMPLE_C:
1314
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1315
      case TOY_OPCODE_TGSI_SAMPLE_D:
1316
      case TOY_OPCODE_TGSI_SAMPLE_L:
1317
      case TOY_OPCODE_TGSI_GATHER4:
1318
      case TOY_OPCODE_TGSI_SVIEWINFO:
1319
      case TOY_OPCODE_TGSI_SAMPLE_POS:
1320
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
1321
         fs_lower_opcode_tgsi_sampling(fcc, inst);
1322
         break;
1323
      }
1324
   }
1325
 
1326
   tc_head(tc);
1327
   while ((inst = tc_next(tc)) != NULL) {
1328
      switch (inst->opcode) {
1329
      case TOY_OPCODE_INV:
1330
      case TOY_OPCODE_LOG:
1331
      case TOY_OPCODE_EXP:
1332
      case TOY_OPCODE_SQRT:
1333
      case TOY_OPCODE_RSQ:
1334
      case TOY_OPCODE_SIN:
1335
      case TOY_OPCODE_COS:
1336
      case TOY_OPCODE_FDIV:
1337
      case TOY_OPCODE_POW:
1338
      case TOY_OPCODE_INT_DIV_QUOTIENT:
1339
      case TOY_OPCODE_INT_DIV_REMAINDER:
1340
         toy_compiler_lower_math(tc, inst);
1341
         break;
1342
      case TOY_OPCODE_DDX:
1343
      case TOY_OPCODE_DDY:
1344
         fs_lower_opcode_derivative(tc, inst);
1345
         break;
1346
      case TOY_OPCODE_FB_WRITE:
1347
         fs_lower_opcode_fb_write(tc, inst);
1348
         break;
1349
      case TOY_OPCODE_KIL:
1350
         fs_lower_opcode_kil(tc, inst);
1351
         break;
1352
      default:
1353
         if (inst->opcode > 127)
1354
            tc_fail(tc, "unhandled virtual opcode");
1355
         break;
1356
      }
1357
   }
1358
}
1359
 
1360
/**
1361
 * Compile the shader.
1362
 */
1363
static bool
1364
fs_compile(struct fs_compile_context *fcc)
1365
{
1366
   struct toy_compiler *tc = &fcc->tc;
1367
   struct ilo_shader *sh = fcc->shader;
1368
 
1369
   fs_lower_virtual_opcodes(fcc);
1370
   toy_compiler_legalize_for_ra(tc);
1371
   toy_compiler_optimize(tc);
1372
   toy_compiler_allocate_registers(tc,
1373
         fcc->first_free_grf,
1374
         fcc->last_free_grf,
1375
         fcc->num_grf_per_vrf);
1376
   toy_compiler_legalize_for_asm(tc);
1377
 
1378
   if (tc->fail) {
1379
      ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1380
      return false;
1381
   }
1382
 
1383
   if (ilo_debug & ILO_DEBUG_FS) {
1384
      ilo_printf("legalized instructions:\n");
1385
      toy_compiler_dump(tc);
1386
      ilo_printf("\n");
1387
   }
1388
 
1389
   if (true) {
1390
      sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1391
   }
1392
   else {
1393
      static const uint32_t microcode[] = {
1394
         /* fill in the microcode here */
1395
         0x0, 0x0, 0x0, 0x0,
1396
      };
1397
      const bool swap = true;
1398
 
1399
      sh->kernel_size = sizeof(microcode);
1400
      sh->kernel = MALLOC(sh->kernel_size);
1401
 
1402
      if (sh->kernel) {
1403
         const int num_dwords = sizeof(microcode) / 4;
1404
         const uint32_t *src = microcode;
1405
         uint32_t *dst = (uint32_t *) sh->kernel;
1406
         int i;
1407
 
1408
         for (i = 0; i < num_dwords; i += 4) {
1409
            if (swap) {
1410
               dst[i + 0] = src[i + 3];
1411
               dst[i + 1] = src[i + 2];
1412
               dst[i + 2] = src[i + 1];
1413
               dst[i + 3] = src[i + 0];
1414
            }
1415
            else {
1416
               memcpy(dst, src, 16);
1417
            }
1418
         }
1419
      }
1420
   }
1421
 
1422
   if (!sh->kernel) {
1423
      ilo_err("failed to compile FS: %s\n", tc->reason);
1424
      return false;
1425
   }
1426
 
1427
   if (ilo_debug & ILO_DEBUG_FS) {
1428
      ilo_printf("disassembly:\n");
1429
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
1430
      ilo_printf("\n");
1431
   }
1432
 
1433
   return true;
1434
}
1435
 
1436
/**
1437
 * Emit instructions to write the color buffers (and the depth buffer).
1438
 */
1439
static void
1440
fs_write_fb(struct fs_compile_context *fcc)
1441
{
1442
   struct toy_compiler *tc = &fcc->tc;
1443
   int base_mrf = fcc->first_free_mrf;
1444
   const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1445
   bool header_present = false;
1446
   struct toy_src desc;
1447
   unsigned msg_type, ctrl;
1448
   int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1449
   int pos_slot = -1, cbuf, i;
1450
 
1451
   for (i = 0; i < Elements(color_slots); i++)
1452
      color_slots[i] = -1;
1453
 
1454
   for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1455
      if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1456
         assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
1457
         color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1458
      }
1459
      else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1460
         pos_slot = i;
1461
      }
1462
   }
1463
 
1464
   num_cbufs = fcc->variant->u.fs.num_cbufs;
1465
   /* still need to send EOT (and probably depth) */
1466
   if (!num_cbufs)
1467
      num_cbufs = 1;
1468
 
1469
   /* we need the header to specify the pixel mask or render target */
1470
   if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1471
      const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1472
      struct toy_inst *inst;
1473
 
1474
      inst = tc_MOV(tc, header, r0);
1475
      inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1476
      base_mrf += fcc->num_grf_per_vrf;
1477
 
1478
      /* this is a two-register header */
1479
      if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) {
1480
         inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1481
         inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1482
         base_mrf += fcc->num_grf_per_vrf;
1483
      }
1484
 
1485
      header_present = true;
1486
   }
1487
 
1488
   for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1489
      const int slot =
1490
         color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1491
      int mrf = base_mrf, vrf;
1492
      struct toy_src src[4];
1493
 
1494
      if (slot >= 0) {
1495
         const unsigned undefined_mask =
1496
            fcc->tgsi.outputs[slot].undefined_mask;
1497
         const int index = fcc->tgsi.outputs[slot].index;
1498
 
1499
         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1500
         if (vrf >= 0) {
1501
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1502
            tsrc_transpose(tmp, src);
1503
         }
1504
         else {
1505
            /* use (0, 0, 0, 0) */
1506
            tsrc_transpose(tsrc_imm_f(0.0f), src);
1507
         }
1508
 
1509
         for (i = 0; i < 4; i++) {
1510
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1511
 
1512
            if (undefined_mask & (1 << i))
1513
               src[i] = tsrc_imm_f(0.0f);
1514
 
1515
            tc_MOV(tc, dst, src[i]);
1516
 
1517
            mrf += fcc->num_grf_per_vrf;
1518
         }
1519
      }
1520
      else {
1521
         /* use (0, 0, 0, 0) */
1522
         for (i = 0; i < 4; i++) {
1523
            const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1524
 
1525
            tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1526
            mrf += fcc->num_grf_per_vrf;
1527
         }
1528
      }
1529
 
1530
      /* select BLEND_STATE[rt] */
1531
      if (cbuf > 0) {
1532
         struct toy_inst *inst;
1533
 
1534
         inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1535
         inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1536
         inst->exec_size = GEN6_EXECSIZE_1;
1537
         inst->src[0].rect = TOY_RECT_010;
1538
      }
1539
 
1540
      if (cbuf == 0 && pos_slot >= 0) {
1541
         const int index = fcc->tgsi.outputs[pos_slot].index;
1542
         const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1543
         struct toy_src src[4];
1544
         int vrf;
1545
 
1546
         vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1547
         if (vrf >= 0) {
1548
            const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1549
            tsrc_transpose(tmp, src);
1550
         }
1551
         else {
1552
            /* use (0, 0, 0, 0) */
1553
            tsrc_transpose(tsrc_imm_f(0.0f), src);
1554
         }
1555
 
1556
         /* only Z */
1557
         tc_MOV(tc, dst, src[2]);
1558
 
1559
         mrf += fcc->num_grf_per_vrf;
1560
      }
1561
 
1562
      msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ?
1563
         GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 :
1564
         GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8;
1565
 
1566
      ctrl = (cbuf == num_cbufs - 1) << 12 |
1567
             msg_type << 8;
1568
 
1569
      desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1570
            mrf - fcc->first_free_mrf, 0,
1571
            header_present, false,
1572
            GEN6_MSG_DP_RT_WRITE,
1573
            ctrl, fcc->shader->bt.rt_base + cbuf);
1574
 
1575
      tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1576
            tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1577
   }
1578
}
1579
 
1580
/**
1581
 * Set up shader outputs for fixed-function units.
1582
 */
1583
static void
1584
fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1585
{
1586
   int i;
1587
 
1588
   sh->out.count = tgsi->num_outputs;
1589
   for (i = 0; i < tgsi->num_outputs; i++) {
1590
      sh->out.register_indices[i] = tgsi->outputs[i].index;
1591
      sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1592
      sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1593
 
1594
      if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1595
         sh->out.has_pos = true;
1596
   }
1597
}
1598
 
1599
/**
1600
 * Set up shader inputs for fixed-function units.
1601
 */
1602
static void
1603
fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1604
                   bool flatshade)
1605
{
1606
   int i;
1607
 
1608
   sh->in.count = tgsi->num_inputs;
1609
   for (i = 0; i < tgsi->num_inputs; i++) {
1610
      sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1611
      sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1612
      sh->in.interp[i] = tgsi->inputs[i].interp;
1613
      sh->in.centroid[i] = tgsi->inputs[i].centroid;
1614
 
1615
      if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1616
         sh->in.has_pos = true;
1617
         continue;
1618
      }
1619
      else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1620
         continue;
1621
      }
1622
 
1623
      switch (tgsi->inputs[i].interp) {
1624
      case TGSI_INTERPOLATE_CONSTANT:
1625
         sh->in.const_interp_enable |= 1 << i;
1626
         break;
1627
      case TGSI_INTERPOLATE_LINEAR:
1628
         sh->in.has_linear_interp = true;
1629
 
1630
         if (tgsi->inputs[i].centroid) {
1631
            sh->in.barycentric_interpolation_mode |=
1632
               GEN6_INTERP_NONPERSPECTIVE_CENTROID;
1633
         }
1634
         else {
1635
            sh->in.barycentric_interpolation_mode |=
1636
               GEN6_INTERP_NONPERSPECTIVE_PIXEL;
1637
         }
1638
         break;
1639
      case TGSI_INTERPOLATE_COLOR:
1640
         if (flatshade) {
1641
            sh->in.const_interp_enable |= 1 << i;
1642
            break;
1643
         }
1644
         /* fall through */
1645
      case TGSI_INTERPOLATE_PERSPECTIVE:
1646
         if (tgsi->inputs[i].centroid) {
1647
            sh->in.barycentric_interpolation_mode |=
1648
               GEN6_INTERP_PERSPECTIVE_CENTROID;
1649
         }
1650
         else {
1651
            sh->in.barycentric_interpolation_mode |=
1652
               GEN6_INTERP_PERSPECTIVE_PIXEL;
1653
         }
1654
         break;
1655
      default:
1656
         break;
1657
      }
1658
   }
1659
}
1660
 
1661
static int
1662
fs_setup_payloads(struct fs_compile_context *fcc)
1663
{
1664
   const struct ilo_shader *sh = fcc->shader;
1665
   int grf, i;
1666
 
1667
   grf = 0;
1668
 
1669
   /* r0: header */
1670
   grf++;
1671
 
1672
   /* r1-r2: coordinates and etc. */
1673
   grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 2 : 1;
1674
 
1675
   for (i = 0; i < Elements(fcc->payloads); i++) {
1676
      const int reg_scale =
1677
         (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2;
1678
 
1679
      /* r3-r26 or r32-r55: barycentric interpolation parameters */
1680
      if (sh->in.barycentric_interpolation_mode &
1681
            (GEN6_INTERP_PERSPECTIVE_PIXEL)) {
1682
         fcc->payloads[i].interp_perspective_pixel = grf;
1683
         grf += 2 * reg_scale;
1684
      }
1685
      if (sh->in.barycentric_interpolation_mode &
1686
            (GEN6_INTERP_PERSPECTIVE_CENTROID)) {
1687
         fcc->payloads[i].interp_perspective_centroid = grf;
1688
         grf += 2 * reg_scale;
1689
      }
1690
      if (sh->in.barycentric_interpolation_mode &
1691
            (GEN6_INTERP_PERSPECTIVE_SAMPLE)) {
1692
         fcc->payloads[i].interp_perspective_sample = grf;
1693
         grf += 2 * reg_scale;
1694
      }
1695
      if (sh->in.barycentric_interpolation_mode &
1696
            (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) {
1697
         fcc->payloads[i].interp_nonperspective_pixel = grf;
1698
         grf += 2 * reg_scale;
1699
      }
1700
      if (sh->in.barycentric_interpolation_mode &
1701
            (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) {
1702
         fcc->payloads[i].interp_nonperspective_centroid = grf;
1703
         grf += 2 * reg_scale;
1704
      }
1705
      if (sh->in.barycentric_interpolation_mode &
1706
            (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) {
1707
         fcc->payloads[i].interp_nonperspective_sample = grf;
1708
         grf += 2 * reg_scale;
1709
      }
1710
 
1711
      /* r27-r28 or r56-r57: interpoloated depth */
1712
      if (sh->in.has_pos) {
1713
         fcc->payloads[i].source_depth = grf;
1714
         grf += 1 * reg_scale;
1715
      }
1716
 
1717
      /* r29-r30 or r58-r59: interpoloated w */
1718
      if (sh->in.has_pos) {
1719
         fcc->payloads[i].source_w = grf;
1720
         grf += 1 * reg_scale;
1721
      }
1722
 
1723
      /* r31 or r60: position offset */
1724
      if (false) {
1725
         fcc->payloads[i].pos_offset = grf;
1726
         grf++;
1727
      }
1728
 
1729
      if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32)
1730
         break;
1731
   }
1732
 
1733
   return grf;
1734
}
1735
 
1736
/**
1737
 * Translate the TGSI tokens.
1738
 */
1739
static bool
1740
fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1741
              struct toy_tgsi *tgsi)
1742
{
1743
   if (ilo_debug & ILO_DEBUG_FS) {
1744
      ilo_printf("dumping fragment shader\n");
1745
      ilo_printf("\n");
1746
 
1747
      tgsi_dump(tokens, 0);
1748
      ilo_printf("\n");
1749
   }
1750
 
1751
   toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1752
   if (tc->fail) {
1753
      ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1754
      return false;
1755
   }
1756
 
1757
   if (ilo_debug & ILO_DEBUG_FS) {
1758
      ilo_printf("TGSI translator:\n");
1759
      toy_tgsi_dump(tgsi);
1760
      ilo_printf("\n");
1761
      toy_compiler_dump(tc);
1762
      ilo_printf("\n");
1763
   }
1764
 
1765
   return true;
1766
}
1767
 
1768
/**
1769
 * Set up FS compile context.  This includes translating the TGSI tokens.
1770
 */
1771
static bool
1772
fs_setup(struct fs_compile_context *fcc,
1773
         const struct ilo_shader_state *state,
1774
         const struct ilo_shader_variant *variant)
1775
{
1776
   int num_consts;
1777
 
1778
   memset(fcc, 0, sizeof(*fcc));
1779
 
1780
   fcc->shader = CALLOC_STRUCT(ilo_shader);
1781
   if (!fcc->shader)
1782
      return false;
1783
 
1784
   fcc->variant = variant;
1785
 
1786
   toy_compiler_init(&fcc->tc, state->info.dev);
1787
 
1788
   fcc->dispatch_mode = GEN6_PS_DISPATCH_8;
1789
 
1790
   fcc->tc.templ.access_mode = GEN6_ALIGN_1;
1791
   if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) {
1792
      fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H;
1793
      fcc->tc.templ.exec_size = GEN6_EXECSIZE_16;
1794
   }
1795
   else {
1796
      fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q;
1797
      fcc->tc.templ.exec_size = GEN6_EXECSIZE_8;
1798
   }
1799
 
1800
   fcc->tc.rect_linear_width = 8;
1801
 
1802
   /*
1803
    * The classic driver uses the sampler cache (gen6) or the data cache
1804
    * (gen7).  Why?
1805
    */
1806
   fcc->const_cache = GEN6_SFID_DP_CC;
1807
 
1808
   if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1809
      toy_compiler_cleanup(&fcc->tc);
1810
      FREE(fcc->shader);
1811
      return false;
1812
   }
1813
 
1814
   fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1815
   fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1816
 
1817
   if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1818
      num_consts = (fcc->tgsi.const_count + 1) / 2;
1819
 
1820
      /*
1821
       * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1822
       *
1823
       *     "The sum of all four read length fields (each incremented to
1824
       *      represent the actual read length) must be less than or equal to
1825
       *      64"
1826
       *
1827
       * Since we are usually under a high register pressure, do not allow
1828
       * for more than 8.
1829
       */
1830
      if (num_consts > 8)
1831
         num_consts = 0;
1832
   }
1833
   else {
1834
      num_consts = 0;
1835
   }
1836
 
1837
   fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1838
   fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1839
 
1840
   fcc->first_const_grf = fs_setup_payloads(fcc);
1841
   fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1842
   fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1843
   fcc->last_free_grf = 127;
1844
 
1845
   /* m0 is reserved for system routines */
1846
   fcc->first_free_mrf = 1;
1847
   fcc->last_free_mrf = 15;
1848
 
1849
   /* instructions are compressed with GEN6_EXECSIZE_16 */
1850
   fcc->num_grf_per_vrf =
1851
      (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 2 : 1;
1852
 
1853
   if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) {
1854
      fcc->last_free_grf -= 15;
1855
      fcc->first_free_mrf = fcc->last_free_grf + 1;
1856
      fcc->last_free_mrf = fcc->first_free_mrf + 14;
1857
   }
1858
 
1859
   fcc->shader->in.start_grf = fcc->first_const_grf;
1860
   fcc->shader->has_kill = fcc->tgsi.uses_kill;
1861
   fcc->shader->dispatch_16 =
1862
      (fcc->dispatch_mode == GEN6_PS_DISPATCH_16);
1863
 
1864
   fcc->shader->bt.rt_base = 0;
1865
   fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs;
1866
   /* to send EOT */
1867
   if (!fcc->shader->bt.rt_count)
1868
      fcc->shader->bt.rt_count = 1;
1869
 
1870
   fcc->shader->bt.tex_base = fcc->shader->bt.rt_base +
1871
                              fcc->shader->bt.rt_count;
1872
   fcc->shader->bt.tex_count = fcc->variant->num_sampler_views;
1873
 
1874
   fcc->shader->bt.const_base = fcc->shader->bt.tex_base +
1875
                                fcc->shader->bt.tex_count;
1876
   fcc->shader->bt.const_count = state->info.constant_buffer_count;
1877
 
1878
   fcc->shader->bt.total_count = fcc->shader->bt.const_base +
1879
                                 fcc->shader->bt.const_count;
1880
 
1881
   return true;
1882
}
1883
 
1884
/**
1885
 * Compile the fragment shader.
1886
 */
1887
struct ilo_shader *
1888
ilo_shader_compile_fs(const struct ilo_shader_state *state,
1889
                      const struct ilo_shader_variant *variant)
1890
{
1891
   struct fs_compile_context fcc;
1892
 
1893
   if (!fs_setup(&fcc, state, variant))
1894
      return NULL;
1895
 
1896
   fs_write_fb(&fcc);
1897
 
1898
   if (!fs_compile(&fcc)) {
1899
      FREE(fcc.shader);
1900
      fcc.shader = NULL;
1901
   }
1902
 
1903
   toy_tgsi_cleanup(&fcc.tgsi);
1904
   toy_compiler_cleanup(&fcc.tc);
1905
 
1906
   return fcc.shader;
1907
}