Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5563 serge 1
/*
2
 * Mesa 3-D graphics library
3
 *
4
 * Copyright (C) 2012-2013 LunarG, Inc.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice shall be included
14
 * in all copies or substantial portions of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22
 * DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Chia-I Wu 
26
 */
27
 
28
#include "toy_compiler.h"
29
 
30
/* an operand origin is (RegNum << CG_REG_SHIFT | SubRegNumInBytes) */
#define CG_REG_SHIFT 5
31
/* extract RegNum from an origin encoded as (RegNum << 5 | SubRegNumInBytes) */
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
32
 
33
/**
 * Destination operand, already translated to hardware (BRW_*) encodings.
 */
struct codegen_dst {
   unsigned file;             /* BRW_*_REGISTER_FILE or BRW_IMMEDIATE_VALUE */
   unsigned type;             /* BRW_REGISTER_TYPE_* */
   bool indirect;             /* register-indirect addressing */
   unsigned indirect_subreg;  /* used only when indirect is set */
   unsigned origin;           /* (RegNum << 5 | SubRegNumInBytes) */

   unsigned horz_stride;      /* BRW_HORIZONTAL_STRIDE_* */

   unsigned writemask;        /* 4-bit channel mask */
};

/**
 * Source operand, already translated to hardware (BRW_*) encodings.
 */
struct codegen_src {
   unsigned file;             /* BRW_*_REGISTER_FILE or BRW_IMMEDIATE_VALUE */
   unsigned type;             /* BRW_REGISTER_TYPE_* */
   bool indirect;             /* register-indirect addressing */
   unsigned indirect_subreg;  /* used only when indirect is set */
   unsigned origin;           /* (RegNum << 5 | SubRegNumInBytes) */

   /* region description: BRW_VERTICAL_STRIDE_*, BRW_WIDTH_*, ... */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   unsigned swizzle[4];       /* channel selects for x, y, z, w */
   bool absolute;             /* source modifier: absolute value */
   bool negate;               /* source modifier: negation */
};

/**
 * Everything needed to emit one instruction: the instruction itself, its
 * position in the program, and its operands in hardware terms.
 */
struct codegen {
   const struct toy_inst *inst;
   int pc;

   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
67
 
68
/**
69
 * Return true if the source operand is null.
70
 */
71
static bool
72
src_is_null(const struct codegen *cg, int idx)
73
{
74
   const struct codegen_src *src = &cg->src[idx];
75
 
76
   return (src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
77
           src->origin == BRW_ARF_NULL << CG_REG_SHIFT);
78
}
79
 
80
/**
81
 * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
82
 */
83
static uint32_t
84
translate_src(const struct codegen *cg, int idx)
85
{
86
   const struct codegen_src *src = &cg->src[idx];
87
   uint32_t dw;
88
 
89
   /* special treatment may be needed if any of the operand is immediate */
90
   if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
91
      assert(!cg->src[0].absolute && !cg->src[0].negate);
92
      /* only the last src operand can be an immediate */
93
      assert(src_is_null(cg, 1));
94
 
95
      if (idx == 0)
96
         return cg->flag_sub_reg_num << 25;
97
      else
98
         return cg->src[0].origin;
99
   }
100
   else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
101
      assert(!cg->src[1].absolute && !cg->src[1].negate);
102
      return cg->src[1].origin;
103
   }
104
 
105
   assert(src->file != BRW_IMMEDIATE_VALUE);
106
 
107
   if (src->indirect) {
108
      const int offset = (int) src->origin;
109
 
110
      assert(src->file == BRW_GENERAL_REGISTER_FILE);
111
      assert(offset < 512 && offset >= -512);
112
 
113
      if (cg->inst->access_mode == BRW_ALIGN_16) {
114
         assert(src->width == BRW_WIDTH_4);
115
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
116
 
117
         /* the lower 4 bits are reserved for the swizzle_[xy] */
118
         assert(!(src->origin & 0xf));
119
 
120
         dw = src->vert_stride << 21 |
121
              src->swizzle[3] << 18 |
122
              src->swizzle[2] << 16 |
123
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
124
              src->negate << 14 |
125
              src->absolute << 13 |
126
              src->indirect_subreg << 10 |
127
              (src->origin & 0x3f0) |
128
              src->swizzle[1] << 2 |
129
              src->swizzle[0];
130
      }
131
      else {
132
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
133
                src->swizzle[1] == TOY_SWIZZLE_Y &&
134
                src->swizzle[2] == TOY_SWIZZLE_Z &&
135
                src->swizzle[3] == TOY_SWIZZLE_W);
136
 
137
         dw = src->vert_stride << 21 |
138
              src->width << 18 |
139
              src->horz_stride << 16 |
140
              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
141
              src->negate << 14 |
142
              src->absolute << 13 |
143
              src->indirect_subreg << 10 |
144
              (src->origin & 0x3ff);
145
      }
146
   }
147
   else {
148
      switch (src->file) {
149
      case BRW_ARCHITECTURE_REGISTER_FILE:
150
         break;
151
      case BRW_GENERAL_REGISTER_FILE:
152
         assert(CG_REG_NUM(src->origin) < 128);
153
         break;
154
      case BRW_MESSAGE_REGISTER_FILE:
155
         assert(cg->inst->opcode == BRW_OPCODE_SEND ||
156
                cg->inst->opcode == BRW_OPCODE_SENDC);
157
         assert(CG_REG_NUM(src->origin) < 16);
158
         break;
159
      case BRW_IMMEDIATE_VALUE:
160
      default:
161
         assert(!"invalid src file");
162
         break;
163
      }
164
 
165
      if (cg->inst->access_mode == BRW_ALIGN_16) {
166
         assert(src->width == BRW_WIDTH_4);
167
         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
168
 
169
         /* the lower 4 bits are reserved for the swizzle_[xy] */
170
         assert(!(src->origin & 0xf));
171
 
172
         dw = src->vert_stride << 21 |
173
              src->swizzle[3] << 18 |
174
              src->swizzle[2] << 16 |
175
              BRW_ADDRESS_DIRECT << 15 |
176
              src->negate << 14 |
177
              src->absolute << 13 |
178
              src->origin |
179
              src->swizzle[1] << 2 |
180
              src->swizzle[0];
181
      }
182
      else {
183
         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
184
                src->swizzle[1] == TOY_SWIZZLE_Y &&
185
                src->swizzle[2] == TOY_SWIZZLE_Z &&
186
                src->swizzle[3] == TOY_SWIZZLE_W);
187
 
188
         dw = src->vert_stride << 21 |
189
              src->width << 18 |
190
              src->horz_stride << 16 |
191
              BRW_ADDRESS_DIRECT << 15 |
192
              src->negate << 14 |
193
              src->absolute << 13 |
194
              src->origin;
195
      }
196
   }
197
 
198
   if (idx == 0)
199
      dw |= cg->flag_sub_reg_num << 25;
200
 
201
   return dw;
202
}
203
 
204
/**
205
 * Translate the destination operand to the higher 16 bits of DW1 of the
206
 * 1-src/2-src format.
207
 */
208
static uint16_t
209
translate_dst_region(const struct codegen *cg)
210
{
211
   const struct codegen_dst *dst = &cg->dst;
212
   uint16_t dw1_region;
213
 
214
   if (dst->file == BRW_IMMEDIATE_VALUE) {
215
      /* dst is immediate (JIP) when the opcode is a conditional branch */
216
      switch (cg->inst->opcode) {
217
      case BRW_OPCODE_IF:
218
      case BRW_OPCODE_ELSE:
219
      case BRW_OPCODE_ENDIF:
220
      case BRW_OPCODE_WHILE:
221
         assert(dst->type == BRW_REGISTER_TYPE_W);
222
         dw1_region = (dst->origin & 0xffff);
223
         break;
224
      default:
225
         assert(!"dst cannot be immediate");
226
         dw1_region = 0;
227
         break;
228
      }
229
 
230
      return dw1_region;
231
   }
232
 
233
   if (dst->indirect) {
234
      const int offset = (int) dst->origin;
235
 
236
      assert(dst->file == BRW_GENERAL_REGISTER_FILE);
237
      assert(offset < 512 && offset >= -512);
238
 
239
      if (cg->inst->access_mode == BRW_ALIGN_16) {
240
         /*
241
          * From the Sandy Bridge PRM, volume 4 part 2, page 144:
242
          *
243
          *     "Allthough Dst.HorzStride is a don't care for Align16, HW
244
          *      needs this to be programmed as 01."
245
          */
246
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
247
         /* the lower 4 bits are reserved for the writemask */
248
         assert(!(dst->origin & 0xf));
249
 
250
         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
251
                      dst->horz_stride << 13 |
252
                      dst->indirect_subreg << 10 |
253
                      (dst->origin & 0x3f0) |
254
                      dst->writemask;
255
      }
256
      else {
257
         assert(dst->writemask == TOY_WRITEMASK_XYZW);
258
 
259
         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
260
                      dst->horz_stride << 13 |
261
                      dst->indirect_subreg << 10 |
262
                      (dst->origin & 0x3ff);
263
      }
264
   }
265
   else {
266
      assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
267
              CG_REG_NUM(dst->origin) < 128) ||
268
             (dst->file == BRW_MESSAGE_REGISTER_FILE &&
269
              CG_REG_NUM(dst->origin) < 16) ||
270
             (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
271
 
272
      if (cg->inst->access_mode == BRW_ALIGN_16) {
273
         /* similar to the indirect case */
274
         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
275
         assert(!(dst->origin & 0xf));
276
 
277
         dw1_region = BRW_ADDRESS_DIRECT << 15 |
278
                      dst->horz_stride << 13 |
279
                      dst->origin |
280
                      dst->writemask;
281
      }
282
      else {
283
         assert(dst->writemask == TOY_WRITEMASK_XYZW);
284
 
285
         dw1_region = BRW_ADDRESS_DIRECT << 15 |
286
                      dst->horz_stride << 13 |
287
                      dst->origin;
288
      }
289
   }
290
 
291
   return dw1_region;
292
}
293
 
294
/**
295
 * Translate the destination operand to DW1 of the 1-src/2-src format.
296
 */
297
static uint32_t
298
translate_dst(const struct codegen *cg)
299
{
300
   return translate_dst_region(cg) << 16 |
301
          cg->src[1].type << 12 |
302
          cg->src[1].file << 10 |
303
          cg->src[0].type << 7 |
304
          cg->src[0].file << 5 |
305
          cg->dst.type << 2 |
306
          cg->dst.file;
307
}
308
 
309
/**
310
 * Translate the instruction to DW0 of the 1-src/2-src format.
311
 */
312
static uint32_t
313
translate_inst(const struct codegen *cg)
314
{
315
   const bool debug_ctrl = false;
316
   const bool cmpt_ctrl = false;
317
 
318
   assert(cg->inst->opcode < 128);
319
 
320
   return cg->inst->saturate << 31 |
321
          debug_ctrl << 30 |
322
          cmpt_ctrl << 29 |
323
          cg->inst->acc_wr_ctrl << 28 |
324
          cg->inst->cond_modifier << 24 |
325
          cg->inst->exec_size << 21 |
326
          cg->inst->pred_inv << 20 |
327
          cg->inst->pred_ctrl << 16 |
328
          cg->inst->thread_ctrl << 14 |
329
          cg->inst->qtr_ctrl << 12 |
330
          cg->inst->dep_ctrl << 10 |
331
          cg->inst->mask_ctrl << 9 |
332
          cg->inst->access_mode << 8 |
333
          cg->inst->opcode;
334
}
335
 
336
/**
 * Emit the four dwords of an instruction in 1-src/2-src format into \p code.
 * The third source operand must be null.
 */
static void
codegen_inst(const struct codegen *cg, uint32_t *code)
{
   uint32_t *out = code;

   *out++ = translate_inst(cg);
   *out++ = translate_dst(cg);
   *out++ = translate_src(cg, 0);
   *out++ = translate_src(cg, 1);

   assert(src_is_null(cg, 2));
}
348
 
349
/**
350
 * Codegen an instruction in 3-src format.
351
 */
352
static void
353
codegen_inst_3src(const struct codegen *cg, uint32_t *code)
354
{
355
   const struct codegen_dst *dst = &cg->dst;
356
   uint32_t dw0, dw1, dw_src[3];
357
   int i;
358
 
359
   dw0 = translate_inst(cg);
360
 
361
   /*
362
    * 3-src instruction restrictions
363
    *
364
    *  - align16 with direct addressing
365
    *  - GRF or MRF dst
366
    *  - GRF src
367
    *  - sub_reg_num is DWORD aligned
368
    *  - no regioning except replication control
369
    *    (vert_stride == 0 && horz_stride == 0)
370
    */
371
   assert(cg->inst->access_mode == BRW_ALIGN_16);
372
 
373
   assert(!dst->indirect);
374
   assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
375
           CG_REG_NUM(dst->origin) < 128) ||
376
          (dst->file == BRW_MESSAGE_REGISTER_FILE &&
377
           CG_REG_NUM(dst->origin) < 16));
378
   assert(!(dst->origin & 0x3));
379
   assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
380
 
381
   dw1 = dst->origin << 19 |
382
         dst->writemask << 17 |
383
         cg->src[2].negate << 9 |
384
         cg->src[2].absolute << 8 |
385
         cg->src[1].negate << 7 |
386
         cg->src[1].absolute << 6 |
387
         cg->src[0].negate << 5 |
388
         cg->src[0].absolute << 4 |
389
         cg->flag_sub_reg_num << 1 |
390
         (dst->file == BRW_MESSAGE_REGISTER_FILE);
391
 
392
   for (i = 0; i < 3; i++) {
393
      const struct codegen_src *src = &cg->src[i];
394
 
395
      assert(!src->indirect);
396
      assert(src->file == BRW_GENERAL_REGISTER_FILE &&
397
             CG_REG_NUM(src->origin) < 128);
398
      assert(!(src->origin & 0x3));
399
 
400
      assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
401
              src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
402
             (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
403
              src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
404
      assert(src->width == BRW_WIDTH_4);
405
 
406
      dw_src[i] = src->origin << 7 |
407
                  src->swizzle[3] << 7 |
408
                  src->swizzle[2] << 5 |
409
                  src->swizzle[1] << 3 |
410
                  src->swizzle[0] << 1 |
411
                  (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
412
                   src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
413
 
414
      /* only the lower 20 bits are used */
415
      assert((dw_src[i] & 0xfffff) == dw_src[i]);
416
   }
417
 
418
   code[0] = dw0;
419
   code[1] = dw1;
420
   /* concatenate the bits of dw_src */
421
   code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
422
   code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
423
}
424
 
425
/**
426
 * Sanity check the region parameters of the operands.
427
 */
428
static void
429
codegen_validate_region_restrictions(const struct codegen *cg)
430
{
431
   const int exec_size_map[] = {
432
      [BRW_EXECUTE_1] = 1,
433
      [BRW_EXECUTE_2] = 2,
434
      [BRW_EXECUTE_4] = 4,
435
      [BRW_EXECUTE_8] = 8,
436
      [BRW_EXECUTE_16] = 16,
437
      [BRW_EXECUTE_32] = 32,
438
   };
439
   const int width_map[] = {
440
      [BRW_WIDTH_1] = 1,
441
      [BRW_WIDTH_2] = 2,
442
      [BRW_WIDTH_4] = 4,
443
      [BRW_WIDTH_8] = 8,
444
      [BRW_WIDTH_16] = 16,
445
   };
446
   const int horz_stride_map[] = {
447
      [BRW_HORIZONTAL_STRIDE_0] = 0,
448
      [BRW_HORIZONTAL_STRIDE_1] = 1,
449
      [BRW_HORIZONTAL_STRIDE_2] = 2,
450
      [BRW_HORIZONTAL_STRIDE_4] = 4,
451
   };
452
   const int vert_stride_map[] = {
453
      [BRW_VERTICAL_STRIDE_0] = 0,
454
      [BRW_VERTICAL_STRIDE_1] = 1,
455
      [BRW_VERTICAL_STRIDE_2] = 2,
456
      [BRW_VERTICAL_STRIDE_4] = 4,
457
      [BRW_VERTICAL_STRIDE_8] = 8,
458
      [BRW_VERTICAL_STRIDE_16] = 16,
459
      [BRW_VERTICAL_STRIDE_32] = 32,
460
      [BRW_VERTICAL_STRIDE_64] = 64,
461
      [BRW_VERTICAL_STRIDE_128] = 128,
462
      [BRW_VERTICAL_STRIDE_256] = 256,
463
      [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
464
   };
465
   const int exec_size = exec_size_map[cg->inst->exec_size];
466
   int i;
467
 
468
   /* Sandy Bridge PRM, volume 4 part 2, page 94 */
469
 
470
   /* 1. (we don't do 32 anyway) */
471
   assert(exec_size <= 16);
472
 
473
   for (i = 0; i < Elements(cg->src); i++) {
474
      const int width = width_map[cg->src[i].width];
475
      const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
476
      const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
477
 
478
      if (src_is_null(cg, i))
479
         break;
480
 
481
      /* 3. */
482
      assert(exec_size >= width);
483
 
484
      if (exec_size == width) {
485
         /* 4. & 5. */
486
         if (horz_stride)
487
            assert(vert_stride == width * horz_stride);
488
      }
489
 
490
      if (width == 1) {
491
         /* 6. */
492
         assert(horz_stride == 0);
493
 
494
         /* 7. */
495
         if (exec_size == 1)
496
            assert(vert_stride == 0);
497
      }
498
 
499
      /* 8. */
500
      if (!vert_stride && !horz_stride)
501
         assert(width == 1);
502
   }
503
 
504
   /* derived from 10.1.2. & 10.2. */
505
   assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
506
}
507
 
508
static unsigned
509
translate_vfile(enum toy_file file)
510
{
511
   switch (file) {
512
   case TOY_FILE_ARF:   return BRW_ARCHITECTURE_REGISTER_FILE;
513
   case TOY_FILE_GRF:   return BRW_GENERAL_REGISTER_FILE;
514
   case TOY_FILE_MRF:   return BRW_MESSAGE_REGISTER_FILE;
515
   case TOY_FILE_IMM:   return BRW_IMMEDIATE_VALUE;
516
   default:
517
      assert(!"unhandled toy file");
518
      return BRW_GENERAL_REGISTER_FILE;
519
   }
520
}
521
 
522
static unsigned
523
translate_vtype(enum toy_type type)
524
{
525
   switch (type) {
526
   case TOY_TYPE_F:     return BRW_REGISTER_TYPE_F;
527
   case TOY_TYPE_D:     return BRW_REGISTER_TYPE_D;
528
   case TOY_TYPE_UD:    return BRW_REGISTER_TYPE_UD;
529
   case TOY_TYPE_W:     return BRW_REGISTER_TYPE_W;
530
   case TOY_TYPE_UW:    return BRW_REGISTER_TYPE_UW;
531
   case TOY_TYPE_V:     return BRW_REGISTER_TYPE_V;
532
   default:
533
      assert(!"unhandled toy type");
534
      return BRW_REGISTER_TYPE_F;
535
   }
536
}
537
 
538
static unsigned
539
translate_writemask(enum toy_writemask writemask)
540
{
541
   /* TOY_WRITEMASK_* are compatible with the hardware definitions */
542
   assert(writemask <= 0xf);
543
   return writemask;
544
}
545
 
546
static unsigned
547
translate_swizzle(enum toy_swizzle swizzle)
548
{
549
   /* TOY_SWIZZLE_* are compatible with the hardware definitions */
550
   assert(swizzle <= 3);
551
   return swizzle;
552
}
553
 
554
/**
555
 * Prepare for generating an instruction.
556
 */
557
static void
558
codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
559
                int pc, int rect_linear_width)
560
{
561
   int i;
562
 
563
   cg->inst = inst;
564
   cg->pc = pc;
565
 
566
   cg->flag_sub_reg_num = 0;
567
 
568
   cg->dst.file = translate_vfile(inst->dst.file);
569
   cg->dst.type = translate_vtype(inst->dst.type);
570
   cg->dst.indirect = inst->dst.indirect;
571
   cg->dst.indirect_subreg = inst->dst.indirect_subreg;
572
   cg->dst.origin = inst->dst.val32;
573
 
574
   /*
575
    * From the Sandy Bridge PRM, volume 4 part 2, page 81:
576
    *
577
    *     "For a word or an unsigned word immediate data, software must
578
    *      replicate the same 16-bit immediate value to both the lower word
579
    *      and the high word of the 32-bit immediate field in an instruction."
580
    */
581
   if (inst->dst.file == TOY_FILE_IMM) {
582
      switch (inst->dst.type) {
583
      case TOY_TYPE_W:
584
      case TOY_TYPE_UW:
585
         cg->dst.origin &= 0xffff;
586
         cg->dst.origin |= cg->dst.origin << 16;
587
         break;
588
      default:
589
         break;
590
      }
591
   }
592
 
593
   cg->dst.writemask = translate_writemask(inst->dst.writemask);
594
 
595
   switch (inst->dst.rect) {
596
   case TOY_RECT_LINEAR:
597
      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
598
      break;
599
   default:
600
      assert(!"unsupported dst region");
601
      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
602
      break;
603
   }
604
 
605
   for (i = 0; i < Elements(cg->src); i++) {
606
      struct codegen_src *src = &cg->src[i];
607
 
608
      src->file = translate_vfile(inst->src[i].file);
609
      src->type = translate_vtype(inst->src[i].type);
610
      src->indirect = inst->src[i].indirect;
611
      src->indirect_subreg = inst->src[i].indirect_subreg;
612
      src->origin = inst->src[i].val32;
613
 
614
      /* do the same for src */
615
      if (inst->dst.file == TOY_FILE_IMM) {
616
         switch (inst->src[i].type) {
617
         case TOY_TYPE_W:
618
         case TOY_TYPE_UW:
619
            src->origin &= 0xffff;
620
            src->origin |= src->origin << 16;
621
            break;
622
         default:
623
            break;
624
         }
625
      }
626
 
627
      src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
628
      src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
629
      src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
630
      src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
631
      src->absolute = inst->src[i].absolute;
632
      src->negate = inst->src[i].negate;
633
 
634
      switch (inst->src[i].rect) {
635
      case TOY_RECT_LINEAR:
636
         switch (rect_linear_width) {
637
         case 1:
638
            src->vert_stride = BRW_VERTICAL_STRIDE_1;
639
            src->width = BRW_WIDTH_1;
640
            break;
641
         case 2:
642
            src->vert_stride = BRW_VERTICAL_STRIDE_2;
643
            src->width = BRW_WIDTH_2;
644
            break;
645
         case 4:
646
            src->vert_stride = BRW_VERTICAL_STRIDE_4;
647
            src->width = BRW_WIDTH_4;
648
            break;
649
         case 8:
650
            src->vert_stride = BRW_VERTICAL_STRIDE_8;
651
            src->width = BRW_WIDTH_8;
652
            break;
653
         case 16:
654
            src->vert_stride = BRW_VERTICAL_STRIDE_16;
655
            src->width = BRW_WIDTH_16;
656
            break;
657
         default:
658
            assert(!"unsupported TOY_RECT_LINEAR width");
659
            src->vert_stride = BRW_VERTICAL_STRIDE_1;
660
            src->width = BRW_WIDTH_1;
661
            break;
662
         }
663
         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
664
         break;
665
      case TOY_RECT_041:
666
         src->vert_stride = BRW_VERTICAL_STRIDE_0;
667
         src->width = BRW_WIDTH_4;
668
         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
669
         break;
670
      case TOY_RECT_010:
671
         src->vert_stride = BRW_VERTICAL_STRIDE_0;
672
         src->width = BRW_WIDTH_1;
673
         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
674
         break;
675
      case TOY_RECT_220:
676
         src->vert_stride = BRW_VERTICAL_STRIDE_2;
677
         src->width = BRW_WIDTH_2;
678
         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
679
         break;
680
      case TOY_RECT_440:
681
         src->vert_stride = BRW_VERTICAL_STRIDE_4;
682
         src->width = BRW_WIDTH_4;
683
         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
684
         break;
685
      case TOY_RECT_240:
686
         src->vert_stride = BRW_VERTICAL_STRIDE_2;
687
         src->width = BRW_WIDTH_4;
688
         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
689
         break;
690
      default:
691
         assert(!"unsupported src region");
692
         src->vert_stride = BRW_VERTICAL_STRIDE_1;
693
         src->width = BRW_WIDTH_1;
694
         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
695
         break;
696
      }
697
   }
698
}
699
 
700
/**
701
 * Generate HW shader code.  The instructions should have been legalized.
702
 */
703
void *
704
toy_compiler_assemble(struct toy_compiler *tc, int *size)
705
{
706
   const struct toy_inst *inst;
707
   uint32_t *code;
708
   int pc;
709
 
710
   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
711
   if (!code)
712
      return NULL;
713
 
714
   pc = 0;
715
   tc_head(tc);
716
   while ((inst = tc_next(tc)) != NULL) {
717
      uint32_t *dw = &code[pc * 4];
718
      struct codegen cg;
719
 
720
      if (pc >= tc->num_instructions) {
721
         tc_fail(tc, "wrong instructoun count");
722
         break;
723
      }
724
 
725
      codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
726
      codegen_validate_region_restrictions(&cg);
727
 
728
      switch (inst->opcode) {
729
      case BRW_OPCODE_MAD:
730
         codegen_inst_3src(&cg, dw);
731
         break;
732
      default:
733
         codegen_inst(&cg, dw);
734
         break;
735
      }
736
 
737
      pc++;
738
   }
739
 
740
   /* never return an invalid kernel */
741
   if (tc->fail) {
742
      FREE(code);
743
      return NULL;
744
   }
745
 
746
   if (size)
747
      *size = pc * 4 * sizeof(uint32_t);
748
 
749
   return code;
750
}