Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "toy_compiler.h"
  29.  
/* an operand origin is (RegNum << CG_REG_SHIFT | SubRegNumInBytes) */
#define CG_REG_SHIFT 5
/* extract the register number from an operand origin */
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
  32.  
/**
 * State used while encoding one instruction: the device, the instruction
 * itself, the flag register selection, and the destination and source
 * operands in a form that the translate_*() helpers shift directly into the
 * instruction dwords.
 */
struct codegen {
   const struct ilo_dev *dev;    /* queried with ilo_dev_gen() */
   const struct toy_inst *inst;  /* the instruction being encoded */
   int pc;

   /*
    * Flag register selection.  On Gen8 both go to the low bits of DW1; on
    * earlier gens only flag_sub_reg_num is encoded here, at bit 25 of the
    * src0 dword.
    */
   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst {
      unsigned file;             /* GEN6_FILE_x */
      unsigned type;             /* GEN6_TYPE_x */
      bool indirect;             /* use GEN6_ADDRMODE_INDIRECT */
      unsigned indirect_subreg;  /* only encoded when indirect */
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      unsigned horz_stride;      /* GEN6_HORZSTRIDE_x */

      unsigned writemask;        /* TOY_WRITEMASK_x; encoded in Align16 only */
   } dst;

   struct codegen_src {
      unsigned file;             /* GEN6_FILE_x */
      unsigned type;             /* GEN6_TYPE_x */
      bool indirect;             /* use GEN6_ADDRMODE_INDIRECT */
      unsigned indirect_subreg;  /* only encoded when indirect */
      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

      /* region description; width and horz_stride are encoded in Align1 only */
      unsigned vert_stride;
      unsigned width;
      unsigned horz_stride;

      unsigned swizzle[4];       /* TOY_SWIZZLE_x; encoded in Align16 only */
      bool absolute;             /* source modifier, bit 13 of the src dword */
      bool negate;               /* source modifier, bit 14 of the src dword */
   } src[3];
};
  69.  
  70. /*
  71.  * From the Sandy Bridge PRM, volume 4 part 2, page 107-108:
  72.  *
  73.  *     "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up
  74.  *      result forms bits [88:77], the source 0 register region fields, of the
  75.  *      128-bit instruction word."
  76.  *
  77.  *     "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit
  78.  *      table-look-up result forms bits [100:96], [68,64] and [52,48] of the
  79.  *      128-bit instruction word."
  80.  *
  81.  *     "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit
  82.  *      table-look-up result forms bits [63:61] and [46, 32] of the 128-bit
  83.  *      instruction word."
  84.  *
  85.  *     "(ControlIndex) The 5-bit index for data type fields. The 17-bit
  86.  *      table-look-up result forms bits[31], and [23, 8] of the 128-bit
  87.  *      instruction word."
  88.  */
  89. static const struct toy_compaction_table toy_compaction_table_gen6 = {
  90.    .control = {
  91.       [0]   = 0x00000,  /* 00000000000000000 */
  92.       [1]   = 0x08000,  /* 01000000000000000 */
  93.       [2]   = 0x06000,  /* 00110000000000000 */
  94.       [3]   = 0x00100,  /* 00000000100000000 */
  95.       [4]   = 0x02000,  /* 00010000000000000 */
  96.       [5]   = 0x01100,  /* 00001000100000000 */
  97.       [6]   = 0x00102,  /* 00000000100000010 */
  98.       [7]   = 0x00002,  /* 00000000000000010 */
  99.       [8]   = 0x08100,  /* 01000000100000000 */
  100.       [9]   = 0x0a000,  /* 01010000000000000 */
  101.       [10]  = 0x16000,  /* 10110000000000000 */
  102.       [11]  = 0x04000,  /* 00100000000000000 */
  103.       [12]  = 0x1a000,  /* 11010000000000000 */
  104.       [13]  = 0x18000,  /* 11000000000000000 */
  105.       [14]  = 0x09100,  /* 01001000100000000 */
  106.       [15]  = 0x08008,  /* 01000000000001000 */
  107.       [16]  = 0x08004,  /* 01000000000000100 */
  108.       [17]  = 0x00008,  /* 00000000000001000 */
  109.       [18]  = 0x00004,  /* 00000000000000100 */
  110.       [19]  = 0x01100,  /* 00111000100000000 */
  111.       [20]  = 0x01102,  /* 00001000100000010 */
  112.       [21]  = 0x06100,  /* 00110000100000000 */
  113.       [22]  = 0x06001,  /* 00110000000000001 */
  114.       [23]  = 0x04001,  /* 00100000000000001 */
  115.       [24]  = 0x06002,  /* 00110000000000010 */
  116.       [25]  = 0x06005,  /* 00110000000000101 */
  117.       [26]  = 0x06009,  /* 00110000000001001 */
  118.       [27]  = 0x06010,  /* 00110000000010000 */
  119.       [28]  = 0x06003,  /* 00110000000000011 */
  120.       [29]  = 0x06004,  /* 00110000000000100 */
  121.       [30]  = 0x06108,  /* 00110000100001000 */
  122.       [31]  = 0x04009,  /* 00100000000001001 */
  123.    },
  124.    .datatype = {
  125.       [0]   = 0x09c00,  /* 001001110000000000 */
  126.       [1]   = 0x08c20,  /* 001000110000100000 */
  127.       [2]   = 0x09c01,  /* 001001110000000001 */
  128.       [3]   = 0x08060,  /* 001000000001100000 */
  129.       [4]   = 0x0ad29,  /* 001010110100101001 */
  130.       [5]   = 0x081ad,  /* 001000000110101101 */
  131.       [6]   = 0x0c62c,  /* 001100011000101100 */
  132.       [7]   = 0x0bdad,  /* 001011110110101101 */
  133.       [8]   = 0x081ec,  /* 001000000111101100 */
  134.       [9]   = 0x08061,  /* 001000000001100001 */
  135.       [10]  = 0x08ca5,  /* 001000110010100101 */
  136.       [11]  = 0x08041,  /* 001000000001000001 */
  137.       [12]  = 0x08231,  /* 001000001000110001 */
  138.       [13]  = 0x08229,  /* 001000001000101001 */
  139.       [14]  = 0x08020,  /* 001000000000100000 */
  140.       [15]  = 0x08232,  /* 001000001000110010 */
  141.       [16]  = 0x0a529,  /* 001010010100101001 */
  142.       [17]  = 0x0b4a5,  /* 001011010010100101 */
  143.       [18]  = 0x081a5,  /* 001000000110100101 */
  144.       [19]  = 0x0c629,  /* 001100011000101001 */
  145.       [20]  = 0x0b62c,  /* 001011011000101100 */
  146.       [21]  = 0x0b5a5,  /* 001011010110100101 */
  147.       [22]  = 0x0bda5,  /* 001011110110100101 */
  148.       [23]  = 0x0f1bd,  /* 001111011110111101 */
  149.       [24]  = 0x0f1bc,  /* 001111011110111100 */
  150.       [25]  = 0x0f1bd,  /* 001111011110111101 */
  151.       [26]  = 0x0f19d,  /* 001111011110011101 */
  152.       [27]  = 0x0f1be,  /* 001111011110111110 */
  153.       [28]  = 0x08021,  /* 001000000000100001 */
  154.       [29]  = 0x08022,  /* 001000000000100010 */
  155.       [30]  = 0x09fdd,  /* 001001111111011101 */
  156.       [31]  = 0x083be,  /* 001000001110111110 */
  157.    },
  158.    .subreg = {
  159.       [0]   = 0x0000,   /* 000000000000000 */
  160.       [1]   = 0x0004,   /* 000000000000100 */
  161.       [2]   = 0x0180,   /* 000000110000000 */
  162.       [3]   = 0x1000,   /* 111000000000000 */
  163.       [4]   = 0x3c08,   /* 011110000001000 */
  164.       [5]   = 0x0400,   /* 000010000000000 */
  165.       [6]   = 0x0010,   /* 000000000010000 */
  166.       [7]   = 0x0c0c,   /* 000110000001100 */
  167.       [8]   = 0x1000,   /* 001000000000000 */
  168.       [9]   = 0x0200,   /* 000001000000000 */
  169.       [10]  = 0x0294,   /* 000001010010100 */
  170.       [11]  = 0x0056,   /* 000000001010110 */
  171.       [12]  = 0x2000,   /* 010000000000000 */
  172.       [13]  = 0x6000,   /* 110000000000000 */
  173.       [14]  = 0x0800,   /* 000100000000000 */
  174.       [15]  = 0x0080,   /* 000000010000000 */
  175.       [16]  = 0x0008,   /* 000000000001000 */
  176.       [17]  = 0x4000,   /* 100000000000000 */
  177.       [18]  = 0x0280,   /* 000001010000000 */
  178.       [19]  = 0x1400,   /* 001010000000000 */
  179.       [20]  = 0x1800,   /* 001100000000000 */
  180.       [21]  = 0x0054,   /* 000000001010100 */
  181.       [22]  = 0x5a94,   /* 101101010010100 */
  182.       [23]  = 0x2800,   /* 010100000000000 */
  183.       [24]  = 0x008f,   /* 000000010001111 */
  184.       [25]  = 0x3000,   /* 011000000000000 */
  185.       [26]  = 0x1c00,   /* 111110000000000 */
  186.       [27]  = 0x5000,   /* 101000000000000 */
  187.       [28]  = 0x000f,   /* 000000000001111 */
  188.       [29]  = 0x088f,   /* 000100010001111 */
  189.       [30]  = 0x108f,   /* 001000010001111 */
  190.       [31]  = 0x0c00,   /* 000110000000000 */
  191.    },
  192.    .src = {
  193.       [0]   = 0x000,    /* 000000000000 */
  194.       [1]   = 0x588,    /* 010110001000 */
  195.       [2]   = 0x468,    /* 010001101000 */
  196.       [3]   = 0x228,    /* 001000101000 */
  197.       [4]   = 0x690,    /* 011010010000 */
  198.       [5]   = 0x120,    /* 000100100000 */
  199.       [6]   = 0x46c,    /* 010001101100 */
  200.       [7]   = 0x510,    /* 010101110000 */
  201.       [8]   = 0x618,    /* 011001111000 */
  202.       [9]   = 0x328,    /* 001100101000 */
  203.       [10]  = 0x58c,    /* 010110001100 */
  204.       [11]  = 0x220,    /* 001000100000 */
  205.       [12]  = 0x58a,    /* 010110001010 */
  206.       [13]  = 0x002,    /* 000000000010 */
  207.       [14]  = 0x550,    /* 010101010000 */
  208.       [15]  = 0x568,    /* 010101101000 */
  209.       [16]  = 0xf4c,    /* 111101001100 */
  210.       [17]  = 0xf2c,    /* 111100101100 */
  211.       [18]  = 0x610,    /* 011001110000 */
  212.       [19]  = 0x589,    /* 010110001001 */
  213.       [20]  = 0x558,    /* 010101011000 */
  214.       [21]  = 0x348,    /* 001101001000 */
  215.       [22]  = 0x42c,    /* 010000101100 */
  216.       [23]  = 0x400,    /* 010000000000 */
  217.       [24]  = 0x310,    /* 001101110000 */
  218.       [25]  = 0x310,    /* 001100010000 */
  219.       [26]  = 0x300,    /* 001100000000 */
  220.       [27]  = 0x46a,    /* 010001101010 */
  221.       [28]  = 0x318,    /* 001101111000 */
  222.       [29]  = 0x010,    /* 000001110000 */
  223.       [30]  = 0x320,    /* 001100100000 */
  224.       [31]  = 0x350,    /* 001101010000 */
  225.    },
  226. };
  227.  
/*
 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
 *
 *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
 *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
 *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
 *
 *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
 *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
 *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
 *      on AddrMode and AccessMode."
 *
 *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
 *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
 *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
 *      Src1.RegType bit fields."
 *
 *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
 *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
 *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
 *      and AccessMode bit fields."
 *
 * Each entry is written in hex with the same value spelled out in binary in
 * the trailing comment.  This table is returned for both Gen7 and Gen7.5.
 */
static const struct toy_compaction_table toy_compaction_table_gen7 = {
   .control = {
      [0]   = 0x00002,  /* 0000000000000000010 */
      [1]   = 0x04000,  /* 0000100000000000000 */
      [2]   = 0x04001,  /* 0000100000000000001 */
      [3]   = 0x04002,  /* 0000100000000000010 */
      [4]   = 0x04003,  /* 0000100000000000011 */
      [5]   = 0x04004,  /* 0000100000000000100 */
      [6]   = 0x04005,  /* 0000100000000000101 */
      [7]   = 0x04007,  /* 0000100000000000111 */
      [8]   = 0x04008,  /* 0000100000000001000 */
      [9]   = 0x04009,  /* 0000100000000001001 */
      [10]  = 0x0400d,  /* 0000100000000001101 */
      [11]  = 0x06000,  /* 0000110000000000000 */
      [12]  = 0x06001,  /* 0000110000000000001 */
      [13]  = 0x06002,  /* 0000110000000000010 */
      [14]  = 0x06003,  /* 0000110000000000011 */
      [15]  = 0x06004,  /* 0000110000000000100 */
      [16]  = 0x06005,  /* 0000110000000000101 */
      [17]  = 0x06007,  /* 0000110000000000111 */
      [18]  = 0x06009,  /* 0000110000000001001 */
      [19]  = 0x0600d,  /* 0000110000000001101 */
      [20]  = 0x06010,  /* 0000110000000010000 */
      [21]  = 0x06100,  /* 0000110000100000000 */
      [22]  = 0x08000,  /* 0001000000000000000 */
      [23]  = 0x08002,  /* 0001000000000000010 */
      [24]  = 0x08004,  /* 0001000000000000100 */
      [25]  = 0x08100,  /* 0001000000100000000 */
      [26]  = 0x16000,  /* 0010110000000000000 */
      [27]  = 0x16010,  /* 0010110000000010000 */
      [28]  = 0x18000,  /* 0011000000000000000 */
      [29]  = 0x18100,  /* 0011000000100000000 */
      [30]  = 0x28000,  /* 0101000000000000000 */
      [31]  = 0x28100,  /* 0101000000100000000 */
   },
   .datatype = {
      [0]   = 0x08001,  /* 001000000000000001 */
      [1]   = 0x08020,  /* 001000000000100000 */
      [2]   = 0x08021,  /* 001000000000100001 */
      [3]   = 0x08061,  /* 001000000001100001 */
      [4]   = 0x080bd,  /* 001000000010111101 */
      [5]   = 0x082fd,  /* 001000001011111101 */
      [6]   = 0x083a1,  /* 001000001110100001 */
      [7]   = 0x083a5,  /* 001000001110100101 */
      [8]   = 0x083bd,  /* 001000001110111101 */
      [9]   = 0x08421,  /* 001000010000100001 */
      [10]  = 0x08c20,  /* 001000110000100000 */
      [11]  = 0x08c21,  /* 001000110000100001 */
      [12]  = 0x094a5,  /* 001001010010100101 */
      [13]  = 0x09ca4,  /* 001001110010100100 */
      [14]  = 0x09ca5,  /* 001001110010100101 */
      [15]  = 0x0f3bd,  /* 001111001110111101 */
      [16]  = 0x0f79d,  /* 001111011110011101 */
      [17]  = 0x0f7bc,  /* 001111011110111100 */
      [18]  = 0x0f7bd,  /* 001111011110111101 */
      [19]  = 0x0ffbc,  /* 001111111110111100 */
      [20]  = 0x0020c,  /* 000000001000001100 */
      [21]  = 0x0803d,  /* 001000000000111101 */
      [22]  = 0x080a5,  /* 001000000010100101 */
      [23]  = 0x08420,  /* 001000010000100000 */
      [24]  = 0x094a4,  /* 001001010010100100 */
      [25]  = 0x09c84,  /* 001001110010000100 */
      [26]  = 0x0a509,  /* 001010010100001001 */
      [27]  = 0x0dfbd,  /* 001101111110111101 */
      [28]  = 0x0ffbd,  /* 001111111110111101 */
      [29]  = 0x0bdac,  /* 001011110110101100 */
      [30]  = 0x0a528,  /* 001010010100101000 */
      [31]  = 0x0ad28,  /* 001010110100101000 */
   },
   .subreg = {
      [0]   = 0x0000,   /* 000000000000000 */
      [1]   = 0x0001,   /* 000000000000001 */
      [2]   = 0x0008,   /* 000000000001000 */
      [3]   = 0x000f,   /* 000000000001111 */
      [4]   = 0x0010,   /* 000000000010000 */
      [5]   = 0x0080,   /* 000000010000000 */
      [6]   = 0x0100,   /* 000000100000000 */
      [7]   = 0x0180,   /* 000000110000000 */
      [8]   = 0x0200,   /* 000001000000000 */
      [9]   = 0x0210,   /* 000001000010000 */
      [10]  = 0x0280,   /* 000001010000000 */
      [11]  = 0x1000,   /* 001000000000000 */
      [12]  = 0x1001,   /* 001000000000001 */
      [13]  = 0x1081,   /* 001000010000001 */
      [14]  = 0x1082,   /* 001000010000010 */
      [15]  = 0x1083,   /* 001000010000011 */
      [16]  = 0x1084,   /* 001000010000100 */
      [17]  = 0x1087,   /* 001000010000111 */
      [18]  = 0x1088,   /* 001000010001000 */
      [19]  = 0x108e,   /* 001000010001110 */
      [20]  = 0x108f,   /* 001000010001111 */
      [21]  = 0x1180,   /* 001000110000000 */
      [22]  = 0x11e8,   /* 001000111101000 */
      [23]  = 0x2000,   /* 010000000000000 */
      [24]  = 0x2180,   /* 010000110000000 */
      [25]  = 0x3000,   /* 011000000000000 */
      [26]  = 0x3c87,   /* 011110010000111 */
      [27]  = 0x4000,   /* 100000000000000 */
      [28]  = 0x5000,   /* 101000000000000 */
      [29]  = 0x6000,   /* 110000000000000 */
      [30]  = 0x7000,   /* 111000000000000 */
      [31]  = 0x701c,   /* 111000000011100 */
   },
   .src = {
      [0]   = 0x000,    /* 000000000000 */
      [1]   = 0x002,    /* 000000000010 */
      [2]   = 0x010,    /* 000000010000 */
      [3]   = 0x012,    /* 000000010010 */
      [4]   = 0x018,    /* 000000011000 */
      [5]   = 0x020,    /* 000000100000 */
      [6]   = 0x028,    /* 000000101000 */
      [7]   = 0x048,    /* 000001001000 */
      [8]   = 0x050,    /* 000001010000 */
      [9]   = 0x070,    /* 000001110000 */
      [10]  = 0x078,    /* 000001111000 */
      [11]  = 0x300,    /* 001100000000 */
      [12]  = 0x302,    /* 001100000010 */
      [13]  = 0x308,    /* 001100001000 */
      [14]  = 0x310,    /* 001100010000 */
      [15]  = 0x312,    /* 001100010010 */
      [16]  = 0x320,    /* 001100100000 */
      [17]  = 0x328,    /* 001100101000 */
      [18]  = 0x338,    /* 001100111000 */
      [19]  = 0x340,    /* 001101000000 */
      [20]  = 0x342,    /* 001101000010 */
      [21]  = 0x348,    /* 001101001000 */
      [22]  = 0x350,    /* 001101010000 */
      [23]  = 0x360,    /* 001101100000 */
      [24]  = 0x368,    /* 001101101000 */
      [25]  = 0x370,    /* 001101110000 */
      [26]  = 0x371,    /* 001101110001 */
      [27]  = 0x378,    /* 001101111000 */
      [28]  = 0x468,    /* 010001101000 */
      [29]  = 0x469,    /* 010001101001 */
      [30]  = 0x46a,    /* 010001101010 */
      [31]  = 0x588,    /* 010110001000 */
   },
};
  388.  
  389. static const struct toy_compaction_table toy_compaction_table_gen8 = {
  390.    .control = {
  391.    },
  392.    .datatype = {
  393.    },
  394.    .subreg = {
  395.    },
  396.    .src = {
  397.    },
  398.    .control_3src = {
  399.    },
  400.    .source_3src = {
  401.    },
  402. };
  403.  
  404. const struct toy_compaction_table *
  405. toy_compiler_get_compaction_table(const struct ilo_dev *dev)
  406. {
  407.    switch (ilo_dev_gen(dev)) {
  408.    case ILO_GEN(8):
  409.       return &toy_compaction_table_gen8;
  410.    case ILO_GEN(7.5):
  411.    case ILO_GEN(7):
  412.       return &toy_compaction_table_gen7;
  413.    case ILO_GEN(6):
  414.       return &toy_compaction_table_gen6;
  415.    default:
  416.       assert(!"unsupported gen");
  417.       return NULL;
  418.    }
  419. }
  420.  
  421. /**
  422.  * Return true if the source operand is null.
  423.  */
  424. static bool
  425. src_is_null(const struct codegen *cg, int idx)
  426. {
  427.    const struct codegen_src *src = &cg->src[idx];
  428.  
  429.    return (src->file == GEN6_FILE_ARF &&
  430.            src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
  431. }
  432.  
  433. /**
  434.  * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
  435.  */
  436. static uint32_t
  437. translate_src_gen6(const struct codegen *cg, int idx)
  438. {
  439.    const struct codegen_src *src = &cg->src[idx];
  440.    uint32_t dw;
  441.  
  442.    ILO_DEV_ASSERT(cg->dev, 6, 8);
  443.  
  444.    /* special treatment may be needed if any of the operand is immediate */
  445.    if (cg->src[0].file == GEN6_FILE_IMM) {
  446.       assert(!cg->src[0].absolute && !cg->src[0].negate);
  447.  
  448.       /* only the last src operand can be an immediate unless it is Gen8+ */
  449.       assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1));
  450.  
  451.       if (!src_is_null(cg, 1))
  452.          return cg->src[idx].origin;
  453.  
  454.       if (idx == 0) {
  455.          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  456.             return cg->src[1].type << 27 |
  457.                    cg->src[1].file << 25;
  458.          } else {
  459.             return cg->flag_sub_reg_num << 25;
  460.          }
  461.       } else {
  462.          return cg->src[0].origin;
  463.       }
  464.    }
  465.    else if (idx && cg->src[1].file == GEN6_FILE_IMM) {
  466.       assert(!cg->src[1].absolute && !cg->src[1].negate);
  467.       return cg->src[1].origin;
  468.    }
  469.  
  470.    assert(src->file != GEN6_FILE_IMM);
  471.  
  472.    if (src->indirect) {
  473.       const int offset = (int) src->origin;
  474.  
  475.       assert(src->file == GEN6_FILE_GRF);
  476.       assert(offset < 512 && offset >= -512);
  477.  
  478.       if (cg->inst->access_mode == GEN6_ALIGN_16) {
  479.          assert(src->width == GEN6_WIDTH_4);
  480.          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
  481.  
  482.          /* the lower 4 bits are reserved for the swizzle_[xy] */
  483.          assert(!(src->origin & 0xf));
  484.  
  485.          dw = src->vert_stride << 21 |
  486.               src->swizzle[3] << 18 |
  487.               src->swizzle[2] << 16 |
  488.               GEN6_ADDRMODE_INDIRECT << 15 |
  489.               src->negate << 14 |
  490.               src->absolute << 13 |
  491.               src->swizzle[1] << 2 |
  492.               src->swizzle[0];
  493.          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  494.             dw |= src->indirect_subreg << 9 |
  495.                   (src->origin & 0x1f0);
  496.          } else {
  497.             dw |= src->indirect_subreg << 10 |
  498.                   (src->origin & 0x3f0);
  499.          }
  500.       }
  501.       else {
  502.          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
  503.                 src->swizzle[1] == TOY_SWIZZLE_Y &&
  504.                 src->swizzle[2] == TOY_SWIZZLE_Z &&
  505.                 src->swizzle[3] == TOY_SWIZZLE_W);
  506.  
  507.          dw = src->vert_stride << 21 |
  508.               src->width << 18 |
  509.               src->horz_stride << 16 |
  510.               GEN6_ADDRMODE_INDIRECT << 15 |
  511.               src->negate << 14 |
  512.               src->absolute << 13;
  513.          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  514.             dw |= src->indirect_subreg << 9 |
  515.                   (src->origin & 0x1ff);
  516.          } else {
  517.             dw |= src->indirect_subreg << 10 |
  518.                   (src->origin & 0x3ff);
  519.          }
  520.       }
  521.    }
  522.    else {
  523.       switch (src->file) {
  524.       case GEN6_FILE_ARF:
  525.          break;
  526.       case GEN6_FILE_GRF:
  527.          assert(CG_REG_NUM(src->origin) < 128);
  528.          break;
  529.       case GEN6_FILE_MRF:
  530.          assert(cg->inst->opcode == GEN6_OPCODE_SEND ||
  531.                 cg->inst->opcode == GEN6_OPCODE_SENDC);
  532.          assert(CG_REG_NUM(src->origin) < 16);
  533.          break;
  534.       case GEN6_FILE_IMM:
  535.       default:
  536.          assert(!"invalid src file");
  537.          break;
  538.       }
  539.  
  540.       if (cg->inst->access_mode == GEN6_ALIGN_16) {
  541.          assert(src->width == GEN6_WIDTH_4);
  542.          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
  543.  
  544.          /* the lower 4 bits are reserved for the swizzle_[xy] */
  545.          assert(!(src->origin & 0xf));
  546.  
  547.          dw = src->vert_stride << 21 |
  548.               src->swizzle[3] << 18 |
  549.               src->swizzle[2] << 16 |
  550.               GEN6_ADDRMODE_DIRECT << 15 |
  551.               src->negate << 14 |
  552.               src->absolute << 13 |
  553.               src->origin |
  554.               src->swizzle[1] << 2 |
  555.               src->swizzle[0];
  556.       }
  557.       else {
  558.          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
  559.                 src->swizzle[1] == TOY_SWIZZLE_Y &&
  560.                 src->swizzle[2] == TOY_SWIZZLE_Z &&
  561.                 src->swizzle[3] == TOY_SWIZZLE_W);
  562.  
  563.          dw = src->vert_stride << 21 |
  564.               src->width << 18 |
  565.               src->horz_stride << 16 |
  566.               GEN6_ADDRMODE_DIRECT << 15 |
  567.               src->negate << 14 |
  568.               src->absolute << 13 |
  569.               src->origin;
  570.       }
  571.    }
  572.  
  573.    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  574.       const bool indirect_origin_bit9 = (cg->dst.indirect) ?
  575.          (src->origin & 0x200) : 0;
  576.  
  577.       if (idx == 0) {
  578.          dw |= indirect_origin_bit9 << 31 |
  579.                cg->src[1].type << 27 |
  580.                cg->src[1].file << 25;
  581.       } else {
  582.          dw |= indirect_origin_bit9 << 25;
  583.       }
  584.    } else {
  585.       if (idx == 0)
  586.          dw |= cg->flag_sub_reg_num << 25;
  587.    }
  588.  
  589.    return dw;
  590. }
  591.  
  592. /**
  593.  * Translate the destination operand to the higher 16 bits of DW1 of the
  594.  * 1-src/2-src format.
  595.  */
  596. static uint16_t
  597. translate_dst_region_gen6(const struct codegen *cg)
  598. {
  599.    const struct codegen_dst *dst = &cg->dst;
  600.    uint16_t dw1_region;
  601.  
  602.    ILO_DEV_ASSERT(cg->dev, 6, 8);
  603.  
  604.    if (dst->file == GEN6_FILE_IMM) {
  605.       /* dst is immediate (JIP) when the opcode is a conditional branch */
  606.       switch (cg->inst->opcode) {
  607.       case GEN6_OPCODE_IF:
  608.       case GEN6_OPCODE_ELSE:
  609.       case GEN6_OPCODE_ENDIF:
  610.       case GEN6_OPCODE_WHILE:
  611.          assert(dst->type == GEN6_TYPE_W);
  612.          dw1_region = (dst->origin & 0xffff);
  613.          break;
  614.       default:
  615.          assert(!"dst cannot be immediate");
  616.          dw1_region = 0;
  617.          break;
  618.       }
  619.  
  620.       return dw1_region;
  621.    }
  622.  
  623.    if (dst->indirect) {
  624.       const int offset = (int) dst->origin;
  625.  
  626.       assert(dst->file == GEN6_FILE_GRF);
  627.       assert(offset < 512 && offset >= -512);
  628.  
  629.       if (cg->inst->access_mode == GEN6_ALIGN_16) {
  630.          /*
  631.           * From the Sandy Bridge PRM, volume 4 part 2, page 144:
  632.           *
  633.           *     "Allthough Dst.HorzStride is a don't care for Align16, HW
  634.           *      needs this to be programmed as 01."
  635.           */
  636.          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
  637.          /* the lower 4 bits are reserved for the writemask */
  638.          assert(!(dst->origin & 0xf));
  639.  
  640.          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
  641.                       dst->horz_stride << 13 |
  642.                       dst->writemask;
  643.          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  644.             dw1_region |= dst->indirect_subreg << 9 |
  645.                           (dst->origin & 0x1f0);
  646.          } else {
  647.             dw1_region |= dst->indirect_subreg << 10 |
  648.                           (dst->origin & 0x3f0);
  649.          }
  650.       }
  651.       else {
  652.          assert(dst->writemask == TOY_WRITEMASK_XYZW);
  653.  
  654.          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
  655.                       dst->horz_stride << 13;
  656.          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  657.             dw1_region |= dst->indirect_subreg << 9 |
  658.                           (dst->origin & 0x1ff);
  659.          } else {
  660.             dw1_region |= dst->indirect_subreg << 10 |
  661.                           (dst->origin & 0x3ff);
  662.          }
  663.       }
  664.    }
  665.    else {
  666.       assert((dst->file == GEN6_FILE_GRF &&
  667.               CG_REG_NUM(dst->origin) < 128) ||
  668.              (dst->file == GEN6_FILE_MRF &&
  669.               CG_REG_NUM(dst->origin) < 16) ||
  670.              (dst->file == GEN6_FILE_ARF));
  671.  
  672.       if (cg->inst->access_mode == GEN6_ALIGN_16) {
  673.          /* similar to the indirect case */
  674.          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
  675.          assert(!(dst->origin & 0xf));
  676.  
  677.          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
  678.                       dst->horz_stride << 13 |
  679.                       dst->origin |
  680.                       dst->writemask;
  681.       }
  682.       else {
  683.          assert(dst->writemask == TOY_WRITEMASK_XYZW);
  684.  
  685.          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
  686.                       dst->horz_stride << 13 |
  687.                       dst->origin;
  688.       }
  689.    }
  690.  
  691.    return dw1_region;
  692. }
  693.  
  694. /**
  695.  * Translate the destination operand to DW1 of the 1-src/2-src format.
  696.  */
  697. static uint32_t
  698. translate_dst_gen6(const struct codegen *cg)
  699. {
  700.    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
  701.  
  702.    return translate_dst_region_gen6(cg) << 16 |
  703.           cg->src[1].type << 12 |
  704.           cg->src[1].file << 10 |
  705.           cg->src[0].type << 7 |
  706.           cg->src[0].file << 5 |
  707.           cg->dst.type << 2 |
  708.           cg->dst.file;
  709. }
  710.  
  711. static uint32_t
  712. translate_dst_gen8(const struct codegen *cg)
  713. {
  714.    const bool indirect_origin_bit9 = (cg->dst.indirect) ?
  715.       (cg->dst.origin & 0x200) : 0;
  716.  
  717.    ILO_DEV_ASSERT(cg->dev, 8, 8);
  718.  
  719.    return translate_dst_region_gen6(cg) << 16 |
  720.           indirect_origin_bit9 << 15 |
  721.           cg->src[0].type << 11 |
  722.           cg->src[0].file << 9 |
  723.           cg->dst.type << 5 |
  724.           cg->dst.file << 3 |
  725.           cg->inst->mask_ctrl << 2 |
  726.           cg->flag_reg_num << 1 |
  727.           cg->flag_sub_reg_num;
  728. }
  729.  
  730. /**
  731.  * Translate the instruction to DW0 of the 1-src/2-src format.
  732.  */
  733. static uint32_t
  734. translate_inst_gen6(const struct codegen *cg)
  735. {
  736.    const bool debug_ctrl = false;
  737.    const bool cmpt_ctrl = false;
  738.  
  739.    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
  740.  
  741.    assert(cg->inst->opcode < 128);
  742.  
  743.    return cg->inst->saturate << 31 |
  744.           debug_ctrl << 30 |
  745.           cmpt_ctrl << 29 |
  746.           cg->inst->acc_wr_ctrl << 28 |
  747.           cg->inst->cond_modifier << 24 |
  748.           cg->inst->exec_size << 21 |
  749.           cg->inst->pred_inv << 20 |
  750.           cg->inst->pred_ctrl << 16 |
  751.           cg->inst->thread_ctrl << 14 |
  752.           cg->inst->qtr_ctrl << 12 |
  753.           cg->inst->dep_ctrl << 10 |
  754.           cg->inst->mask_ctrl << 9 |
  755.           cg->inst->access_mode << 8 |
  756.           cg->inst->opcode;
  757. }
  758.  
  759. static uint32_t
  760. translate_inst_gen8(const struct codegen *cg)
  761. {
  762.    const bool debug_ctrl = false;
  763.    const bool cmpt_ctrl = false;
  764.  
  765.    ILO_DEV_ASSERT(cg->dev, 8, 8);
  766.  
  767.    assert(cg->inst->opcode < 128);
  768.  
  769.    return cg->inst->saturate << 31 |
  770.           debug_ctrl << 30 |
  771.           cmpt_ctrl << 29 |
  772.           cg->inst->acc_wr_ctrl << 28 |
  773.           cg->inst->cond_modifier << 24 |
  774.           cg->inst->exec_size << 21 |
  775.           cg->inst->pred_inv << 20 |
  776.           cg->inst->pred_ctrl << 16 |
  777.           cg->inst->thread_ctrl << 14 |
  778.           cg->inst->qtr_ctrl << 12 |
  779.           cg->inst->dep_ctrl << 9 |
  780.           cg->inst->access_mode << 8 |
  781.           cg->inst->opcode;
  782. }
  783.  
  784. /**
  785.  * Codegen an instruction in 1-src/2-src format.
  786.  */
  787. static void
  788. codegen_inst_gen6(const struct codegen *cg, uint32_t *code)
  789. {
  790.    ILO_DEV_ASSERT(cg->dev, 6, 8);
  791.  
  792.    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  793.       code[0] = translate_inst_gen8(cg);
  794.       code[1] = translate_dst_gen8(cg);
  795.    } else {
  796.       code[0] = translate_inst_gen6(cg);
  797.       code[1] = translate_dst_gen6(cg);
  798.    }
  799.  
  800.    code[2] = translate_src_gen6(cg, 0);
  801.    code[3] = translate_src_gen6(cg, 1);
  802.    assert(src_is_null(cg, 2));
  803. }
  804.  
  805. /**
  806.  * Codegen an instruction in 3-src format.
  807.  */
  808. static void
  809. codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code)
  810. {
  811.    const struct codegen_dst *dst = &cg->dst;
  812.    uint32_t dw0, dw1, dw_src[3];
  813.    int i;
  814.  
  815.    ILO_DEV_ASSERT(cg->dev, 6, 8);
  816.  
  817.    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8))
  818.       dw0 = translate_inst_gen8(cg);
  819.    else
  820.       dw0 = translate_inst_gen6(cg);
  821.  
  822.    /*
  823.     * 3-src instruction restrictions
  824.     *
  825.     *  - align16 with direct addressing
  826.     *  - GRF or MRF dst
  827.     *  - GRF src
  828.     *  - sub_reg_num is DWORD aligned
  829.     *  - no regioning except replication control
  830.     *    (vert_stride == 0 && horz_stride == 0)
  831.     */
  832.    assert(cg->inst->access_mode == GEN6_ALIGN_16);
  833.  
  834.    assert(!dst->indirect);
  835.    assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) ||
  836.           (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16));
  837.    assert(!(dst->origin & 0x3));
  838.    assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
  839.  
  840.    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
  841.       dw1 = dst->origin << 19 |
  842.             dst->writemask << 17 |
  843.             cg->src[2].negate << 10 |
  844.             cg->src[2].negate << 10 |
  845.             cg->src[2].absolute << 9 |
  846.             cg->src[1].negate << 8 |
  847.             cg->src[1].absolute << 7 |
  848.             cg->src[0].negate << 6 |
  849.             cg->src[0].absolute << 5 |
  850.             cg->inst->mask_ctrl << 2 |
  851.             cg->flag_reg_num << 1 |
  852.             cg->flag_sub_reg_num;
  853.    } else {
  854.       dw1 = dst->origin << 19 |
  855.             dst->writemask << 17 |
  856.             cg->src[2].negate << 9 |
  857.             cg->src[2].absolute << 8 |
  858.             cg->src[1].negate << 7 |
  859.             cg->src[1].absolute << 6 |
  860.             cg->src[0].negate << 5 |
  861.             cg->src[0].absolute << 4 |
  862.             cg->flag_sub_reg_num << 1 |
  863.             (dst->file == GEN6_FILE_MRF);
  864.    }
  865.  
  866.    for (i = 0; i < 3; i++) {
  867.       const struct codegen_src *src = &cg->src[i];
  868.  
  869.       assert(!src->indirect);
  870.       assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128);
  871.       assert(!(src->origin & 0x3));
  872.  
  873.       assert((src->vert_stride == GEN6_VERTSTRIDE_4 &&
  874.               src->horz_stride == GEN6_HORZSTRIDE_1) ||
  875.              (src->vert_stride == GEN6_VERTSTRIDE_0 &&
  876.               src->horz_stride == GEN6_HORZSTRIDE_0));
  877.       assert(src->width == GEN6_WIDTH_4);
  878.  
  879.       dw_src[i] = src->origin << 7 |
  880.                   src->swizzle[3] << 7 |
  881.                   src->swizzle[2] << 5 |
  882.                   src->swizzle[1] << 3 |
  883.                   src->swizzle[0] << 1 |
  884.                   (src->vert_stride == GEN6_VERTSTRIDE_0 &&
  885.                    src->horz_stride == GEN6_HORZSTRIDE_0);
  886.  
  887.       /* only the lower 20 bits are used */
  888.       assert((dw_src[i] & 0xfffff) == dw_src[i]);
  889.    }
  890.  
  891.    code[0] = dw0;
  892.    code[1] = dw1;
  893.    /* concatenate the bits of dw_src */
  894.    code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
  895.    code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
  896. }
  897.  
  898. /**
  899.  * Sanity check the region parameters of the operands.
  900.  */
  901. static void
  902. codegen_validate_region_restrictions(const struct codegen *cg)
  903. {
  904.    const int exec_size_map[] = {
  905.       [GEN6_EXECSIZE_1] = 1,
  906.       [GEN6_EXECSIZE_2] = 2,
  907.       [GEN6_EXECSIZE_4] = 4,
  908.       [GEN6_EXECSIZE_8] = 8,
  909.       [GEN6_EXECSIZE_16] = 16,
  910.       [GEN6_EXECSIZE_32] = 32,
  911.    };
  912.    const int width_map[] = {
  913.       [GEN6_WIDTH_1] = 1,
  914.       [GEN6_WIDTH_2] = 2,
  915.       [GEN6_WIDTH_4] = 4,
  916.       [GEN6_WIDTH_8] = 8,
  917.       [GEN6_WIDTH_16] = 16,
  918.    };
  919.    const int horz_stride_map[] = {
  920.       [GEN6_HORZSTRIDE_0] = 0,
  921.       [GEN6_HORZSTRIDE_1] = 1,
  922.       [GEN6_HORZSTRIDE_2] = 2,
  923.       [GEN6_HORZSTRIDE_4] = 4,
  924.    };
  925.    const int vert_stride_map[] = {
  926.       [GEN6_VERTSTRIDE_0] = 0,
  927.       [GEN6_VERTSTRIDE_1] = 1,
  928.       [GEN6_VERTSTRIDE_2] = 2,
  929.       [GEN6_VERTSTRIDE_4] = 4,
  930.       [GEN6_VERTSTRIDE_8] = 8,
  931.       [GEN6_VERTSTRIDE_16] = 16,
  932.       [GEN6_VERTSTRIDE_32] = 32,
  933.       [7] = 64,
  934.       [8] = 128,
  935.       [9] = 256,
  936.       [GEN6_VERTSTRIDE_VXH] = 0,
  937.    };
  938.    const int exec_size = exec_size_map[cg->inst->exec_size];
  939.    int i;
  940.  
  941.    /* Sandy Bridge PRM, volume 4 part 2, page 94 */
  942.  
  943.    /* 1. (we don't do 32 anyway) */
  944.    assert(exec_size <= 16);
  945.  
  946.    for (i = 0; i < Elements(cg->src); i++) {
  947.       const int width = width_map[cg->src[i].width];
  948.       const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
  949.       const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
  950.  
  951.       if (src_is_null(cg, i))
  952.          break;
  953.  
  954.       /* 3. */
  955.       assert(exec_size >= width);
  956.  
  957.       if (exec_size == width) {
  958.          /* 4. & 5. */
  959.          if (horz_stride)
  960.             assert(vert_stride == width * horz_stride);
  961.       }
  962.  
  963.       if (width == 1) {
  964.          /* 6. */
  965.          assert(horz_stride == 0);
  966.  
  967.          /* 7. */
  968.          if (exec_size == 1)
  969.             assert(vert_stride == 0);
  970.       }
  971.  
  972.       /* 8. */
  973.       if (!vert_stride && !horz_stride)
  974.          assert(width == 1);
  975.    }
  976.  
  977.    /* derived from 10.1.2. & 10.2. */
  978.    assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0);
  979. }
  980.  
  981. static unsigned
  982. translate_vfile(enum toy_file file)
  983. {
  984.    switch (file) {
  985.    case TOY_FILE_ARF:   return GEN6_FILE_ARF;
  986.    case TOY_FILE_GRF:   return GEN6_FILE_GRF;
  987.    case TOY_FILE_MRF:   return GEN6_FILE_MRF;
  988.    case TOY_FILE_IMM:   return GEN6_FILE_IMM;
  989.    default:
  990.       assert(!"unhandled toy file");
  991.       return GEN6_FILE_GRF;
  992.    }
  993. }
  994.  
  995. static unsigned
  996. translate_vtype(enum toy_type type)
  997. {
  998.    switch (type) {
  999.    case TOY_TYPE_F:     return GEN6_TYPE_F;
  1000.    case TOY_TYPE_D:     return GEN6_TYPE_D;
  1001.    case TOY_TYPE_UD:    return GEN6_TYPE_UD;
  1002.    case TOY_TYPE_W:     return GEN6_TYPE_W;
  1003.    case TOY_TYPE_UW:    return GEN6_TYPE_UW;
  1004.    case TOY_TYPE_V:     return GEN6_TYPE_V_IMM;
  1005.    default:
  1006.       assert(!"unhandled toy type");
  1007.       return GEN6_TYPE_F;
  1008.    }
  1009. }
  1010.  
  1011. static unsigned
  1012. translate_writemask(enum toy_writemask writemask)
  1013. {
  1014.    /* TOY_WRITEMASK_* are compatible with the hardware definitions */
  1015.    assert(writemask <= 0xf);
  1016.    return writemask;
  1017. }
  1018.  
  1019. static unsigned
  1020. translate_swizzle(enum toy_swizzle swizzle)
  1021. {
  1022.    /* TOY_SWIZZLE_* are compatible with the hardware definitions */
  1023.    assert(swizzle <= 3);
  1024.    return swizzle;
  1025. }
  1026.  
  1027. /**
  1028.  * Prepare for generating an instruction.
  1029.  */
  1030. static void
  1031. codegen_prepare(struct codegen *cg, const struct ilo_dev *dev,
  1032.                 const struct toy_inst *inst, int pc, int rect_linear_width)
  1033. {
  1034.    int i;
  1035.  
  1036.    cg->dev = dev;
  1037.    cg->inst = inst;
  1038.    cg->pc = pc;
  1039.  
  1040.    cg->flag_reg_num = 0;
  1041.    cg->flag_sub_reg_num = 0;
  1042.  
  1043.    cg->dst.file = translate_vfile(inst->dst.file);
  1044.    cg->dst.type = translate_vtype(inst->dst.type);
  1045.    cg->dst.indirect = inst->dst.indirect;
  1046.    cg->dst.indirect_subreg = inst->dst.indirect_subreg;
  1047.    cg->dst.origin = inst->dst.val32;
  1048.  
  1049.    /*
  1050.     * From the Sandy Bridge PRM, volume 4 part 2, page 81:
  1051.     *
  1052.     *     "For a word or an unsigned word immediate data, software must
  1053.     *      replicate the same 16-bit immediate value to both the lower word
  1054.     *      and the high word of the 32-bit immediate field in an instruction."
  1055.     */
  1056.    if (inst->dst.file == TOY_FILE_IMM) {
  1057.       switch (inst->dst.type) {
  1058.       case TOY_TYPE_W:
  1059.       case TOY_TYPE_UW:
  1060.          cg->dst.origin &= 0xffff;
  1061.          cg->dst.origin |= cg->dst.origin << 16;
  1062.          break;
  1063.       default:
  1064.          break;
  1065.       }
  1066.    }
  1067.  
  1068.    cg->dst.writemask = translate_writemask(inst->dst.writemask);
  1069.  
  1070.    switch (inst->dst.rect) {
  1071.    case TOY_RECT_LINEAR:
  1072.       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
  1073.       break;
  1074.    default:
  1075.       assert(!"unsupported dst region");
  1076.       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
  1077.       break;
  1078.    }
  1079.  
  1080.    for (i = 0; i < Elements(cg->src); i++) {
  1081.       struct codegen_src *src = &cg->src[i];
  1082.  
  1083.       src->file = translate_vfile(inst->src[i].file);
  1084.       src->type = translate_vtype(inst->src[i].type);
  1085.       src->indirect = inst->src[i].indirect;
  1086.       src->indirect_subreg = inst->src[i].indirect_subreg;
  1087.       src->origin = inst->src[i].val32;
  1088.  
  1089.       /* do the same for src */
  1090.       if (inst->dst.file == TOY_FILE_IMM) {
  1091.          switch (inst->src[i].type) {
  1092.          case TOY_TYPE_W:
  1093.          case TOY_TYPE_UW:
  1094.             src->origin &= 0xffff;
  1095.             src->origin |= src->origin << 16;
  1096.             break;
  1097.          default:
  1098.             break;
  1099.          }
  1100.       }
  1101.  
  1102.       src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
  1103.       src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
  1104.       src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
  1105.       src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
  1106.       src->absolute = inst->src[i].absolute;
  1107.       src->negate = inst->src[i].negate;
  1108.  
  1109.       switch (inst->src[i].rect) {
  1110.       case TOY_RECT_LINEAR:
  1111.          switch (rect_linear_width) {
  1112.          case 1:
  1113.             src->vert_stride = GEN6_VERTSTRIDE_1;
  1114.             src->width = GEN6_WIDTH_1;
  1115.             break;
  1116.          case 2:
  1117.             src->vert_stride = GEN6_VERTSTRIDE_2;
  1118.             src->width = GEN6_WIDTH_2;
  1119.             break;
  1120.          case 4:
  1121.             src->vert_stride = GEN6_VERTSTRIDE_4;
  1122.             src->width = GEN6_WIDTH_4;
  1123.             break;
  1124.          case 8:
  1125.             src->vert_stride = GEN6_VERTSTRIDE_8;
  1126.             src->width = GEN6_WIDTH_8;
  1127.             break;
  1128.          case 16:
  1129.             src->vert_stride = GEN6_VERTSTRIDE_16;
  1130.             src->width = GEN6_WIDTH_16;
  1131.             break;
  1132.          default:
  1133.             assert(!"unsupported TOY_RECT_LINEAR width");
  1134.             src->vert_stride = GEN6_VERTSTRIDE_1;
  1135.             src->width = GEN6_WIDTH_1;
  1136.             break;
  1137.          }
  1138.          src->horz_stride = GEN6_HORZSTRIDE_1;
  1139.          break;
  1140.       case TOY_RECT_041:
  1141.          src->vert_stride = GEN6_VERTSTRIDE_0;
  1142.          src->width = GEN6_WIDTH_4;
  1143.          src->horz_stride = GEN6_HORZSTRIDE_1;
  1144.          break;
  1145.       case TOY_RECT_010:
  1146.          src->vert_stride = GEN6_VERTSTRIDE_0;
  1147.          src->width = GEN6_WIDTH_1;
  1148.          src->horz_stride = GEN6_HORZSTRIDE_0;
  1149.          break;
  1150.       case TOY_RECT_220:
  1151.          src->vert_stride = GEN6_VERTSTRIDE_2;
  1152.          src->width = GEN6_WIDTH_2;
  1153.          src->horz_stride = GEN6_HORZSTRIDE_0;
  1154.          break;
  1155.       case TOY_RECT_440:
  1156.          src->vert_stride = GEN6_VERTSTRIDE_4;
  1157.          src->width = GEN6_WIDTH_4;
  1158.          src->horz_stride = GEN6_HORZSTRIDE_0;
  1159.          break;
  1160.       case TOY_RECT_240:
  1161.          src->vert_stride = GEN6_VERTSTRIDE_2;
  1162.          src->width = GEN6_WIDTH_4;
  1163.          src->horz_stride = GEN6_HORZSTRIDE_0;
  1164.          break;
  1165.       default:
  1166.          assert(!"unsupported src region");
  1167.          src->vert_stride = GEN6_VERTSTRIDE_1;
  1168.          src->width = GEN6_WIDTH_1;
  1169.          src->horz_stride = GEN6_HORZSTRIDE_1;
  1170.          break;
  1171.       }
  1172.    }
  1173. }
  1174.  
  1175. /**
  1176.  * Generate HW shader code.  The instructions should have been legalized.
  1177.  */
  1178. void *
  1179. toy_compiler_assemble(struct toy_compiler *tc, int *size)
  1180. {
  1181.    const struct toy_inst *inst;
  1182.    uint32_t *code;
  1183.    int pc;
  1184.  
  1185.    code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
  1186.    if (!code)
  1187.       return NULL;
  1188.  
  1189.    pc = 0;
  1190.    tc_head(tc);
  1191.    while ((inst = tc_next(tc)) != NULL) {
  1192.       uint32_t *dw = &code[pc * 4];
  1193.       struct codegen cg;
  1194.  
  1195.       if (pc >= tc->num_instructions) {
  1196.          tc_fail(tc, "wrong instructoun count");
  1197.          break;
  1198.       }
  1199.  
  1200.       codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width);
  1201.       codegen_validate_region_restrictions(&cg);
  1202.  
  1203.       switch (inst->opcode) {
  1204.       case GEN6_OPCODE_MAD:
  1205.          codegen_inst_3src_gen6(&cg, dw);
  1206.          break;
  1207.       default:
  1208.          codegen_inst_gen6(&cg, dw);
  1209.          break;
  1210.       }
  1211.  
  1212.       pc++;
  1213.    }
  1214.  
  1215.    /* never return an invalid kernel */
  1216.    if (tc->fail) {
  1217.       FREE(code);
  1218.       return NULL;
  1219.    }
  1220.  
  1221.    if (size)
  1222.       *size = pc * 4 * sizeof(uint32_t);
  1223.  
  1224.    return code;
  1225. }
  1226.