0,0 → 1,1225 |
/* |
* Mesa 3-D graphics library |
* |
* Copyright (C) 2012-2013 LunarG, Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included |
* in all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
* DEALINGS IN THE SOFTWARE. |
* |
* Authors: |
* Chia-I Wu <olv@lunarg.com> |
*/ |
|
#include "toy_compiler.h" |
|
/* an operand origin is packed as (RegNum << 5 | SubRegNumInBytes) */
#define CG_REG_SHIFT 5
/* extract the register number from a packed operand origin */
#define CG_REG_NUM(origin) ((origin) >> (CG_REG_SHIFT))
|
/* destination operand in the form consumed by the translate_*() helpers */
struct codegen_dst {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   unsigned horz_stride;

   unsigned writemask;
};

/* source operand in the form consumed by the translate_*() helpers */
struct codegen_src {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   /* region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   /* one selector per channel, in x, y, z, w order */
   unsigned swizzle[4];
   bool absolute;
   bool negate;
};

/*
 * Per-instruction state filled in by codegen_prepare() and read while the
 * instruction is encoded.
 */
struct codegen {
   const struct ilo_dev *dev;
   const struct toy_inst *inst;
   int pc;

   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
|
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 107-108: |
* |
* "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up |
* result forms bits [88:77], the source 0 register region fields, of the |
* 128-bit instruction word." |
* |
* "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit |
* table-look-up result forms bits [100:96], [68,64] and [52,48] of the |
* 128-bit instruction word." |
* |
* "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit |
* table-look-up result forms bits [63:61] and [46, 32] of the 128-bit |
* instruction word." |
* |
* "(ControlIndex) The 5-bit index for data type fields. The 17-bit |
* table-look-up result forms bits[31], and [23, 8] of the 128-bit |
* instruction word." |
*/ |
static const struct toy_compaction_table toy_compaction_table_gen6 = { |
.control = { |
[0] = 0x00000, /* 00000000000000000 */ |
[1] = 0x08000, /* 01000000000000000 */ |
[2] = 0x06000, /* 00110000000000000 */ |
[3] = 0x00100, /* 00000000100000000 */ |
[4] = 0x02000, /* 00010000000000000 */ |
[5] = 0x01100, /* 00001000100000000 */ |
[6] = 0x00102, /* 00000000100000010 */ |
[7] = 0x00002, /* 00000000000000010 */ |
[8] = 0x08100, /* 01000000100000000 */ |
[9] = 0x0a000, /* 01010000000000000 */ |
[10] = 0x16000, /* 10110000000000000 */ |
[11] = 0x04000, /* 00100000000000000 */ |
[12] = 0x1a000, /* 11010000000000000 */ |
[13] = 0x18000, /* 11000000000000000 */ |
[14] = 0x09100, /* 01001000100000000 */ |
[15] = 0x08008, /* 01000000000001000 */ |
[16] = 0x08004, /* 01000000000000100 */ |
[17] = 0x00008, /* 00000000000001000 */ |
[18] = 0x00004, /* 00000000000000100 */ |
[19] = 0x01100, /* 00111000100000000 */ |
[20] = 0x01102, /* 00001000100000010 */ |
[21] = 0x06100, /* 00110000100000000 */ |
[22] = 0x06001, /* 00110000000000001 */ |
[23] = 0x04001, /* 00100000000000001 */ |
[24] = 0x06002, /* 00110000000000010 */ |
[25] = 0x06005, /* 00110000000000101 */ |
[26] = 0x06009, /* 00110000000001001 */ |
[27] = 0x06010, /* 00110000000010000 */ |
[28] = 0x06003, /* 00110000000000011 */ |
[29] = 0x06004, /* 00110000000000100 */ |
[30] = 0x06108, /* 00110000100001000 */ |
[31] = 0x04009, /* 00100000000001001 */ |
}, |
.datatype = { |
[0] = 0x09c00, /* 001001110000000000 */ |
[1] = 0x08c20, /* 001000110000100000 */ |
[2] = 0x09c01, /* 001001110000000001 */ |
[3] = 0x08060, /* 001000000001100000 */ |
[4] = 0x0ad29, /* 001010110100101001 */ |
[5] = 0x081ad, /* 001000000110101101 */ |
[6] = 0x0c62c, /* 001100011000101100 */ |
[7] = 0x0bdad, /* 001011110110101101 */ |
[8] = 0x081ec, /* 001000000111101100 */ |
[9] = 0x08061, /* 001000000001100001 */ |
[10] = 0x08ca5, /* 001000110010100101 */ |
[11] = 0x08041, /* 001000000001000001 */ |
[12] = 0x08231, /* 001000001000110001 */ |
[13] = 0x08229, /* 001000001000101001 */ |
[14] = 0x08020, /* 001000000000100000 */ |
[15] = 0x08232, /* 001000001000110010 */ |
[16] = 0x0a529, /* 001010010100101001 */ |
[17] = 0x0b4a5, /* 001011010010100101 */ |
[18] = 0x081a5, /* 001000000110100101 */ |
[19] = 0x0c629, /* 001100011000101001 */ |
[20] = 0x0b62c, /* 001011011000101100 */ |
[21] = 0x0b5a5, /* 001011010110100101 */ |
[22] = 0x0bda5, /* 001011110110100101 */ |
[23] = 0x0f1bd, /* 001111011110111101 */ |
[24] = 0x0f1bc, /* 001111011110111100 */ |
[25] = 0x0f1bd, /* 001111011110111101 */ |
[26] = 0x0f19d, /* 001111011110011101 */ |
[27] = 0x0f1be, /* 001111011110111110 */ |
[28] = 0x08021, /* 001000000000100001 */ |
[29] = 0x08022, /* 001000000000100010 */ |
[30] = 0x09fdd, /* 001001111111011101 */ |
[31] = 0x083be, /* 001000001110111110 */ |
}, |
.subreg = { |
[0] = 0x0000, /* 000000000000000 */ |
[1] = 0x0004, /* 000000000000100 */ |
[2] = 0x0180, /* 000000110000000 */ |
[3] = 0x1000, /* 111000000000000 */ |
[4] = 0x3c08, /* 011110000001000 */ |
[5] = 0x0400, /* 000010000000000 */ |
[6] = 0x0010, /* 000000000010000 */ |
[7] = 0x0c0c, /* 000110000001100 */ |
[8] = 0x1000, /* 001000000000000 */ |
[9] = 0x0200, /* 000001000000000 */ |
[10] = 0x0294, /* 000001010010100 */ |
[11] = 0x0056, /* 000000001010110 */ |
[12] = 0x2000, /* 010000000000000 */ |
[13] = 0x6000, /* 110000000000000 */ |
[14] = 0x0800, /* 000100000000000 */ |
[15] = 0x0080, /* 000000010000000 */ |
[16] = 0x0008, /* 000000000001000 */ |
[17] = 0x4000, /* 100000000000000 */ |
[18] = 0x0280, /* 000001010000000 */ |
[19] = 0x1400, /* 001010000000000 */ |
[20] = 0x1800, /* 001100000000000 */ |
[21] = 0x0054, /* 000000001010100 */ |
[22] = 0x5a94, /* 101101010010100 */ |
[23] = 0x2800, /* 010100000000000 */ |
[24] = 0x008f, /* 000000010001111 */ |
[25] = 0x3000, /* 011000000000000 */ |
[26] = 0x1c00, /* 111110000000000 */ |
[27] = 0x5000, /* 101000000000000 */ |
[28] = 0x000f, /* 000000000001111 */ |
[29] = 0x088f, /* 000100010001111 */ |
[30] = 0x108f, /* 001000010001111 */ |
[31] = 0x0c00, /* 000110000000000 */ |
}, |
.src = { |
[0] = 0x000, /* 000000000000 */ |
[1] = 0x588, /* 010110001000 */ |
[2] = 0x468, /* 010001101000 */ |
[3] = 0x228, /* 001000101000 */ |
[4] = 0x690, /* 011010010000 */ |
[5] = 0x120, /* 000100100000 */ |
[6] = 0x46c, /* 010001101100 */ |
[7] = 0x510, /* 010101110000 */ |
[8] = 0x618, /* 011001111000 */ |
[9] = 0x328, /* 001100101000 */ |
[10] = 0x58c, /* 010110001100 */ |
[11] = 0x220, /* 001000100000 */ |
[12] = 0x58a, /* 010110001010 */ |
[13] = 0x002, /* 000000000010 */ |
[14] = 0x550, /* 010101010000 */ |
[15] = 0x568, /* 010101101000 */ |
[16] = 0xf4c, /* 111101001100 */ |
[17] = 0xf2c, /* 111100101100 */ |
[18] = 0x610, /* 011001110000 */ |
[19] = 0x589, /* 010110001001 */ |
[20] = 0x558, /* 010101011000 */ |
[21] = 0x348, /* 001101001000 */ |
[22] = 0x42c, /* 010000101100 */ |
[23] = 0x400, /* 010000000000 */ |
[24] = 0x310, /* 001101110000 */ |
[25] = 0x310, /* 001100010000 */ |
[26] = 0x300, /* 001100000000 */ |
[27] = 0x46a, /* 010001101010 */ |
[28] = 0x318, /* 001101111000 */ |
[29] = 0x010, /* 000001110000 */ |
[30] = 0x320, /* 001100100000 */ |
[31] = 0x350, /* 001101010000 */ |
}, |
}; |
|
/* |
* From the Ivy Bridge PRM, volume 4 part 3, page 128: |
* |
* "(Src0Index) Lookup one of 32 12-bit values. That value is used (from |
* MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride, |
* Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields." |
* |
* "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from |
* MSB to LSB) for various fields for Src1, Src0, and Dst, including |
* ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending |
* on AddrMode and AccessMode. |
* |
* "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used |
* (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType, |
* Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and |
* Src1.RegType bit fields." |
* |
* "(ControlIndex) Lookup one of 32 19-bit values. That value is used |
* (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate, |
* ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl, |
* and AccessMode bit fields." |
*/ |
static const struct toy_compaction_table toy_compaction_table_gen7 = { |
.control = { |
[0] = 0x00002, /* 0000000000000000010 */ |
[1] = 0x04000, /* 0000100000000000000 */ |
[2] = 0x04001, /* 0000100000000000001 */ |
[3] = 0x04002, /* 0000100000000000010 */ |
[4] = 0x04003, /* 0000100000000000011 */ |
[5] = 0x04004, /* 0000100000000000100 */ |
[6] = 0x04005, /* 0000100000000000101 */ |
[7] = 0x04007, /* 0000100000000000111 */ |
[8] = 0x04008, /* 0000100000000001000 */ |
[9] = 0x04009, /* 0000100000000001001 */ |
[10] = 0x0400d, /* 0000100000000001101 */ |
[11] = 0x06000, /* 0000110000000000000 */ |
[12] = 0x06001, /* 0000110000000000001 */ |
[13] = 0x06002, /* 0000110000000000010 */ |
[14] = 0x06003, /* 0000110000000000011 */ |
[15] = 0x06004, /* 0000110000000000100 */ |
[16] = 0x06005, /* 0000110000000000101 */ |
[17] = 0x06007, /* 0000110000000000111 */ |
[18] = 0x06009, /* 0000110000000001001 */ |
[19] = 0x0600d, /* 0000110000000001101 */ |
[20] = 0x06010, /* 0000110000000010000 */ |
[21] = 0x06100, /* 0000110000100000000 */ |
[22] = 0x08000, /* 0001000000000000000 */ |
[23] = 0x08002, /* 0001000000000000010 */ |
[24] = 0x08004, /* 0001000000000000100 */ |
[25] = 0x08100, /* 0001000000100000000 */ |
[26] = 0x16000, /* 0010110000000000000 */ |
[27] = 0x16010, /* 0010110000000010000 */ |
[28] = 0x18000, /* 0011000000000000000 */ |
[29] = 0x18100, /* 0011000000100000000 */ |
[30] = 0x28000, /* 0101000000000000000 */ |
[31] = 0x28100, /* 0101000000100000000 */ |
}, |
.datatype = { |
[0] = 0x08001, /* 001000000000000001 */ |
[1] = 0x08020, /* 001000000000100000 */ |
[2] = 0x08021, /* 001000000000100001 */ |
[3] = 0x08061, /* 001000000001100001 */ |
[4] = 0x080bd, /* 001000000010111101 */ |
[5] = 0x082fd, /* 001000001011111101 */ |
[6] = 0x083a1, /* 001000001110100001 */ |
[7] = 0x083a5, /* 001000001110100101 */ |
[8] = 0x083bd, /* 001000001110111101 */ |
[9] = 0x08421, /* 001000010000100001 */ |
[10] = 0x08c20, /* 001000110000100000 */ |
[11] = 0x08c21, /* 001000110000100001 */ |
[12] = 0x094a5, /* 001001010010100101 */ |
[13] = 0x09ca4, /* 001001110010100100 */ |
[14] = 0x09ca5, /* 001001110010100101 */ |
[15] = 0x0f3bd, /* 001111001110111101 */ |
[16] = 0x0f79d, /* 001111011110011101 */ |
[17] = 0x0f7bc, /* 001111011110111100 */ |
[18] = 0x0f7bd, /* 001111011110111101 */ |
[19] = 0x0ffbc, /* 001111111110111100 */ |
[20] = 0x0020c, /* 000000001000001100 */ |
[21] = 0x0803d, /* 001000000000111101 */ |
[22] = 0x080a5, /* 001000000010100101 */ |
[23] = 0x08420, /* 001000010000100000 */ |
[24] = 0x094a4, /* 001001010010100100 */ |
[25] = 0x09c84, /* 001001110010000100 */ |
[26] = 0x0a509, /* 001010010100001001 */ |
[27] = 0x0dfbd, /* 001101111110111101 */ |
[28] = 0x0ffbd, /* 001111111110111101 */ |
[29] = 0x0bdac, /* 001011110110101100 */ |
[30] = 0x0a528, /* 001010010100101000 */ |
[31] = 0x0ad28, /* 001010110100101000 */ |
}, |
.subreg = { |
[0] = 0x0000, /* 000000000000000 */ |
[1] = 0x0001, /* 000000000000001 */ |
[2] = 0x0008, /* 000000000001000 */ |
[3] = 0x000f, /* 000000000001111 */ |
[4] = 0x0010, /* 000000000010000 */ |
[5] = 0x0080, /* 000000010000000 */ |
[6] = 0x0100, /* 000000100000000 */ |
[7] = 0x0180, /* 000000110000000 */ |
[8] = 0x0200, /* 000001000000000 */ |
[9] = 0x0210, /* 000001000010000 */ |
[10] = 0x0280, /* 000001010000000 */ |
[11] = 0x1000, /* 001000000000000 */ |
[12] = 0x1001, /* 001000000000001 */ |
[13] = 0x1081, /* 001000010000001 */ |
[14] = 0x1082, /* 001000010000010 */ |
[15] = 0x1083, /* 001000010000011 */ |
[16] = 0x1084, /* 001000010000100 */ |
[17] = 0x1087, /* 001000010000111 */ |
[18] = 0x1088, /* 001000010001000 */ |
[19] = 0x108e, /* 001000010001110 */ |
[20] = 0x108f, /* 001000010001111 */ |
[21] = 0x1180, /* 001000110000000 */ |
[22] = 0x11e8, /* 001000111101000 */ |
[23] = 0x2000, /* 010000000000000 */ |
[24] = 0x2180, /* 010000110000000 */ |
[25] = 0x3000, /* 011000000000000 */ |
[26] = 0x3c87, /* 011110010000111 */ |
[27] = 0x4000, /* 100000000000000 */ |
[28] = 0x5000, /* 101000000000000 */ |
[29] = 0x6000, /* 110000000000000 */ |
[30] = 0x7000, /* 111000000000000 */ |
[31] = 0x701c, /* 111000000011100 */ |
}, |
.src = { |
[0] = 0x000, /* 000000000000 */ |
[1] = 0x002, /* 000000000010 */ |
[2] = 0x010, /* 000000010000 */ |
[3] = 0x012, /* 000000010010 */ |
[4] = 0x018, /* 000000011000 */ |
[5] = 0x020, /* 000000100000 */ |
[6] = 0x028, /* 000000101000 */ |
[7] = 0x048, /* 000001001000 */ |
[8] = 0x050, /* 000001010000 */ |
[9] = 0x070, /* 000001110000 */ |
[10] = 0x078, /* 000001111000 */ |
[11] = 0x300, /* 001100000000 */ |
[12] = 0x302, /* 001100000010 */ |
[13] = 0x308, /* 001100001000 */ |
[14] = 0x310, /* 001100010000 */ |
[15] = 0x312, /* 001100010010 */ |
[16] = 0x320, /* 001100100000 */ |
[17] = 0x328, /* 001100101000 */ |
[18] = 0x338, /* 001100111000 */ |
[19] = 0x340, /* 001101000000 */ |
[20] = 0x342, /* 001101000010 */ |
[21] = 0x348, /* 001101001000 */ |
[22] = 0x350, /* 001101010000 */ |
[23] = 0x360, /* 001101100000 */ |
[24] = 0x368, /* 001101101000 */ |
[25] = 0x370, /* 001101110000 */ |
[26] = 0x371, /* 001101110001 */ |
[27] = 0x378, /* 001101111000 */ |
[28] = 0x468, /* 010001101000 */ |
[29] = 0x469, /* 010001101001 */ |
[30] = 0x46a, /* 010001101010 */ |
[31] = 0x588, /* 010110001000 */ |
}, |
}; |
|
static const struct toy_compaction_table toy_compaction_table_gen8 = { |
.control = { |
}, |
.datatype = { |
}, |
.subreg = { |
}, |
.src = { |
}, |
.control_3src = { |
}, |
.source_3src = { |
}, |
}; |
|
const struct toy_compaction_table * |
toy_compiler_get_compaction_table(const struct ilo_dev *dev) |
{ |
switch (ilo_dev_gen(dev)) { |
case ILO_GEN(8): |
return &toy_compaction_table_gen8; |
case ILO_GEN(7.5): |
case ILO_GEN(7): |
return &toy_compaction_table_gen7; |
case ILO_GEN(6): |
return &toy_compaction_table_gen6; |
default: |
assert(!"unsupported gen"); |
return NULL; |
} |
} |
|
/** |
* Return true if the source operand is null. |
*/ |
static bool |
src_is_null(const struct codegen *cg, int idx) |
{ |
const struct codegen_src *src = &cg->src[idx]; |
|
return (src->file == GEN6_FILE_ARF && |
src->origin == GEN6_ARF_NULL << CG_REG_SHIFT); |
} |
|
/** |
* Translate a source operand to DW2 or DW3 of the 1-src/2-src format. |
*/ |
static uint32_t |
translate_src_gen6(const struct codegen *cg, int idx) |
{ |
const struct codegen_src *src = &cg->src[idx]; |
uint32_t dw; |
|
ILO_DEV_ASSERT(cg->dev, 6, 8); |
|
/* special treatment may be needed if any of the operand is immediate */ |
if (cg->src[0].file == GEN6_FILE_IMM) { |
assert(!cg->src[0].absolute && !cg->src[0].negate); |
|
/* only the last src operand can be an immediate unless it is Gen8+ */ |
assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1)); |
|
if (!src_is_null(cg, 1)) |
return cg->src[idx].origin; |
|
if (idx == 0) { |
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
return cg->src[1].type << 27 | |
cg->src[1].file << 25; |
} else { |
return cg->flag_sub_reg_num << 25; |
} |
} else { |
return cg->src[0].origin; |
} |
} |
else if (idx && cg->src[1].file == GEN6_FILE_IMM) { |
assert(!cg->src[1].absolute && !cg->src[1].negate); |
return cg->src[1].origin; |
} |
|
assert(src->file != GEN6_FILE_IMM); |
|
if (src->indirect) { |
const int offset = (int) src->origin; |
|
assert(src->file == GEN6_FILE_GRF); |
assert(offset < 512 && offset >= -512); |
|
if (cg->inst->access_mode == GEN6_ALIGN_16) { |
assert(src->width == GEN6_WIDTH_4); |
assert(src->horz_stride == GEN6_HORZSTRIDE_1); |
|
/* the lower 4 bits are reserved for the swizzle_[xy] */ |
assert(!(src->origin & 0xf)); |
|
dw = src->vert_stride << 21 | |
src->swizzle[3] << 18 | |
src->swizzle[2] << 16 | |
GEN6_ADDRMODE_INDIRECT << 15 | |
src->negate << 14 | |
src->absolute << 13 | |
src->swizzle[1] << 2 | |
src->swizzle[0]; |
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
dw |= src->indirect_subreg << 9 | |
(src->origin & 0x1f0); |
} else { |
dw |= src->indirect_subreg << 10 | |
(src->origin & 0x3f0); |
} |
} |
else { |
assert(src->swizzle[0] == TOY_SWIZZLE_X && |
src->swizzle[1] == TOY_SWIZZLE_Y && |
src->swizzle[2] == TOY_SWIZZLE_Z && |
src->swizzle[3] == TOY_SWIZZLE_W); |
|
dw = src->vert_stride << 21 | |
src->width << 18 | |
src->horz_stride << 16 | |
GEN6_ADDRMODE_INDIRECT << 15 | |
src->negate << 14 | |
src->absolute << 13; |
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
dw |= src->indirect_subreg << 9 | |
(src->origin & 0x1ff); |
} else { |
dw |= src->indirect_subreg << 10 | |
(src->origin & 0x3ff); |
} |
} |
} |
else { |
switch (src->file) { |
case GEN6_FILE_ARF: |
break; |
case GEN6_FILE_GRF: |
assert(CG_REG_NUM(src->origin) < 128); |
break; |
case GEN6_FILE_MRF: |
assert(cg->inst->opcode == GEN6_OPCODE_SEND || |
cg->inst->opcode == GEN6_OPCODE_SENDC); |
assert(CG_REG_NUM(src->origin) < 16); |
break; |
case GEN6_FILE_IMM: |
default: |
assert(!"invalid src file"); |
break; |
} |
|
if (cg->inst->access_mode == GEN6_ALIGN_16) { |
assert(src->width == GEN6_WIDTH_4); |
assert(src->horz_stride == GEN6_HORZSTRIDE_1); |
|
/* the lower 4 bits are reserved for the swizzle_[xy] */ |
assert(!(src->origin & 0xf)); |
|
dw = src->vert_stride << 21 | |
src->swizzle[3] << 18 | |
src->swizzle[2] << 16 | |
GEN6_ADDRMODE_DIRECT << 15 | |
src->negate << 14 | |
src->absolute << 13 | |
src->origin | |
src->swizzle[1] << 2 | |
src->swizzle[0]; |
} |
else { |
assert(src->swizzle[0] == TOY_SWIZZLE_X && |
src->swizzle[1] == TOY_SWIZZLE_Y && |
src->swizzle[2] == TOY_SWIZZLE_Z && |
src->swizzle[3] == TOY_SWIZZLE_W); |
|
dw = src->vert_stride << 21 | |
src->width << 18 | |
src->horz_stride << 16 | |
GEN6_ADDRMODE_DIRECT << 15 | |
src->negate << 14 | |
src->absolute << 13 | |
src->origin; |
} |
} |
|
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
const bool indirect_origin_bit9 = (cg->dst.indirect) ? |
(src->origin & 0x200) : 0; |
|
if (idx == 0) { |
dw |= indirect_origin_bit9 << 31 | |
cg->src[1].type << 27 | |
cg->src[1].file << 25; |
} else { |
dw |= indirect_origin_bit9 << 25; |
} |
} else { |
if (idx == 0) |
dw |= cg->flag_sub_reg_num << 25; |
} |
|
return dw; |
} |
|
/** |
* Translate the destination operand to the higher 16 bits of DW1 of the |
* 1-src/2-src format. |
*/ |
static uint16_t |
translate_dst_region_gen6(const struct codegen *cg) |
{ |
const struct codegen_dst *dst = &cg->dst; |
uint16_t dw1_region; |
|
ILO_DEV_ASSERT(cg->dev, 6, 8); |
|
if (dst->file == GEN6_FILE_IMM) { |
/* dst is immediate (JIP) when the opcode is a conditional branch */ |
switch (cg->inst->opcode) { |
case GEN6_OPCODE_IF: |
case GEN6_OPCODE_ELSE: |
case GEN6_OPCODE_ENDIF: |
case GEN6_OPCODE_WHILE: |
assert(dst->type == GEN6_TYPE_W); |
dw1_region = (dst->origin & 0xffff); |
break; |
default: |
assert(!"dst cannot be immediate"); |
dw1_region = 0; |
break; |
} |
|
return dw1_region; |
} |
|
if (dst->indirect) { |
const int offset = (int) dst->origin; |
|
assert(dst->file == GEN6_FILE_GRF); |
assert(offset < 512 && offset >= -512); |
|
if (cg->inst->access_mode == GEN6_ALIGN_16) { |
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 144: |
* |
* "Allthough Dst.HorzStride is a don't care for Align16, HW |
* needs this to be programmed as 01." |
*/ |
assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
/* the lower 4 bits are reserved for the writemask */ |
assert(!(dst->origin & 0xf)); |
|
dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | |
dst->horz_stride << 13 | |
dst->writemask; |
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
dw1_region |= dst->indirect_subreg << 9 | |
(dst->origin & 0x1f0); |
} else { |
dw1_region |= dst->indirect_subreg << 10 | |
(dst->origin & 0x3f0); |
} |
} |
else { |
assert(dst->writemask == TOY_WRITEMASK_XYZW); |
|
dw1_region = GEN6_ADDRMODE_INDIRECT << 15 | |
dst->horz_stride << 13; |
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
dw1_region |= dst->indirect_subreg << 9 | |
(dst->origin & 0x1ff); |
} else { |
dw1_region |= dst->indirect_subreg << 10 | |
(dst->origin & 0x3ff); |
} |
} |
} |
else { |
assert((dst->file == GEN6_FILE_GRF && |
CG_REG_NUM(dst->origin) < 128) || |
(dst->file == GEN6_FILE_MRF && |
CG_REG_NUM(dst->origin) < 16) || |
(dst->file == GEN6_FILE_ARF)); |
|
if (cg->inst->access_mode == GEN6_ALIGN_16) { |
/* similar to the indirect case */ |
assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
assert(!(dst->origin & 0xf)); |
|
dw1_region = GEN6_ADDRMODE_DIRECT << 15 | |
dst->horz_stride << 13 | |
dst->origin | |
dst->writemask; |
} |
else { |
assert(dst->writemask == TOY_WRITEMASK_XYZW); |
|
dw1_region = GEN6_ADDRMODE_DIRECT << 15 | |
dst->horz_stride << 13 | |
dst->origin; |
} |
} |
|
return dw1_region; |
} |
|
/** |
* Translate the destination operand to DW1 of the 1-src/2-src format. |
*/ |
static uint32_t |
translate_dst_gen6(const struct codegen *cg) |
{ |
ILO_DEV_ASSERT(cg->dev, 6, 7.5); |
|
return translate_dst_region_gen6(cg) << 16 | |
cg->src[1].type << 12 | |
cg->src[1].file << 10 | |
cg->src[0].type << 7 | |
cg->src[0].file << 5 | |
cg->dst.type << 2 | |
cg->dst.file; |
} |
|
static uint32_t |
translate_dst_gen8(const struct codegen *cg) |
{ |
const bool indirect_origin_bit9 = (cg->dst.indirect) ? |
(cg->dst.origin & 0x200) : 0; |
|
ILO_DEV_ASSERT(cg->dev, 8, 8); |
|
return translate_dst_region_gen6(cg) << 16 | |
indirect_origin_bit9 << 15 | |
cg->src[0].type << 11 | |
cg->src[0].file << 9 | |
cg->dst.type << 5 | |
cg->dst.file << 3 | |
cg->inst->mask_ctrl << 2 | |
cg->flag_reg_num << 1 | |
cg->flag_sub_reg_num; |
} |
|
/** |
* Translate the instruction to DW0 of the 1-src/2-src format. |
*/ |
static uint32_t |
translate_inst_gen6(const struct codegen *cg) |
{ |
const bool debug_ctrl = false; |
const bool cmpt_ctrl = false; |
|
ILO_DEV_ASSERT(cg->dev, 6, 7.5); |
|
assert(cg->inst->opcode < 128); |
|
return cg->inst->saturate << 31 | |
debug_ctrl << 30 | |
cmpt_ctrl << 29 | |
cg->inst->acc_wr_ctrl << 28 | |
cg->inst->cond_modifier << 24 | |
cg->inst->exec_size << 21 | |
cg->inst->pred_inv << 20 | |
cg->inst->pred_ctrl << 16 | |
cg->inst->thread_ctrl << 14 | |
cg->inst->qtr_ctrl << 12 | |
cg->inst->dep_ctrl << 10 | |
cg->inst->mask_ctrl << 9 | |
cg->inst->access_mode << 8 | |
cg->inst->opcode; |
} |
|
static uint32_t |
translate_inst_gen8(const struct codegen *cg) |
{ |
const bool debug_ctrl = false; |
const bool cmpt_ctrl = false; |
|
ILO_DEV_ASSERT(cg->dev, 8, 8); |
|
assert(cg->inst->opcode < 128); |
|
return cg->inst->saturate << 31 | |
debug_ctrl << 30 | |
cmpt_ctrl << 29 | |
cg->inst->acc_wr_ctrl << 28 | |
cg->inst->cond_modifier << 24 | |
cg->inst->exec_size << 21 | |
cg->inst->pred_inv << 20 | |
cg->inst->pred_ctrl << 16 | |
cg->inst->thread_ctrl << 14 | |
cg->inst->qtr_ctrl << 12 | |
cg->inst->dep_ctrl << 9 | |
cg->inst->access_mode << 8 | |
cg->inst->opcode; |
} |
|
/** |
* Codegen an instruction in 1-src/2-src format. |
*/ |
static void |
codegen_inst_gen6(const struct codegen *cg, uint32_t *code) |
{ |
ILO_DEV_ASSERT(cg->dev, 6, 8); |
|
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
code[0] = translate_inst_gen8(cg); |
code[1] = translate_dst_gen8(cg); |
} else { |
code[0] = translate_inst_gen6(cg); |
code[1] = translate_dst_gen6(cg); |
} |
|
code[2] = translate_src_gen6(cg, 0); |
code[3] = translate_src_gen6(cg, 1); |
assert(src_is_null(cg, 2)); |
} |
|
/** |
* Codegen an instruction in 3-src format. |
*/ |
static void |
codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code) |
{ |
const struct codegen_dst *dst = &cg->dst; |
uint32_t dw0, dw1, dw_src[3]; |
int i; |
|
ILO_DEV_ASSERT(cg->dev, 6, 8); |
|
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) |
dw0 = translate_inst_gen8(cg); |
else |
dw0 = translate_inst_gen6(cg); |
|
/* |
* 3-src instruction restrictions |
* |
* - align16 with direct addressing |
* - GRF or MRF dst |
* - GRF src |
* - sub_reg_num is DWORD aligned |
* - no regioning except replication control |
* (vert_stride == 0 && horz_stride == 0) |
*/ |
assert(cg->inst->access_mode == GEN6_ALIGN_16); |
|
assert(!dst->indirect); |
assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) || |
(dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16)); |
assert(!(dst->origin & 0x3)); |
assert(dst->horz_stride == GEN6_HORZSTRIDE_1); |
|
if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) { |
dw1 = dst->origin << 19 | |
dst->writemask << 17 | |
cg->src[2].negate << 10 | |
cg->src[2].negate << 10 | |
cg->src[2].absolute << 9 | |
cg->src[1].negate << 8 | |
cg->src[1].absolute << 7 | |
cg->src[0].negate << 6 | |
cg->src[0].absolute << 5 | |
cg->inst->mask_ctrl << 2 | |
cg->flag_reg_num << 1 | |
cg->flag_sub_reg_num; |
} else { |
dw1 = dst->origin << 19 | |
dst->writemask << 17 | |
cg->src[2].negate << 9 | |
cg->src[2].absolute << 8 | |
cg->src[1].negate << 7 | |
cg->src[1].absolute << 6 | |
cg->src[0].negate << 5 | |
cg->src[0].absolute << 4 | |
cg->flag_sub_reg_num << 1 | |
(dst->file == GEN6_FILE_MRF); |
} |
|
for (i = 0; i < 3; i++) { |
const struct codegen_src *src = &cg->src[i]; |
|
assert(!src->indirect); |
assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128); |
assert(!(src->origin & 0x3)); |
|
assert((src->vert_stride == GEN6_VERTSTRIDE_4 && |
src->horz_stride == GEN6_HORZSTRIDE_1) || |
(src->vert_stride == GEN6_VERTSTRIDE_0 && |
src->horz_stride == GEN6_HORZSTRIDE_0)); |
assert(src->width == GEN6_WIDTH_4); |
|
dw_src[i] = src->origin << 7 | |
src->swizzle[3] << 7 | |
src->swizzle[2] << 5 | |
src->swizzle[1] << 3 | |
src->swizzle[0] << 1 | |
(src->vert_stride == GEN6_VERTSTRIDE_0 && |
src->horz_stride == GEN6_HORZSTRIDE_0); |
|
/* only the lower 20 bits are used */ |
assert((dw_src[i] & 0xfffff) == dw_src[i]); |
} |
|
code[0] = dw0; |
code[1] = dw1; |
/* concatenate the bits of dw_src */ |
code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0]; |
code[3] = dw_src[2] << 10 | (dw_src[1] >> 11); |
} |
|
/** |
* Sanity check the region parameters of the operands. |
*/ |
static void |
codegen_validate_region_restrictions(const struct codegen *cg) |
{ |
const int exec_size_map[] = { |
[GEN6_EXECSIZE_1] = 1, |
[GEN6_EXECSIZE_2] = 2, |
[GEN6_EXECSIZE_4] = 4, |
[GEN6_EXECSIZE_8] = 8, |
[GEN6_EXECSIZE_16] = 16, |
[GEN6_EXECSIZE_32] = 32, |
}; |
const int width_map[] = { |
[GEN6_WIDTH_1] = 1, |
[GEN6_WIDTH_2] = 2, |
[GEN6_WIDTH_4] = 4, |
[GEN6_WIDTH_8] = 8, |
[GEN6_WIDTH_16] = 16, |
}; |
const int horz_stride_map[] = { |
[GEN6_HORZSTRIDE_0] = 0, |
[GEN6_HORZSTRIDE_1] = 1, |
[GEN6_HORZSTRIDE_2] = 2, |
[GEN6_HORZSTRIDE_4] = 4, |
}; |
const int vert_stride_map[] = { |
[GEN6_VERTSTRIDE_0] = 0, |
[GEN6_VERTSTRIDE_1] = 1, |
[GEN6_VERTSTRIDE_2] = 2, |
[GEN6_VERTSTRIDE_4] = 4, |
[GEN6_VERTSTRIDE_8] = 8, |
[GEN6_VERTSTRIDE_16] = 16, |
[GEN6_VERTSTRIDE_32] = 32, |
[7] = 64, |
[8] = 128, |
[9] = 256, |
[GEN6_VERTSTRIDE_VXH] = 0, |
}; |
const int exec_size = exec_size_map[cg->inst->exec_size]; |
int i; |
|
/* Sandy Bridge PRM, volume 4 part 2, page 94 */ |
|
/* 1. (we don't do 32 anyway) */ |
assert(exec_size <= 16); |
|
for (i = 0; i < Elements(cg->src); i++) { |
const int width = width_map[cg->src[i].width]; |
const int horz_stride = horz_stride_map[cg->src[i].horz_stride]; |
const int vert_stride = vert_stride_map[cg->src[i].vert_stride]; |
|
if (src_is_null(cg, i)) |
break; |
|
/* 3. */ |
assert(exec_size >= width); |
|
if (exec_size == width) { |
/* 4. & 5. */ |
if (horz_stride) |
assert(vert_stride == width * horz_stride); |
} |
|
if (width == 1) { |
/* 6. */ |
assert(horz_stride == 0); |
|
/* 7. */ |
if (exec_size == 1) |
assert(vert_stride == 0); |
} |
|
/* 8. */ |
if (!vert_stride && !horz_stride) |
assert(width == 1); |
} |
|
/* derived from 10.1.2. & 10.2. */ |
assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0); |
} |
|
static unsigned |
translate_vfile(enum toy_file file) |
{ |
switch (file) { |
case TOY_FILE_ARF: return GEN6_FILE_ARF; |
case TOY_FILE_GRF: return GEN6_FILE_GRF; |
case TOY_FILE_MRF: return GEN6_FILE_MRF; |
case TOY_FILE_IMM: return GEN6_FILE_IMM; |
default: |
assert(!"unhandled toy file"); |
return GEN6_FILE_GRF; |
} |
} |
|
static unsigned |
translate_vtype(enum toy_type type) |
{ |
switch (type) { |
case TOY_TYPE_F: return GEN6_TYPE_F; |
case TOY_TYPE_D: return GEN6_TYPE_D; |
case TOY_TYPE_UD: return GEN6_TYPE_UD; |
case TOY_TYPE_W: return GEN6_TYPE_W; |
case TOY_TYPE_UW: return GEN6_TYPE_UW; |
case TOY_TYPE_V: return GEN6_TYPE_V_IMM; |
default: |
assert(!"unhandled toy type"); |
return GEN6_TYPE_F; |
} |
} |
|
static unsigned |
translate_writemask(enum toy_writemask writemask) |
{ |
/* TOY_WRITEMASK_* are compatible with the hardware definitions */ |
assert(writemask <= 0xf); |
return writemask; |
} |
|
static unsigned |
translate_swizzle(enum toy_swizzle swizzle) |
{ |
/* TOY_SWIZZLE_* are compatible with the hardware definitions */ |
assert(swizzle <= 3); |
return swizzle; |
} |
|
/** |
* Prepare for generating an instruction. |
*/ |
static void |
codegen_prepare(struct codegen *cg, const struct ilo_dev *dev, |
const struct toy_inst *inst, int pc, int rect_linear_width) |
{ |
int i; |
|
cg->dev = dev; |
cg->inst = inst; |
cg->pc = pc; |
|
cg->flag_reg_num = 0; |
cg->flag_sub_reg_num = 0; |
|
cg->dst.file = translate_vfile(inst->dst.file); |
cg->dst.type = translate_vtype(inst->dst.type); |
cg->dst.indirect = inst->dst.indirect; |
cg->dst.indirect_subreg = inst->dst.indirect_subreg; |
cg->dst.origin = inst->dst.val32; |
|
/* |
* From the Sandy Bridge PRM, volume 4 part 2, page 81: |
* |
* "For a word or an unsigned word immediate data, software must |
* replicate the same 16-bit immediate value to both the lower word |
* and the high word of the 32-bit immediate field in an instruction." |
*/ |
if (inst->dst.file == TOY_FILE_IMM) { |
switch (inst->dst.type) { |
case TOY_TYPE_W: |
case TOY_TYPE_UW: |
cg->dst.origin &= 0xffff; |
cg->dst.origin |= cg->dst.origin << 16; |
break; |
default: |
break; |
} |
} |
|
cg->dst.writemask = translate_writemask(inst->dst.writemask); |
|
switch (inst->dst.rect) { |
case TOY_RECT_LINEAR: |
cg->dst.horz_stride = GEN6_HORZSTRIDE_1; |
break; |
default: |
assert(!"unsupported dst region"); |
cg->dst.horz_stride = GEN6_HORZSTRIDE_1; |
break; |
} |
|
for (i = 0; i < Elements(cg->src); i++) { |
struct codegen_src *src = &cg->src[i]; |
|
src->file = translate_vfile(inst->src[i].file); |
src->type = translate_vtype(inst->src[i].type); |
src->indirect = inst->src[i].indirect; |
src->indirect_subreg = inst->src[i].indirect_subreg; |
src->origin = inst->src[i].val32; |
|
/* do the same for src */ |
if (inst->dst.file == TOY_FILE_IMM) { |
switch (inst->src[i].type) { |
case TOY_TYPE_W: |
case TOY_TYPE_UW: |
src->origin &= 0xffff; |
src->origin |= src->origin << 16; |
break; |
default: |
break; |
} |
} |
|
src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x); |
src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y); |
src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z); |
src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w); |
src->absolute = inst->src[i].absolute; |
src->negate = inst->src[i].negate; |
|
switch (inst->src[i].rect) { |
case TOY_RECT_LINEAR: |
switch (rect_linear_width) { |
case 1: |
src->vert_stride = GEN6_VERTSTRIDE_1; |
src->width = GEN6_WIDTH_1; |
break; |
case 2: |
src->vert_stride = GEN6_VERTSTRIDE_2; |
src->width = GEN6_WIDTH_2; |
break; |
case 4: |
src->vert_stride = GEN6_VERTSTRIDE_4; |
src->width = GEN6_WIDTH_4; |
break; |
case 8: |
src->vert_stride = GEN6_VERTSTRIDE_8; |
src->width = GEN6_WIDTH_8; |
break; |
case 16: |
src->vert_stride = GEN6_VERTSTRIDE_16; |
src->width = GEN6_WIDTH_16; |
break; |
default: |
assert(!"unsupported TOY_RECT_LINEAR width"); |
src->vert_stride = GEN6_VERTSTRIDE_1; |
src->width = GEN6_WIDTH_1; |
break; |
} |
src->horz_stride = GEN6_HORZSTRIDE_1; |
break; |
case TOY_RECT_041: |
src->vert_stride = GEN6_VERTSTRIDE_0; |
src->width = GEN6_WIDTH_4; |
src->horz_stride = GEN6_HORZSTRIDE_1; |
break; |
case TOY_RECT_010: |
src->vert_stride = GEN6_VERTSTRIDE_0; |
src->width = GEN6_WIDTH_1; |
src->horz_stride = GEN6_HORZSTRIDE_0; |
break; |
case TOY_RECT_220: |
src->vert_stride = GEN6_VERTSTRIDE_2; |
src->width = GEN6_WIDTH_2; |
src->horz_stride = GEN6_HORZSTRIDE_0; |
break; |
case TOY_RECT_440: |
src->vert_stride = GEN6_VERTSTRIDE_4; |
src->width = GEN6_WIDTH_4; |
src->horz_stride = GEN6_HORZSTRIDE_0; |
break; |
case TOY_RECT_240: |
src->vert_stride = GEN6_VERTSTRIDE_2; |
src->width = GEN6_WIDTH_4; |
src->horz_stride = GEN6_HORZSTRIDE_0; |
break; |
default: |
assert(!"unsupported src region"); |
src->vert_stride = GEN6_VERTSTRIDE_1; |
src->width = GEN6_WIDTH_1; |
src->horz_stride = GEN6_HORZSTRIDE_1; |
break; |
} |
} |
} |
|
/** |
* Generate HW shader code. The instructions should have been legalized. |
*/ |
void * |
toy_compiler_assemble(struct toy_compiler *tc, int *size) |
{ |
const struct toy_inst *inst; |
uint32_t *code; |
int pc; |
|
code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t)); |
if (!code) |
return NULL; |
|
pc = 0; |
tc_head(tc); |
while ((inst = tc_next(tc)) != NULL) { |
uint32_t *dw = &code[pc * 4]; |
struct codegen cg; |
|
if (pc >= tc->num_instructions) { |
tc_fail(tc, "wrong instructoun count"); |
break; |
} |
|
codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width); |
codegen_validate_region_restrictions(&cg); |
|
switch (inst->opcode) { |
case GEN6_OPCODE_MAD: |
codegen_inst_3src_gen6(&cg, dw); |
break; |
default: |
codegen_inst_gen6(&cg, dw); |
break; |
} |
|
pc++; |
} |
|
/* never return an invalid kernel */ |
if (tc->fail) { |
FREE(code); |
return NULL; |
} |
|
if (size) |
*size = pc * 4 * sizeof(uint32_t); |
|
return code; |
} |