/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/Makefile |
---|
0,0 → 1,12 |
# Assembles the nouveau codegen built-in library sources (%.asm) into C |
# headers (%.asm.h) with envyas. The assembler binary can be overridden from |
# the command line or environment, e.g. `make ENVYAS=/path/to/envyas`. |
ENVYAS ?= envyas |
# `all` does not name a file. Declaring it phony keeps a stray file called |
# `all` in this directory from making the default goal look up to date. |
.PHONY: all |
all: gf100.asm.h gk104.asm.h gk110.asm.h gm107.asm.h |
# Each header is rebuilt from the matching .asm via a static pattern rule. |
# Only the ISA (-m) / variant (-V) selection differs between the rules. |
gf100.asm.h: %.asm.h: %.asm |
$(ENVYAS) -a -W -mnvc0 -Vnvc0 $< -o $@ |
# gk104 uses the same -mnvc0 ISA setting as gf100 but the nve4 variant. |
gk104.asm.h: %.asm.h: %.asm |
$(ENVYAS) -a -W -mnvc0 -Vnve4 $< -o $@ |
gk110.asm.h: %.asm.h: %.asm |
$(ENVYAS) -a -W -mgk110 $< -o $@ |
gm107.asm.h: %.asm.h: %.asm |
$(ENVYAS) -a -W -mgm107 $< -o $@ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gf100.asm |
---|
0,0 → 1,107 |
.section #gf100_builtin_code |
// DIV U32 |
// |
// UNR recurrence (q = a / b): |
// look for z such that 2^32 - b <= b * z < 2^32 |
// then q - 1 <= (a * z) / 2^32 <= q |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p1 |
// SIZE: 22 / 14 * 8 bytes |
// |
gf100_div_u32: |
// Seed z: (bfind of the divisor) XOR 0x1f is used as the shift count applied |
// to the constant 1, so z starts out as a single power of two. |
// NOTE(review): assumes bfind yields the index of the highest set bit -- confirm. |
bfind u32 $r2 $r1 |
xor b32 $r2 $r2 0x1f |
mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// From here on $r1 holds the negated divisor (-b). |
cvt u32 $r1 neg u32 $r1 |
// Five identical refinement steps of the recurrence described in the header: |
//   $r3 = low32(-b * z);  z = high32(z * $r3) + z |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep the dividend in $r3; quotient estimate $r0 = high32(dividend * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = b again (negating -b); $r1 = -b * quotient + dividend = remainder. |
cvt u32 $r2 neg u32 $r1 |
add $r1 (mul u32 $r1 u32 $r0) $r3 |
// Correction round 1: if remainder >= b, subtract b and bump the quotient. |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
// Correction round 2: predicated on $p0, so it only runs if round 1 fired. |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
ret |
// DIV S32, like DIV U32 after taking ABS(inputs) |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p3 |
// |
gf100_div_s32: |
// $p2 = dividend is negative; $p3 = (divisor is negative) xor $p2, i.e. the |
// operand signs differ. Both are consumed at the very end to fix up signs. |
set $p2 0x1 lt s32 $r0 0x0 |
set $p3 0x1 lt s32 $r1 0x0 xor $p2 |
// Work on the absolute values; the rest mirrors the unsigned routine. |
cvt s32 $r0 abs s32 $r0 |
cvt s32 $r1 abs s32 $r1 |
// Seed z as 1 shifted by ((bfind of the divisor) XOR 0x1f). |
// NOTE(review): assumes bfind yields the index of the highest set bit -- confirm. |
bfind u32 $r2 $r1 |
xor b32 $r2 $r2 0x1f |
mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// From here on $r1 holds the negated divisor (-b). |
cvt u32 $r1 neg u32 $r1 |
// Five identical refinement steps: |
//   $r3 = low32(-b * z);  z = high32(z * $r3) + z |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep |dividend| in $r3; quotient estimate $r0 = high32(|dividend| * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = b again; $r1 = -b * quotient + |dividend| = remainder. |
cvt u32 $r2 neg u32 $r1 |
add $r1 (mul u32 $r1 u32 $r0) $r3 |
// Two correction rounds; the second is predicated on the first having fired. |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
// Sign fix-up: negate the quotient when the operand signs differed ($p3) and |
// negate the remainder when the dividend was negative ($p2). |
$p3 cvt s32 $r0 neg s32 $r0 |
$p2 cvt s32 $r1 neg s32 $r1 |
ret |
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) |
// |
// INPUT: $r0d (x) |
// OUTPUT: $r0d (rcp(x)) |
// CLOBBER: $r2 - $r7 |
// SIZE: 9 * 8 bytes |
// |
gf100_rcp_f64: |
// NOTE(review): placeholder only -- the body is a single nop followed by ret, |
// so $r0d is returned unchanged. The Newton-Raphson iteration, clobber list |
// and size given in the header above describe code that is not present here. |
nop |
ret |
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) |
// |
// INPUT: $r0d (x) |
// OUTPUT: $r0d (rsqrt(x)) |
// CLOBBER: $r2 - $r7 |
// SIZE: 14 * 8 bytes |
// |
gf100_rsq_f64: |
// NOTE(review): placeholder only -- the body is a single nop followed by ret, |
// so $r0d is returned unchanged. The Newton-Raphson iteration, clobber list |
// and size given in the header above describe code that is not present here. |
nop |
ret |
// Table of the built-in entry points, one 64-bit entry per routine, in the |
// order div_u32, div_s32, rcp_f64, rsq_f64. Each entry references the label |
// of the routine in the code section above. |
.section #gf100_builtin_offsets |
.b64 #gf100_div_u32 |
.b64 #gf100_div_s32 |
.b64 #gf100_rcp_f64 |
.b64 #gf100_rsq_f64 |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gf100.asm.h |
---|
0,0 → 1,63 |
/* Assembled form of the gf100_builtin_code section of gf100.asm, stored as
 * 64-bit words. The bracketed comments give the byte offset at which each
 * routine starts (8 bytes per array element).
 * NOTE(review): presumably produced by the envyas rule in the accompanying
 * Makefile; if so, regenerate it rather than editing the words by hand. */
uint64_t gf100_builtin_code[] = { |
/* 0x0000: gf100_div_u32 */ |
0x7800000004009c03, |
0x0010dd187c209cdd, |
0x6000000008309c03, |
0x0810dc2a05605c18, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x280000000000dde4, |
0x5000000008001c43, |
0x0010430d05609c18, |
0x1b0e00000811dc03, |
0x4800000008104103, |
0x0800000004000002, |
0x1b0e00000811c003, |
0x4800000008104103, |
0x90001dff040000ac, |
/* 0x00b0: gf100_div_s32 */ |
0x188e0000fc05dc23, |
0x18c40000fc17dc23, |
0x07305e1803301e18, |
0x7800000004009c03, |
0x0010dd187c209cdd, |
0x6000000008309c03, |
0x0810dc2a05605c18, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x280000000000dde4, |
0x5000000008001c43, |
0x0010430d05609c18, |
0x1b0e00000811dc03, |
0x4800000008104103, |
0x0800000004000002, |
0x1b0e00000811c003, |
0x4800000008104103, |
0x01700e18040000ac, |
0x90001dff05704a18, |
/* 0x0180: gf100_rcp_f64 */ |
0x90001dff00001c08, |
/* 0x0188: gf100_rsq_f64 */ |
0x90001dff00001c08, |
}; |
/* Byte offsets of the built-in routines inside gf100_builtin_code, matching
 * the offsets noted in that array's comments: div_u32, div_s32, rcp_f64,
 * rsq_f64, in that order. */
uint64_t gf100_builtin_offsets[] = { |
0x0000000000000000, |
0x00000000000000b0, |
0x0000000000000180, |
0x0000000000000188, |
}; |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk104.asm |
---|
0,0 → 1,711 |
.section #gk104_builtin_code |
// DIV U32 |
// |
// UNR recurrence (q = a / b): |
// look for z such that 2^32 - b <= b * z < 2^32 |
// then q - 1 <= (a * z) / 2^32 <= q |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p1 |
// SIZE: 22 / 14 * 8 bytes |
// |
gk104_div_u32: |
// Same algorithm as the gf100 unsigned divide, with sched lines interleaved. |
// Every sched line carries seven values and is followed by seven |
// instructions, so adding or removing an instruction means re-flowing every |
// later sched line. |
sched 0x28 0x4 0x28 0x4 0x28 0x28 0x28 |
// Seed z as 1 shifted by ((bfind of the divisor) XOR 0x1f). |
// NOTE(review): assumes bfind yields the index of the highest set bit -- confirm. |
bfind u32 $r2 $r1 |
long xor b32 $r2 $r2 0x1f |
long mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// From here on $r1 holds the negated divisor (-b). |
long cvt u32 $r1 neg u32 $r1 |
// Five refinement steps: $r3 = low32(-b * z); z = high32(z * $r3) + z. |
long mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
sched 0x4 0x28 0x4 0x28 0x28 0x2c 0x4 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep the dividend in $r3; quotient estimate $r0 = high32(dividend * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = b again; $r1 = -b * quotient + dividend = remainder. |
long cvt u32 $r2 neg u32 $r1 |
long add $r1 (mul u32 $r1 u32 $r0) $r3 |
// Two correction rounds; the second is predicated on the first having fired. |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
// Only five instructions of this routine follow the next sched line; its |
// last two slots appear to cover the first two instructions of the routine |
// that comes next in the file. |
sched 0x28 0x2c 0x4 0x20 0x2e 0x28 0x20 |
$p0 add b32 $r0 $r0 0x1 |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
long ret |
// DIV S32, like DIV U32 after taking ABS(inputs) |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p3 |
// |
gk104_div_s32: |
// $p2 = dividend is negative; $p3 = (divisor is negative) xor $p2, i.e. the |
// operand signs differ. Both are consumed at the end to fix up signs. |
// No sched line precedes these two instructions inside this routine. |
set $p2 0x1 lt s32 $r0 0x0 |
set $p3 0x1 lt s32 $r1 0x0 xor $p2 |
sched 0x20 0x28 0x28 0x4 0x28 0x04 0x28 |
// Work on the absolute values; the rest mirrors the unsigned routine. |
long cvt s32 $r0 abs s32 $r0 |
long cvt s32 $r1 abs s32 $r1 |
// Seed z as 1 shifted by ((bfind of the divisor) XOR 0x1f). |
bfind u32 $r2 $r1 |
long xor b32 $r2 $r2 0x1f |
long mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// From here on $r1 holds the negated divisor (-b). |
cvt u32 $r1 neg u32 $r1 |
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 |
// Five refinement steps: $r3 = low32(-b * z); z = high32(z * $r3) + z. |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
sched 0x28 0x28 0x4 0x28 0x04 0x28 0x28 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep |dividend| in $r3; quotient estimate $r0 = high32(|dividend| * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = b again; $r1 = -b * quotient + |dividend| = remainder. |
long cvt u32 $r2 neg u32 $r1 |
long add $r1 (mul u32 $r1 u32 $r0) $r3 |
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20 |
// Two correction rounds; the second is predicated on the first having fired. |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
long $p0 add b32 $r0 $r0 0x1 |
// Sign fix-up: negate the quotient when the operand signs differed ($p3) and |
// negate the remainder when the dividend was negative ($p2). |
long $p3 cvt s32 $r0 neg s32 $r0 |
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c |
$p2 cvt s32 $r1 neg s32 $r1 |
long ret |
// SULDP [for each format] |
// $r4d: address |
// $r2: surface info (format) |
// $p0: access predicate |
// $p1, $p2: caching predicate (00: cv, 01: ca, 10: cg) |
// |
// RGBA32 |
// 128-bit load into $r0q with no conversion afterwards. The three loads are |
// the ca / cg / cv variants of the same access; for the $p1/$p2 encodings |
// listed in the SULDP header only one of them executes. |
$p1 suldgb b128 $r0q ca zero u8 g[$r4d] $r2 $p0 |
// Recompute $p1 from the old $p1 and $p2 so that it now gates the cv variant. |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b128 $r0q cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b128 $r0q cv zero u8 g[$r4d] $r2 $p0 |
long ret |
// RGBA16_UNORM |
// Four 16-bit components = 64 bits, so load b64 into $r0d like the other |
// RGBA16_* handlers. This handler previously loaded b128 into $r0q, although |
// the conversion below only reads $r0 and $r1. |
// The pre-assembled header (gk104.asm.h) must be regenerated after this change. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
// Recompute $p1 from the old $p1 and $p2 so that it now gates the cv variant. |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
// Unpack both halves of $r1 into $r3/$r2 first, then both halves of $r0 into |
// $r1/$r0, so every source register is read before it is overwritten. |
// Each component is converted to f32 and scaled by 0x37800074 |
// (float bit pattern, presumably the 1/65535 normalisation factor). |
cvt rn f32 $r3 u16 1 $r1 |
cvt rn f32 $r2 u16 0 $r1 |
mul f32 $r3 $r3 0x37800074 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt rn f32 $r1 u16 1 $r0 |
mul f32 $r2 $r2 0x37800074 |
cvt rn f32 $r0 u16 0 $r0 |
mul f32 $r1 $r1 0x37800074 |
mul f32 $r0 $r0 0x37800074 |
long ret |
// RGBA16_SNORM |
// 64-bit load into $r0d (ca / cg / cv variants gated by $p1/$p2), then each |
// signed 16-bit half is converted to f32 and scaled by 0x38000187 |
// (float bit pattern, presumably the 1/32767 normalisation factor). |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
// $r1 is unpacked into $r3/$r2 before $r1 itself is overwritten from $r0. |
cvt rn f32 $r3 s16 1 $r1 |
cvt rn f32 $r2 s16 0 $r1 |
mul f32 $r3 $r3 0x38000187 |
cvt rn f32 $r1 s16 1 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r2 $r2 0x38000187 |
cvt rn f32 $r0 s16 0 $r0 |
mul f32 $r1 $r1 0x38000187 |
mul f32 $r0 $r0 0x38000187 |
long ret |
// RGBA16_SINT |
// 64-bit load, then each signed 16-bit half is widened to s32 (no scaling). |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
cvt s32 $r3 s16 1 $r1 |
cvt s32 $r2 s16 0 $r1 |
cvt s32 $r1 s16 1 $r0 |
cvt s32 $r0 s16 0 $r0 |
long ret |
// RGBA16_UINT |
// 64-bit load, then each unsigned 16-bit half is widened to u32 (no scaling). |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
cvt u32 $r3 u16 1 $r1 |
cvt u32 $r2 u16 0 $r1 |
cvt u32 $r1 u16 1 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt u32 $r0 u16 0 $r0 |
long ret |
// RGBA16_FLOAT |
// 64-bit load, then each f16 half is converted to f32. Note the half |
// selector is written after the source register here, unlike the integer |
// conversions above. |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
cvt f32 $r3 f16 $r1 1 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt f32 $r2 f16 $r1 0 |
cvt f32 $r1 f16 $r0 1 |
cvt f32 $r0 f16 $r0 0 |
long ret |
// RG32_FLOAT |
// 64-bit load fills $r0/$r1; the two missing components are set to constants: |
// $r2 = 0 and $r3 = 0x3f800000 (the f32 bit pattern of 1.0). |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r2 0x00000000 |
long mov b32 $r3 0x3f800000 |
long ret |
// RG32_xINT |
// Same as RG32_FLOAT except that $r3 is set to the integer 1. |
$p1 suldgb b64 $r0d ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b64 $r0d cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b64 $r0d cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r2 0x00000000 |
long mov b32 $r3 0x00000001 |
long ret |
// RGB10A2_UNORM |
// 32-bit load into $r0, then three fields are split out: $r0 keeps the low |
// 10 bits (mask 0x3ff), $r1 and $r2 come from ext with 0x0a0a and 0x0a14 |
// (presumably width 10 at bit offsets 10 and 20 -- confirm the ext encoding). |
// NOTE(review): $r3 is set to the constant 1.0; no field is extracted for |
// the 2-bit alpha channel -- confirm this is intended. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
ext u32 $r1 $r0 0x0a0a |
long mov b32 $r3 0x3f800000 |
ext u32 $r2 $r0 0x0a14 |
long and b32 $r0 $r0 0x3ff |
// Convert each field to f32 and scale by 0x3a802007 (float bit pattern, |
// presumably the 1/1023 normalisation factor). |
cvt rn f32 $r2 u16 0 $r2 |
cvt rn f32 $r1 u16 0 $r1 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r2 $r2 0x3a802007 |
cvt rn f32 $r0 u16 0 $r0 |
mul f32 $r1 $r1 0x3a802007 |
mul f32 $r0 $r0 0x3a802007 |
long ret |
// RGB10A2_UINT |
// Same field extraction as the UNORM variant, left as integers; $r3 is set |
// to the integer constant 1 rather than taken from the texel. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
ext u32 $r1 $r0 0x0a0a |
long mov b32 $r3 0x00000001 |
ext u32 $r2 $r0 0x0a14 |
long and b32 $r0 $r0 0x3ff |
long ret |
// RGBA8_UNORM |
// 32-bit load into $r0; bytes 3..0 are converted to f32 into $r3..$r0 and |
// scaled by 0x3b808081 (float bit pattern, presumably 1/255). $r0 is |
// converted last because it is the source of all four conversions. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
cvt rn f32 $r3 u8 3 $r0 |
cvt rn f32 $r2 u8 2 $r0 |
mul f32 $r3 $r3 0x3b808081 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt rn f32 $r1 u8 1 $r0 |
mul f32 $r2 $r2 0x3b808081 |
cvt rn f32 $r0 u8 0 $r0 |
mul f32 $r1 $r1 0x3b808081 |
mul f32 $r0 $r0 0x3b808081 |
long ret |
// RGBA8_SNORM |
// As RGBA8_UNORM but with signed bytes and scale 0x3c010204 (float bit |
// pattern, presumably 1/127). |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
cvt rn f32 $r3 s8 3 $r0 |
cvt rn f32 $r2 s8 2 $r0 |
mul f32 $r3 $r3 0x3c010204 |
cvt rn f32 $r1 s8 1 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r2 $r2 0x3c010204 |
cvt rn f32 $r0 s8 0 $r0 |
mul f32 $r1 $r1 0x3c010204 |
mul f32 $r0 $r0 0x3c010204 |
long ret |
// RGBA8_SINT |
// 32-bit load; bytes 3..0 are sign-extended to s32 into $r3..$r0. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
cvt s32 $r3 s8 3 $r0 |
cvt s32 $r2 s8 2 $r0 |
cvt s32 $r1 s8 1 $r0 |
cvt s32 $r0 s8 0 $r0 |
long ret |
// RGBA8_UINT |
// 32-bit load; bytes 3..0 are zero-extended to u32 into $r3..$r0. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
cvt u32 $r3 u8 3 $r0 |
cvt u32 $r2 u8 2 $r0 |
cvt u32 $r1 u8 1 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt u32 $r0 u8 0 $r0 |
long ret |
// R5G6B5_UNORM |
// 16-bit load into $r0, split into three fields: $r0 keeps the low 5 bits |
// (mask 0x1f), $r1 and $r2 come from ext with 0x0605 and 0x050b (presumably |
// 6 bits at offset 5 and 5 bits at offset 11 -- confirm the ext encoding). |
// $r3 is set to 1.0. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
ext u32 $r1 $r0 0x0605 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long mov b32 $r3 0x3f800000 |
ext u32 $r2 $r0 0x050b |
long and b32 $r0 $r0 0x1f |
// The 5-bit fields ($r2, $r0) are scaled by 0x3d042108 and the 6-bit field |
// ($r1) by 0x3c820821 (float bit patterns, presumably 1/31 and 1/63). |
cvt rn f32 $r2 u8 0 $r2 |
cvt rn f32 $r1 u8 0 $r1 |
mul f32 $r2 $r2 0x3d042108 |
cvt rn f32 $r0 u8 0 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r1 $r1 0x3c820821 |
mul f32 $r0 $r0 0x3d042108 |
long ret |
// R5G5B5X1_UNORM |
// 16-bit load; three fields via mask 0x1f and ext 0x0505 / 0x050a, all |
// scaled by 0x3d042108 (presumably 1/31). $r3 is set to 1.0. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
ext u32 $r1 $r0 0x0505 |
ext u32 $r2 $r0 0x050a |
long and b32 $r0 $r0 0x1f |
long mov b32 $r3 0x3f800000 |
cvt rn f32 $r2 u8 0 $r2 |
cvt rn f32 $r1 u8 0 $r1 |
cvt rn f32 $r0 u8 0 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r2 $r2 0x3d042108 |
mul f32 $r1 $r1 0x3d042108 |
mul f32 $r0 $r0 0x3d042108 |
long ret |
// RG16_UNORM |
// 32-bit load into $r0; both 16-bit halves are converted to f32 into |
// $r1/$r0 and scaled by 0x37800074 (presumably 1/65535). The missing |
// components are set to constants: $r2 = 0, $r3 = 1.0. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
cvt rn f32 $r1 u16 1 $r0 |
cvt rn f32 $r0 u16 0 $r0 |
mul f32 $r1 $r1 0x37800074 |
mul f32 $r0 $r0 0x37800074 |
long mov b32 $r2 0x00000000 |
long mov b32 $r3 0x3f800000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long ret |
// RG16_SNORM |
// As RG16_UNORM but with signed halves and scale 0x38000187 (presumably |
// 1/32767). |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x3f800000 |
cvt rn f32 $r1 s16 1 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mov b32 $r2 0x00000000 |
cvt rn f32 $r0 s16 0 $r0 |
mul f32 $r1 $r1 0x38000187 |
mul f32 $r0 $r0 0x38000187 |
long ret |
// RG16_SINT |
// 32-bit load; halves sign-extended to s32. $r2 = 0, $r3 = integer 1. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x00000001 |
cvt s32 $r1 s16 1 $r0 |
mov b32 $r2 0x00000000 |
cvt s32 $r0 s16 0 $r0 |
long ret |
// RG16_UINT |
// 32-bit load; halves zero-extended to u32. $r2 = 0, $r3 = integer 1. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x00000001 |
cvt u32 $r1 u16 1 $r0 |
mov b32 $r2 0x00000000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt u32 $r0 u16 0 $r0 |
long ret |
// RG16_FLOAT |
// 32-bit load; both f16 halves converted to f32. $r2 = 0, $r3 = 1.0. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x3f800000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt f32 $r1 f16 $r0 1 |
mov b32 $r2 0x00000000 |
cvt f32 $r0 f16 $r0 0 |
long ret |
// R32_FLOAT |
// 32-bit load into $r0, returned as loaded. The other components are set |
// to constants: $r1 = $r2 = 0 and $r3 = 0x3f800000 (f32 1.0). |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x3f800000 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
long ret |
// R32_xINT |
// Same as R32_FLOAT except that $r3 is set to the integer 1. |
$p1 suldgb b32 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb b32 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
long ret |
// RG8_UNORM |
// 16-bit load into $r0; bytes 1 and 0 are converted to f32 into $r1/$r0 and |
// scaled by 0x3b808081 (presumably 1/255). $r2 = 0, $r3 = 1.0. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x3f800000 |
cvt rn f32 $r1 u8 1 $r0 |
mov b32 $r2 0x00000000 |
cvt rn f32 $r0 u8 0 $r0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r1 $r1 0x3b808081 |
mul f32 $r0 $r0 0x3b808081 |
long ret |
// RG8_SNORM |
// As RG8_UNORM but with signed bytes and scale 0x3c010204 (presumably 1/127). |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long mov b32 $r3 0x3f800000 |
cvt rn f32 $r1 s8 1 $r0 |
long mov b32 $r2 0x00000000 |
cvt rn f32 $r0 s8 0 $r0 |
mul f32 $r1 $r1 0x3c010204 |
mul f32 $r0 $r0 0x3c010204 |
long ret |
// RG8_UINT |
// 16-bit load; bytes zero-extended to u32. $r2 = 0, $r3 = integer 1. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
cvt u32 $r1 u8 1 $r0 |
long mov b32 $r2 0x00000000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt u32 $r0 u8 0 $r0 |
long ret |
// RG8_SINT |
// 16-bit load; bytes sign-extended to s32. $r2 = 0, $r3 = integer 1. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
cvt s32 $r1 s8 1 $r0 |
long mov b32 $r2 0x00000000 |
cvt s32 $r0 s8 0 $r0 |
long ret |
// R16_UNORM |
// 16-bit load into $r0, converted to f32 and scaled by 0x37800074 |
// (presumably 1/65535). $r1 = $r2 = 0, $r3 = 1.0. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x3f800000 |
cvt rn f32 $r0 u16 0 $r0 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
mul f32 $r0 $r0 0x37800074 |
long ret |
// R16_SNORM |
// Loaded as u16, then converted as s16 and scaled by 0x38000187 |
// (presumably 1/32767). $r1 = $r2 = 0, $r3 = 1.0. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x3f800000 |
cvt rn f32 $r0 s16 0 $r0 |
long mov b32 $r2 0x00000000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long mov b32 $r1 0x00000000 |
mul f32 $r0 $r0 0x38000187 |
long ret |
// R16_SINT |
// The load itself uses the s16 type, so no separate conversion follows. |
// $r1 = $r2 = 0, $r3 = integer 1. |
$p1 suldgb s16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb s16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb s16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long mov b32 $r3 0x00000001 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
long ret |
// R16_UINT |
// u16 load with no further conversion. $r1 = $r2 = 0, $r3 = integer 1. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
long ret |
// R16_FLOAT |
// u16 load, then the low f16 half is converted to f32. |
// $r1 = $r2 = 0, $r3 = 1.0. |
$p1 suldgb u16 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p2 suldgb u16 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u16 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x3f800000 |
long mov b32 $r2 0x00000000 |
cvt f32 $r0 f16 $r0 0 |
mov b32 $r1 0x00000000 |
long ret |
// R8_UNORM |
// 8-bit load into $r0, converted to f32 and scaled by 0x3b808081 |
// (presumably 1/255). $r1 = $r2 = 0, $r3 = 1.0. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 |
mov b32 $r3 0x3f800000 |
cvt rn f32 $r0 u8 0 $r0 |
mov b32 $r2 0x00000000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mul f32 $r0 $r0 0x3b808081 |
mov b32 $r1 0x00000000 |
long ret |
// R8_SNORM |
// Loaded as u8, then converted as s8 and scaled by 0x3c010204 |
// (presumably 1/127). $r1 = $r2 = 0, $r3 = 1.0. |
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
mov b32 $r3 0x3f800000 |
cvt rn f32 $r0 s8 0 $r0 |
mov b32 $r2 0x00000000 |
mul f32 $r0 $r0 0x3c010204 |
mov b32 $r1 0x00000000 |
long ret |
// R8_SINT |
// The load itself uses the s8 type, so no separate conversion follows. |
// $r1 = $r2 = 0, $r3 = integer 1. |
$p1 suldgb s8 $r0 ca zero u8 g[$r4d] $r2 $p0 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb s8 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb s8 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
long ret |
// R8_UINT |
// u8 load with no further conversion. $r1 = $r2 = 0, $r3 = integer 1. |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
$p1 suldgb u8 $r0 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb u8 $r0 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb u8 $r0 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x00000001 |
long mov b32 $r2 0x00000000 |
long mov b32 $r1 0x00000000 |
sched 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
long ret |
// R11G11B10_FLOAT TODO |
// Unfinished: the texel is loaded into $r3, which is then immediately |
// overwritten with 1.0, and $r0-$r2 are never written. No unpacking of the |
// packed float fields is implemented here. |
$p1 suldgb b32 $r3 ca zero u8 g[$r4d] $r2 $p0 |
set $p1 0x1 $p1 xor not $p2 |
$p2 suldgb b32 $r3 cg zero u8 g[$r4d] $r2 $p0 |
$p1 suldgb b32 $r3 cv zero u8 g[$r4d] $r2 $p0 |
long mov b32 $r3 0x3f800000 |
long nop |
long ret |
// RCP F64: Newton Raphson reciprocal(x): r_{i+1} = r_i * (2.0 - x * r_i) |
// |
// INPUT: $r0d (x) |
// OUTPUT: $r0d (rcp(x)) |
// CLOBBER: $r2 - $r7 |
// SIZE: 9 * 8 bytes |
// |
gk104_rcp_f64: |
// NOTE(review): placeholder only -- the body is a single nop followed by ret, |
// so $r0d is returned unchanged. The Newton-Raphson iteration, clobber list |
// and size given in the header above describe code that is not present here. |
long nop |
long ret |
// RSQ F64: Newton Raphson rsqrt(x): r_{i+1} = r_i * (1.5 - 0.5 * x * r_i * r_i) |
// |
// INPUT: $r0d (x) |
// OUTPUT: $r0d (rsqrt(x)) |
// CLOBBER: $r2 - $r7 |
// SIZE: 14 * 8 bytes |
// |
gk104_rsq_f64: |
// NOTE(review): placeholder only -- the body is a single nop followed by ret, |
// so $r0d is returned unchanged. The Newton-Raphson iteration, clobber list |
// and size given in the header above describe code that is not present here. |
long nop |
long ret |
// |
// Trap handler. |
// Requires at least 4 GPRs and 32 bytes of l[] memory to temporarily save GPRs. |
// Low 32 bytes of l[] memory shouldn't be used if resumability is required. |
// |
// Trap info: |
// 0x000: mutex |
// 0x004: PC |
// 0x008: trapstat |
// 0x00c: warperr |
// 0x010: tidx |
// 0x014: tidy |
// 0x018: tidz |
// 0x01c: ctaidx |
// 0x020: ctaidy |
// 0x024: ctaidz |
// 0x030: $r0q |
// 0x130: $flags |
// 0x140: s[] |
// |
// Entry: park $r0-$r3 in l[0x00..0x0f] so they can be used as scratch. |
st b128 wb l[0x00] $r0q |
// check state of the warp and continue if it didn't cause the trap |
long mov b32 $r1 $trapstat |
long mov b32 $r3 $warperr |
// $r2 keeps a copy of $flags; it is stored at +0x130 on the faulting path |
// and written back to $flags at end_cont. |
mov $r2 $flags mask 0xffff |
// Only the condition code of (trapstat & warperr) is used; the result itself |
// is discarded. |
and b32 0 $c $r1 $r3 |
e $c bra #end_cont |
// spill control flow stack to l[] |
long mov b32 $r3 16 |
spill_cfstack: |
preret #end_exit |
sub b32 $r3 $c $r3 0x1 |
lg $c bra #spill_cfstack |
// retrieve pointer to trap info |
mov b32 $r0 c0[0x1900] |
mov b32 $r1 c0[0x1904] |
// we only let a single faulting thread store its state |
mov b32 $r3 0x1 |
exch b32 $r3 g[$r0d] $r3 |
joinat #end_exit |
// A previous value of 1 in the mutex word means another thread got there |
// first; that thread joins (to end_exit) without storing anything. |
set $p0 0x1 eq u32 $r3 0x1 |
join $p0 nop |
// store $c and $p registers |
st b32 wb g[$r0d+0x130] $r2 |
// store $trapstat and $warperr |
long mov b32 $r2 $trapstat |
long mov b32 $r3 $warperr |
st b64 wb g[$r0d+0x8] $r2d |
// store registers |
// $r4-$r63 go out directly; $r0-$r3 are recovered from l[] afterwards |
// because they have been used as scratch since entry. |
st b128 wb g[$r0d+0x40] $r4q |
st b128 wb g[$r0d+0x50] $r8q |
st b128 wb g[$r0d+0x60] $r12q |
st b128 wb g[$r0d+0x70] $r16q |
st b128 wb g[$r0d+0x80] $r20q |
st b128 wb g[$r0d+0x90] $r24q |
st b128 wb g[$r0d+0xa0] $r28q |
st b128 wb g[$r0d+0xb0] $r32q |
st b128 wb g[$r0d+0xc0] $r36q |
st b128 wb g[$r0d+0xd0] $r40q |
st b128 wb g[$r0d+0xe0] $r44q |
st b128 wb g[$r0d+0xf0] $r48q |
st b128 wb g[$r0d+0x100] $r52q |
st b128 wb g[$r0d+0x110] $r56q |
st b128 wb g[$r0d+0x120] $r60q |
ld b64 $r2d cs l[0x0] |
st b64 wb g[$r0d+0x30] $r2d |
ld b64 $r2d cs l[0x8] |
st b64 wb g[$r0d+0x38] $r2d |
// store thread id |
long mov b32 $r2 $tidx |
long mov b32 $r3 $tidy |
st b64 wb g[$r0d+0x10] $r2d |
long mov b32 $r2 $tidz |
long mov b32 $r3 $ctaidx |
st b64 wb g[$r0d+0x18] $r2d |
long mov b32 $r2 $ctaidy |
long mov b32 $r3 $ctaidz |
st b64 wb g[$r0d+0x20] $r2d |
// store shared memory (in reverse order so $r0d is base again at the end) |
long mov b32 $r3 $smemsz |
sub b32 $r3 $c $r3 0x4 |
s $c bra #shared_done |
// 64-bit add of the offset to the trap-info pointer ($r1 takes the carry). |
add b32 $r0 $c $r0 $r3 |
add b32 $r1 $r1 0x0 $c |
// NOTE(review): if `lg` means "not equal", the loop below stops as soon as |
// $r3 reaches 0 and s[0] is never copied -- confirm whether that is intended. |
shared_loop: |
long ld b32 $r2 s[$r3] |
long st b32 wb g[$r0d+0x140] $r2 |
sub b32 $r0 $c $r0 0x4 |
sub b32 $r1 $r1 0x0 $c |
sub b32 $r3 $c $r3 0x4 |
lg $c bra #shared_loop |
shared_done: |
// search the stack for trap entry to retrieve PC |
mov b32 $r0 c0[0x1908] |
mov b32 $r1 c0[0x190c] |
membar sys |
// invalidate caches so we can read stack entries via g[] |
cctl ivall 0 l[0] |
cctl ivall 0 g[$r0d] |
// get offsets |
mov b32 $r2 $physid |
ext u32 $r3 $r2 0x0814 // MP id |
ext u32 $r2 $r2 0x0608 // warp id |
mul $r2 u32 $r2 u32 c0[0x1914] // warp offset |
mul $r3 u32 $r3 u32 c0[0x1910] // MP offset |
add b32 $r2 $r2 $r3 // MP + warp offset |
add b32 $r0 $c $r0 $r2 |
add b32 $r1 $r1 0x0 $c |
// Load the remaining-bytes counter once, BEFORE the loop label. With the |
// label above this load the counter was reset on every iteration, so the |
// countdown below could never end the search when no matching entry exists. |
// The pre-assembled header (gk104.asm.h) must be regenerated after this change. |
mov b32 $r3 c0[0x1918] // cstack size |
search_cstack: |
// Each iteration inspects the byte at +0x8 of the current entry and looks |
// for the value 0xa; entries are stepped 0x10 bytes at a time. |
ld u8 $r2 cv g[$r0d+0x8] |
set $p0 0x1 eq u32 $r2 0xa |
$p0 bra #entry_found |
add b32 $r0 $c $r0 0x10 |
add b32 $r1 $r1 0x0 $c |
sub b32 $r3 $c $r3 0x10 |
lg $c bra #search_cstack |
bra #end_exit |
entry_found: |
// load PC (may be unaligned and spread out) |
ld b32 $r2 cv g[$r0d] |
mov b32 $r0 c0[0x1900] |
mov b32 $r1 c0[0x1904] |
st b32 wb g[$r0d+0x4] $r2 |
join nop |
// invalidate caches and exit |
end_exit: |
cctl ivall 0 g[0] |
bpt pause 0x0 |
rtt terminate |
// Non-faulting warps: restore $flags and $r0-$r3, then return from the trap. |
end_cont: |
bpt pause 0x0 |
mov $flags $r2 mask 0xffff |
ld b128 $r0q cs l[0x00] |
rtt |
// Table of the built-in entry points, one 64-bit entry per routine, in the |
// order div_u32, div_s32, rcp_f64, rsq_f64. Only the four labelled routines |
// are listed; the unlabelled SULDP handlers and the trap handler have no |
// entry here. |
.section #gk104_builtin_offsets |
.b64 #gk104_div_u32 |
.b64 #gk104_div_s32 |
.b64 #gk104_rcp_f64 |
.b64 #gk104_rsq_f64 |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk104.asm.h |
---|
0,0 → 1,598 |
uint64_t gk104_builtin_code[] = { |
/* 0x0000: gk104_div_u32 */ |
0x2282828042804287, |
0x7800000004009c03, |
0x380000007c209c82, |
0x180000000400dde2, |
0x6000000008309c03, |
0x1c00000005205d04, |
0x500000000810dc03, |
0x200400000c209c43, |
0x2282828282828287, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x2042c28280428047, |
0x200400000c209c43, |
0x280000000000dde4, |
0x5000000008001c43, |
0x1c00000005209d04, |
0x2006000000105c03, |
0x1b0e00000811dc03, |
0x4800000008104103, |
0x220282e20042c287, |
0x0800000004000002, |
0x1b0e00000811c003, |
0x4800000008104103, |
0x0800000004000002, |
0x9000000000001de7, |
/* 0x00f0: gk104_div_s32 */ |
0x188e0000fc05dc23, |
0x18c40000fc17dc23, |
0x2280428042828207, |
0x1c00000001201ec4, |
0x1c00000005205ec4, |
0x7800000004009c03, |
0x380000007c209c82, |
0x180000000400dde2, |
0x6000000008309c03, |
0x1c00000005205d04, |
0x2282828282828287, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x500000000810dc03, |
0x2282804280428287, |
0x200400000c209c43, |
0x500000000810dc03, |
0x200400000c209c43, |
0x280000000000dde4, |
0x5000000008001c43, |
0x1c00000005209d04, |
0x2006000000105c03, |
0x22028042c28042c7, |
0x1b0e00000811dc03, |
0x4800000008104103, |
0x0800000004000002, |
0x1b0e00000811c003, |
0x4800000008104103, |
0x0800000004000002, |
0x1c00000001200f84, |
0x22c200428042e047, |
0x1c00000005204b84, |
0x9000000000001de7, |
0xd4004000084004c5, |
0x0c5400000013dc04, |
0xd4004000084009c5, |
0xd4004000084007c5, |
0x9000000000001de7, |
0x2000000000000007, |
0xd4004000084004c5, |
0x0c5400000013dc04, |
0xd4004000084009c5, |
0xd4004000084007c5, |
0x1900000004a0dc04, |
0x1800000004a09c04, |
0x30de0001d030dc02, |
0x2000000000000007, |
0x1900000000a05c04, |
0x30de0001d0209c02, |
0x1800000000a01c04, |
0x30de0001d0105c02, |
0x30de0001d0001c02, |
0x9000000000001de7, |
0xd4004000084004a5, |
0x2000000000000007, |
0x0c5400000013dc04, |
0xd4004000084009a5, |
0xd4004000084007a5, |
0x1900000004a0de04, |
0x1800000004a09e04, |
0x30e000061c30dc02, |
0x1900000000a05e04, |
0x2000000000000007, |
0x30e000061c209c02, |
0x1800000000a01e04, |
0x30e000061c105c02, |
0x30e000061c001c02, |
0x9000000000001de7, |
0xd4004000084004a5, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd4004000084009a5, |
0xd4004000084007a5, |
0x1d00000004a0de84, |
0x1c00000004a09e84, |
0x1d00000000a05e84, |
0x1c00000000a01e84, |
0x9000000000001de7, |
0x2000000000000007, |
0xd4004000084004a5, |
0x0c5400000013dc04, |
0xd4004000084009a5, |
0xd4004000084007a5, |
0x1d00000004a0dc04, |
0x1c00000004a09c04, |
0x1d00000000a05c04, |
0x2000000000000007, |
0x1c00000000a01c04, |
0x9000000000001de7, |
0xd4004000084004a5, |
0x0c5400000013dc04, |
0xd4004000084009a5, |
0xd4004000084007a5, |
0x1100000004a0dc04, |
0x2000000000000007, |
0x1000000004a09c04, |
0x1100000000a05c04, |
0x1000000000a01c04, |
0x9000000000001de7, |
0xd4004000084004a5, |
0x0c5400000013dc04, |
0xd4004000084009a5, |
0x2000000000000007, |
0xd4004000084007a5, |
0x1800000000009de2, |
0x18fe00000000dde2, |
0x9000000000001de7, |
0xd4004000084004a5, |
0x0c5400000013dc04, |
0xd4004000084009a5, |
0x2000000000000007, |
0xd4004000084007a5, |
0x1800000000009de2, |
0x180000000400dde2, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0x2000000000000007, |
0xd400400008400785, |
0x7000c02828005c03, |
0x18fe00000000dde2, |
0x7000c02850009c03, |
0x3800000ffc001c02, |
0x1800000008a09c04, |
0x1800000004a05c04, |
0x2000000000000007, |
0x30ea00801c209c02, |
0x1800000000a01c04, |
0x30ea00801c105c02, |
0x30ea00801c001c02, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd400400008400985, |
0xd400400008400785, |
0x7000c02828005c03, |
0x180000000400dde2, |
0x7000c02850009c03, |
0x3800000ffc001c02, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x198000000020dc04, |
0x1900000000209c04, |
0x30ee02020430dc02, |
0x2000000000000007, |
0x1880000000205c04, |
0x30ee020204209c02, |
0x1800000000201c04, |
0x30ee020204105c02, |
0x30ee020204001c02, |
0x9000000000001de7, |
0xd400400008400485, |
0x2000000000000007, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x198000000020de04, |
0x1900000000209e04, |
0x30f004081030dc02, |
0x1880000000205e04, |
0x2000000000000007, |
0x30f0040810209c02, |
0x1800000000201e04, |
0x30f0040810105c02, |
0x30f0040810001c02, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd400400008400985, |
0xd400400008400785, |
0x1d8000000020de84, |
0x1d00000000209e84, |
0x1c80000000205e84, |
0x1c00000000201e84, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x1d8000000020dc04, |
0x1d00000000209c04, |
0x1c80000000205c04, |
0x2000000000000007, |
0x1c00000000201c04, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x7000c01814005c03, |
0x2000000000000007, |
0x18fe00000000dde2, |
0x7000c0142c009c03, |
0x380000007c001c02, |
0x1800000008209c04, |
0x1800000004205c04, |
0x30f4108420209c02, |
0x1800000000201c04, |
0x2000000000000007, |
0x30f2082084105c02, |
0x30f4108420001c02, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x2000000000000007, |
0x7000c01414005c03, |
0x7000c01428009c03, |
0x380000007c001c02, |
0x18fe00000000dde2, |
0x1800000008209c04, |
0x1800000004205c04, |
0x1800000000201c04, |
0x2000000000000007, |
0x30f4108420209c02, |
0x30f4108420105c02, |
0x30f4108420001c02, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0x2000000000000007, |
0xd400400008400785, |
0x1900000000a05c04, |
0x1800000000a01c04, |
0x30de0001d0105c02, |
0x30de0001d0001c02, |
0x1800000000009de2, |
0x18fe00000000dde2, |
0x2000000000000007, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x18fe00000000dde2, |
0x1900000000a05e04, |
0x2000000000000007, |
0x1800000000009de2, |
0x1800000000a01e04, |
0x30e000061c105c02, |
0x30e000061c001c02, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd400400008400985, |
0xd400400008400785, |
0x180000000400dde2, |
0x1d00000000a05e84, |
0x1800000000009de2, |
0x1c00000000a01e84, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x180000000400dde2, |
0x1d00000000a05c04, |
0x1800000000009de2, |
0x2000000000000007, |
0x1c00000000a01c04, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0xd400400008400785, |
0x18fe00000000dde2, |
0x2000000000000007, |
0x1100000000a05c04, |
0x1800000000009de2, |
0x1000000000a01c04, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0xd400400008400985, |
0x2000000000000007, |
0xd400400008400785, |
0x18fe00000000dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400485, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd400400008400985, |
0xd400400008400785, |
0x180000000400dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400445, |
0x2000000000000007, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x18fe00000000dde2, |
0x1880000000205c04, |
0x1800000000009de2, |
0x1800000000201c04, |
0x2000000000000007, |
0x30ee020204105c02, |
0x30ee020204001c02, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x2000000000000007, |
0x18fe00000000dde2, |
0x1880000000205e04, |
0x1800000000009de2, |
0x1800000000201e04, |
0x30f0040810105c02, |
0x30f0040810001c02, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x180000000400dde2, |
0x1c80000000205c04, |
0x1800000000009de2, |
0x2000000000000007, |
0x1c00000000201c04, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x180000000400dde2, |
0x2000000000000007, |
0x1c80000000205e84, |
0x1800000000009de2, |
0x1c00000000201e84, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0x2000000000000007, |
0xd400400008400745, |
0x18fe00000000dde2, |
0x1800000000a01c04, |
0x1800000000009de2, |
0x1800000000005de2, |
0x30de0001d0001c02, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0xd400400008400745, |
0x18fe00000000dde2, |
0x1800000000a01e04, |
0x1800000000009de2, |
0x2000000000000007, |
0x1800000000005de2, |
0x30e000061c001c02, |
0x9000000000001de7, |
0xd400400008400465, |
0x0c5400000013dc04, |
0xd400400008400965, |
0xd400400008400765, |
0x2000000000000007, |
0x180000000400dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0xd400400008400945, |
0x2000000000000007, |
0xd400400008400745, |
0x180000000400dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400445, |
0x0c5400000013dc04, |
0x2000000000000007, |
0xd400400008400945, |
0xd400400008400745, |
0x18fe00000000dde2, |
0x1800000000009de2, |
0x1000000000a01c04, |
0x1800000000005de2, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400405, |
0x0c5400000013dc04, |
0xd400400008400905, |
0xd400400008400705, |
0x18fe00000000dde2, |
0x1800000000201c04, |
0x1800000000009de2, |
0x2000000000000007, |
0x30ee020204001c02, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400405, |
0x0c5400000013dc04, |
0xd400400008400905, |
0xd400400008400705, |
0x2000000000000007, |
0x18fe00000000dde2, |
0x1800000000201e04, |
0x1800000000009de2, |
0x30f0040810001c02, |
0x1800000000005de2, |
0x9000000000001de7, |
0xd400400008400425, |
0x2000000000000007, |
0x0c5400000013dc04, |
0xd400400008400925, |
0xd400400008400725, |
0x180000000400dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x9000000000001de7, |
0x2000000000000007, |
0xd400400008400405, |
0x0c5400000013dc04, |
0xd400400008400905, |
0xd400400008400705, |
0x180000000400dde2, |
0x1800000000009de2, |
0x1800000000005de2, |
0x2000000000000007, |
0x9000000000001de7, |
0xd40040000840c485, |
0x0c5400000013dc04, |
0xd40040000840c985, |
0xd40040000840c785, |
0x18fe00000000dde2, |
0x4000000000001de4, |
0x9000000000001de7, |
/* 0x0f08: gk104_rcp_f64 */ |
0x4000000000001de4, |
0x9000000000001de7, |
/* 0x0f18: gk104_rsq_f64 */ |
0x4000000000001de4, |
0x9000000000001de7, |
0xc800000003f01cc5, |
0x2c00000100005c04, |
0x2c0000010800dc04, |
0x3000c3fffff09c04, |
0x680100000c1fdc03, |
0x4000000a60001c47, |
0x180000004000dde2, |
/* 0x0f60: spill_cfstack */ |
0x78000009c0000007, |
0x0c0000000430dd02, |
0x4003ffffa0001ca7, |
0x2800406400001de4, |
0x2800406410005de4, |
0x180000000400dde2, |
0x547e18000000dd05, |
0x60000008e0000007, |
0x190ec0000431dc03, |
0x40000000000001f4, |
0x94000004c0009c85, |
0x2c00000100009c04, |
0x2c0000010800dc04, |
0x9400000020009ca5, |
0x9400000100011cc5, |
0x9400000140021cc5, |
0x9400000180031cc5, |
0x94000001c0041cc5, |
0x9400000200051cc5, |
0x9400000240061cc5, |
0x9400000280071cc5, |
0x94000002c0081cc5, |
0x9400000300091cc5, |
0x94000003400a1cc5, |
0x94000003800b1cc5, |
0x94000003c00c1cc5, |
0x94000004000d1cc5, |
0x94000004400e1cc5, |
0x94000004800f1cc5, |
0xc000000003f09ea5, |
0x94000000c0009ca5, |
0xc000000023f09ea5, |
0x94000000e0009ca5, |
0x2c00000084009c04, |
0x2c0000008800dc04, |
0x9400000040009ca5, |
0x2c0000008c009c04, |
0x2c0000009400dc04, |
0x9400000060009ca5, |
0x2c00000098009c04, |
0x2c0000009c00dc04, |
0x9400000080009ca5, |
0x2c000000c800dc04, |
0x0c0000001030dd02, |
0x4000000100001ea7, |
0x480100000c001c03, |
0x0800000000105c42, |
/* 0x10d8: shared_loop */ |
0xc100000000309c85, |
0x9400000500009c85, |
0x0c00000010001d02, |
0x0800000000105d42, |
0x0c0000001030dd02, |
0x4003ffff40001ca7, |
/* 0x1108: shared_done */ |
0x2800406420001de4, |
0x2800406430005de4, |
0xe000000000001c45, |
0xd000000003ffdcc5, |
0x9c000000000fdcc5, |
0x2c0000000c009c04, |
0x7000c0205020dc03, |
0x7000c01820209c03, |
0x5000406450209c03, |
0x500040644030dc03, |
0x480000000c209c03, |
0x4801000008001c03, |
0x0800000000105c42, |
/* 0x1170: search_cstack */ |
0x280040646000dde4, |
0x8400000020009f05, |
0x190ec0002821dc03, |
0x40000000800001e7, |
0x0c00000040001c02, |
0x0800000000105c42, |
0x0c0000004030dd02, |
0x00029dff0ffc5cbf, |
/* 0x11b0: entry_found */ |
0x8400000000009f85, |
0x2800406400001de4, |
0x2800406410005de4, |
0x9400000010009c85, |
0x4000000000001df4, |
/* 0x11d8: end_exit */ |
0x9800000003ffdcc5, |
0xd000000000008007, |
0xa000000000004007, |
/* 0x11f0: end_cont */ |
0xd000000000008007, |
0x3400c3fffc201c04, |
0xc000000003f01ec5, |
0xa000000000000007, |
}; |
/* Byte offsets of the builtin routines inside gk104_builtin_code. |
 * The last two entries match the gk104_rcp_f64 (0x0f08) and gk104_rsq_f64 |
 * (0x0f18) label comments in that array. |
 * NOTE(review): the first two are presumably div_u32 and div_s32, as for the |
 * other chips - confirm against gk104.asm. |
 */ |
uint64_t gk104_builtin_offsets[] = { |
0x0000000000000000, |
0x00000000000000f0, |
0x0000000000000f08, |
0x0000000000000f18, |
}; |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk110.asm |
---|
0,0 → 1,98 |
.section #gk110_builtin_code |
// DIV U32 |
// |
// Unsigned 32-bit divide that returns both quotient and remainder. |
// |
// UNR recurrence (q = a / b): |
// look for z such that 2^32 - b <= b * z < 2^32 |
// then q - 1 <= (a * z) / 2^32 <= q |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p1 |
// NOTE(review): the code below only writes $p0; $p1 appears untouched. |
// SIZE: 22 / 14 * 8 bytes |
// NOTE(review): the SIZE line does not match this file; counting the lines |
// below gives 26 instructions + 4 sched lines = 30 * 8 bytes (0xf0). |
// |
// Every sched line lists 7 values and is followed by 7 instructions, so |
// presumably each value applies to one instruction; adding or removing an |
// instruction would shift all later groups. |
// |
gk110_div_u32: |
sched 0x28 0x04 0x28 0x04 0x28 0x28 0x28 |
// Initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), shift amount clamped. |
bfind u32 $r2 $r1 |
xor b32 $r2 $r2 0x1f |
mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// $r1 = -b from here on. |
cvt u32 $r1 neg u32 $r1 |
// Refinement step, repeated 5 times in total: |
//   $r3 = (-b) * z (low 32 bits); z = high32(z * $r3) + z |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
sched 0x04 0x28 0x04 0x28 0x28 0x2c 0x04 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep a in $r3, then quotient estimate $r0 = high32(a * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = b again (negating $r1 back); $r1 = (-b) * q + a, the remainder. |
cvt u32 $r2 neg u32 $r1 |
add $r1 (mul u32 $r1 u32 $r0) $r3 |
// Correction: if remainder >= b, subtract b and increment q. The second |
// round only runs when the first one fired (everything is predicated on $p0). |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
sched 0x28 0x2c 0x04 0x20 0x2e 0x28 0x20 |
$p0 add b32 $r0 $r0 0x1 |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
ret |
// DIV S32, like DIV U32 after taking ABS(inputs) |
// |
// The operand signs are captured in $p2/$p3 first, the unsigned algorithm |
// runs on the absolute values, and the results are negated at the end. |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p3 |
// |
gk110_div_s32: |
// $p2 = (dividend < 0); $p3 = (divisor < 0) xor $p2. |
// NOTE(review): these two instructions precede this routine's first sched |
// line; only 5 instructions follow the last sched of gk110_div_u32, so they |
// appear to use its remaining 2 slots - confirm before reordering. |
set $p2 0x1 lt s32 $r0 0x0 |
set $p3 0x1 lt s32 $r1 0x0 xor $p2 |
sched 0x20 0x28 0x28 0x04 0x28 0x04 0x28 |
// Work on |a| and |b|. |
cvt s32 $r0 abs s32 $r0 |
cvt s32 $r1 abs s32 $r1 |
// Initial estimate: $r2 = 1 << (bfind($r1) ^ 0x1f), shift amount clamped. |
bfind u32 $r2 $r1 |
xor b32 $r2 $r2 0x1f |
mov b32 $r3 0x1 |
shl b32 $r2 $r3 clamp $r2 |
// $r1 = -|b| from here on. |
cvt u32 $r1 neg u32 $r1 |
sched 0x28 0x28 0x28 0x28 0x28 0x28 0x28 |
// Refinement step, repeated 5 times in total: |
//   $r3 = (-|b|) * z (low 32 bits); z = high32(z * $r3) + z |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
sched 0x28 0x28 0x04 0x28 0x04 0x28 0x28 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
mul $r3 u32 $r1 u32 $r2 |
add $r2 (mul high u32 $r2 u32 $r3) $r2 |
// Keep |a| in $r3, then quotient estimate $r0 = high32(|a| * z). |
mov b32 $r3 $r0 |
mul high $r0 u32 $r0 u32 $r2 |
// $r2 = |b| again; $r1 = (-|b|) * q + |a|, the remainder. |
cvt u32 $r2 neg u32 $r1 |
add $r1 (mul u32 $r1 u32 $r0) $r3 |
sched 0x2c 0x04 0x28 0x2c 0x04 0x28 0x20 |
// Correction: up to two rounds of "remainder -= |b|; q += 1", the second |
// one only when the first fired. |
set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
$p0 set $p0 0x1 ge u32 $r1 $r2 |
$p0 sub b32 $r1 $r1 $r2 |
$p0 add b32 $r0 $r0 0x1 |
// Restore signs: quotient is negated when the operand signs differed ($p3), |
// the remainder when the dividend was negative ($p2). |
$p3 cvt s32 $r0 neg s32 $r0 |
sched 0x04 0x2e 0x04 0x28 0x04 0x20 0x2c |
$p2 cvt s32 $r1 neg s32 $r1 |
ret |
// Both f64 builtins share one address and consist of a bare ret, i.e. they |
// return without touching any register. |
gk110_rcp_f64: |
gk110_rsq_f64: |
ret |
// Offset table: one 64-bit entry per builtin label, in the order |
// div_u32, div_s32, rcp_f64, rsq_f64. |
.section #gk110_builtin_offsets |
.b64 #gk110_div_u32 |
.b64 #gk110_div_s32 |
.b64 #gk110_rcp_f64 |
.b64 #gk110_rsq_f64 |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gk110.asm.h |
---|
0,0 → 1,81 |
/* Assembled 64-bit words of the gk110 builtin routines (the Makefile rule |
 * builds gk110.asm.h from gk110.asm with envyas, so regenerate instead of |
 * editing by hand). The label comments give each routine's byte offset. |
 * The words starting with 0x08 sit where the sched lines of gk110.asm are. |
 */ |
uint64_t gk110_builtin_code[] = { |
/* 0x0000: gk110_div_u32 */ |
0x08a0a0a010a010a0, |
0xe1800000009c000a, |
0x220000000f9c0808, |
0x74000000009fc00e, |
0xe2400000011c0c0a, |
0xe6010000009c2806, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0x08a0a0a0a0a0a0a0, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0x0810b0a0a010a010, |
0xd2000800019c080a, |
0xe4c03c00001c000e, |
0xe1c00400011c0002, |
0xe6010000009c280a, |
0xd0000c00001c0406, |
0xdb601c00011c041e, |
0xe088000001000406, |
0x0880a0b88010b0a0, |
0x4000000000800001, |
0xdb601c000100041e, |
0xe088000001000406, |
0x4000000000800001, |
0x19000000001c003c, |
/* 0x00f0: gk110_div_s32 */ |
0xdb181c007f9c005e, |
0xdb1a08007f9c047e, |
0x08a010a010a0a080, |
0xe6100000001ce802, |
0xe6100000009ce806, |
0xe1800000009c000a, |
0x220000000f9c0808, |
0x74000000009fc00e, |
0xe2400000011c0c0a, |
0xe6010000009c2806, |
0x08a0a0a0a0a0a0a0, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0x08a0a010a010a0a0, |
0xd2000800019c080a, |
0xe1c00000011c040e, |
0xd2000800019c080a, |
0xe4c03c00001c000e, |
0xe1c00400011c0002, |
0xe6010000009c280a, |
0xd0000c00001c0406, |
0x0880a010b0a010b0, |
0xdb601c00011c041e, |
0xe088000001000406, |
0x4000000000800001, |
0xdb601c000100041e, |
0xe088000001000406, |
0x4000000000800001, |
0xe6010000000ce802, |
0x08b08010a010b810, |
0xe60100000088e806, |
0x19000000001c003c, |
/* 0x0218: gk110_rcp_f64 */ |
/* 0x0218: gk110_rsq_f64 */ |
0x19000000001c003c, |
}; |
/* Byte offsets into gk110_builtin_code, in the order of the .b64 entries of |
 * gk110.asm: div_u32, div_s32, rcp_f64, rsq_f64 (the last two share 0x218). |
 */ |
uint64_t gk110_builtin_offsets[] = { |
0x0000000000000000, |
0x00000000000000f0, |
0x0000000000000218, |
0x0000000000000218, |
}; |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gm107.asm |
---|
0,0 → 1,115 |
.section #gm107_builtin_code |
// DIV U32 |
// |
// Unsigned 32-bit divide that returns both quotient and remainder. |
// |
// UNR recurrence (q = a / b): |
// look for z such that 2^32 - b <= b * z < 2^32 |
// then q - 1 <= (a * z) / 2^32 <= q |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p1 |
// NOTE(review): the code below only writes $p0; $p1 appears untouched. |
// SIZE: 22 / 14 * 8 bytes |
// NOTE(review): the SIZE line does not match this file; counting the lines |
// below gives 27 instructions + 9 sched lines = 36 * 8 bytes (0x120). |
// |
// Every sched line lists 3 values and is followed by 3 instructions; the |
// trailing "nop 0" fills the last group. |
// |
gm107_div_u32: |
sched 0x7e0 0x7e0 0x7e0 |
// Initial estimate: $r2 = 1 << (flo($r1) ^ 0x1f). |
flo u32 $r2 $r1 |
lop xor 1 $r2 $r2 0x1f |
mov $r3 0x1 0xf |
sched 0x7e0 0x7e0 0x7e0 |
shl $r2 $r3 $r2 |
// $r1 = -b from here on. |
i2i u32 u32 $r1 neg $r1 |
// Refinement step, repeated 5 times in total: |
//   $r3 = (-b) * z (low 32 bits); z = high32(z * $r3) + z |
imul u32 u32 $r3 $r1 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
// Keep a in $r3, then quotient estimate $r0 = high32(a * z). |
mov $r3 $r0 0xf |
imul u32 u32 hi $r0 $r0 $r2 |
// $r2 = b again (negating $r1 back). |
i2i u32 u32 $r2 neg $r1 |
sched 0x7e0 0x7e0 0x7e0 |
// $r1 = (-b) * q + a, the remainder. |
imad u32 u32 $r1 $r1 $r0 $r3 |
// Correction: if remainder >= b, subtract b and increment q. The second |
// round only runs when the first one fired (everything is predicated on $p0). |
isetp ge u32 and $p0 1 $r1 $r2 1 |
$p0 iadd $r1 $r1 neg $r2 |
sched 0x7e0 0x7e0 0x7e0 |
$p0 iadd $r0 $r0 0x1 |
$p0 isetp ge u32 and $p0 1 $r1 $r2 1 |
$p0 iadd $r1 $r1 neg $r2 |
sched 0x7e0 0x7e0 0x7e0 |
$p0 iadd $r0 $r0 0x1 |
ret |
nop 0 |
// DIV S32, like DIV U32 after taking ABS(inputs) |
// |
// The operand signs are captured in $p2/$p3 first, the unsigned algorithm |
// runs on the absolute values, and the results are negated at the end. |
// The trailing "nop 0" fills the last 3-instruction sched group. |
// |
// INPUT: $r0: dividend, $r1: divisor |
// OUTPUT: $r0: result, $r1: modulus |
// CLOBBER: $r2 - $r3, $p0 - $p3 |
// |
gm107_div_s32: |
sched 0x7e0 0x7e0 0x7e0 |
// $p2 = (dividend < 0); $p3 = (divisor < 0) xor $p2. |
isetp lt and $p2 0x1 $r0 0 1 |
isetp lt xor $p3 1 $r1 0 $p2 |
// Work on |a| and |b|. |
i2i s32 s32 $r0 abs $r0 |
sched 0x7e0 0x7e0 0x7e0 |
i2i s32 s32 $r1 abs $r1 |
// Initial estimate: $r2 = 1 << (flo($r1) ^ 0x1f). |
flo u32 $r2 $r1 |
lop xor 1 $r2 $r2 0x1f |
sched 0x7e0 0x7e0 0x7e0 |
mov $r3 0x1 0xf |
shl $r2 $r3 $r2 |
// $r1 = -|b| from here on. |
i2i u32 u32 $r1 neg $r1 |
sched 0x7e0 0x7e0 0x7e0 |
// Refinement step, repeated 5 times in total: |
//   $r3 = (-|b|) * z (low 32 bits); z = high32(z * $r3) + z |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imul u32 u32 $r3 $r1 $r2 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
imul u32 u32 $r3 $r1 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
imad u32 u32 hi $r2 $r2 $r3 $r2 |
// Keep |a| in $r3, then quotient estimate $r0 = high32(|a| * z). |
mov $r3 $r0 0xf |
imul u32 u32 hi $r0 $r0 $r2 |
sched 0x7e0 0x7e0 0x7e0 |
// $r2 = |b| again; $r1 = (-|b|) * q + |a|, the remainder. |
i2i u32 u32 $r2 neg $r1 |
imad u32 u32 $r1 $r1 $r0 $r3 |
// Correction: up to two rounds of "remainder -= |b|; q += 1", the second |
// one only when the first fired. |
isetp ge u32 and $p0 1 $r1 $r2 1 |
sched 0x7e0 0x7e0 0x7e0 |
$p0 iadd $r1 $r1 neg $r2 |
$p0 iadd $r0 $r0 0x1 |
$p0 isetp ge u32 and $p0 1 $r1 $r2 1 |
sched 0x7e0 0x7e0 0x7e0 |
$p0 iadd $r1 $r1 neg $r2 |
$p0 iadd $r0 $r0 0x1 |
// Restore signs: quotient is negated when the operand signs differed ($p3), |
// the remainder when the dividend was negative ($p2). |
$p3 i2i s32 s32 $r0 neg $r0 |
sched 0x7e0 0x7e0 0x7e0 |
$p2 i2i s32 s32 $r1 neg $r1 |
ret |
nop 0 |
// STUB |
// Both f64 builtins share one address and only execute ret; the two |
// "nop 0" lines fill the 3-instruction sched group. |
gm107_rcp_f64: |
gm107_rsq_f64: |
sched 0x7e0 0x7e0 0x7e0 |
ret |
nop 0 |
nop 0 |
// Offset table: one 64-bit entry per builtin label, in the order |
// div_u32, div_s32, rcp_f64, rsq_f64. |
.section #gm107_builtin_offsets |
.b64 #gm107_div_u32 |
.b64 #gm107_div_s32 |
.b64 #gm107_rcp_f64 |
.b64 #gm107_rsq_f64 |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/lib/gm107.asm.h |
---|
0,0 → 1,97 |
/* Assembled 64-bit words of the gm107 builtin routines (the Makefile rule |
 * builds gm107.asm.h from gm107.asm with envyas, so regenerate instead of |
 * editing by hand). The label comments give each routine's byte offset. |
 * The repeated 0x001f8000fc0007e0 word sits where the sched lines of |
 * gm107.asm are, followed by three instruction words each time. |
 */ |
uint64_t gm107_builtin_code[] = { |
/* 0x0000: gm107_div_u32 */ |
0x001f8000fc0007e0, |
0x5c30000000170002, |
0x3847040001f70202, |
0x3898078000170003, |
0x001f8000fc0007e0, |
0x5c48000000270302, |
0x5ce0200000170a01, |
0x5c38000000270103, |
0x001f8000fc0007e0, |
0x5a40010000370202, |
0x5c38000000270103, |
0x5a40010000370202, |
0x001f8000fc0007e0, |
0x5c38000000270103, |
0x5a40010000370202, |
0x5c38000000270103, |
0x001f8000fc0007e0, |
0x5a40010000370202, |
0x5c38000000270103, |
0x5a40010000370202, |
0x001f8000fc0007e0, |
0x5c98078000070003, |
0x5c38008000270000, |
0x5ce0200000170a02, |
0x001f8000fc0007e0, |
0x5a00018000070101, |
0x5b6c038000270107, |
0x5c11000000200101, |
0x001f8000fc0007e0, |
0x3810000000100000, |
0x5b6c038000200107, |
0x5c11000000200101, |
0x001f8000fc0007e0, |
0x3810000000100000, |
0xe32000000007000f, |
0x50b0000000070f00, |
/* 0x0120: gm107_div_s32 */ |
0x001f8000fc0007e0, |
0x5b6303800ff70017, |
0x5b6341000ff7011f, |
0x5ce2000000073a00, |
0x001f8000fc0007e0, |
0x5ce2000000173a01, |
0x5c30000000170002, |
0x3847040001f70202, |
0x001f8000fc0007e0, |
0x3898078000170003, |
0x5c48000000270302, |
0x5ce0200000170a01, |
0x001f8000fc0007e0, |
0x5c38000000270103, |
0x5a40010000370202, |
0x5c38000000270103, |
0x001f8000fc0007e0, |
0x5a40010000370202, |
0x5c38000000270103, |
0x5a40010000370202, |
0x001f8000fc0007e0, |
0x5c38000000270103, |
0x5a40010000370202, |
0x5c38000000270103, |
0x001f8000fc0007e0, |
0x5a40010000370202, |
0x5c98078000070003, |
0x5c38008000270000, |
0x001f8000fc0007e0, |
0x5ce0200000170a02, |
0x5a00018000070101, |
0x5b6c038000270107, |
0x001f8000fc0007e0, |
0x5c11000000200101, |
0x3810000000100000, |
0x5b6c038000200107, |
0x001f8000fc0007e0, |
0x5c11000000200101, |
0x3810000000100000, |
0x5ce0200000033a00, |
0x001f8000fc0007e0, |
0x5ce0200000123a01, |
0xe32000000007000f, |
0x50b0000000070f00, |
/* 0x0280: gm107_rcp_f64 */ |
/* 0x0280: gm107_rsq_f64 */ |
0x001f8000fc0007e0, |
0xe32000000007000f, |
0x50b0000000070f00, |
0x50b0000000070f00, |
}; |
/* Byte offsets into gm107_builtin_code, in the order of the .b64 entries of |
 * gm107.asm: div_u32, div_s32, rcp_f64, rsq_f64 (the last two share 0x280). |
 */ |
uint64_t gm107_builtin_offsets[] = { |
0x0000000000000000, |
0x0000000000000120, |
0x0000000000000280, |
0x0000000000000280, |
}; |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp |
---|
0,0 → 1,1238 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
#include "codegen/nv50_ir_driver.h" |
extern "C" { |
#include "nouveau_debug.h" |
#include "nv50/nv50_program.h" |
} |
namespace nv50_ir { |
// Build the source modifier that corresponds to a unary operation.
// OP_NEG, OP_ABS, OP_SAT and OP_NOT map to their NV50_IR_MOD_* bit; any
// other operation yields an empty modifier (bits == 0).
Modifier::Modifier(operation op)
{
   if (op == OP_NEG)
      bits = NV50_IR_MOD_NEG;
   else if (op == OP_ABS)
      bits = NV50_IR_MOD_ABS;
   else if (op == OP_SAT)
      bits = NV50_IR_MOD_SAT;
   else if (op == OP_NOT)
      bits = NV50_IR_MOD_NOT;
   else
      bits = 0;
}
// Combine this modifier with @m.
//
// NOT and NEG toggle (xor), except that a NEG coming from @m is dropped when
// this modifier already carries ABS; ABS and SAT accumulate (or).
Modifier Modifier::operator*(const Modifier m) const
{
   const unsigned int rhs =
      (this->bits & NV50_IR_MOD_ABS) ? (m.bits & ~NV50_IR_MOD_NEG) : m.bits;

   const unsigned int toggled =
      (this->bits ^ rhs) & (NV50_IR_MOD_NOT | NV50_IR_MOD_NEG);
   const unsigned int sticky =
      (this->bits | m.bits) & (NV50_IR_MOD_ABS | NV50_IR_MOD_SAT);

   return Modifier(toggled | sticky);
}
// Create a reference to @v (may be NULL) with no indirect sources and not
// marked as used-as-pointer. Registration in v's use list happens in set().
ValueRef::ValueRef(Value *v) : value(NULL), insn(NULL)
{
   usedAsPtr = false;
   for (int dim = 0; dim < 2; ++dim)
      indirect[dim] = -1;

   set(v);
}
// Copy constructor: shares the owning instruction pointer of @ref and copies
// its value, modifier, indirect indices (via set()) and usedAsPtr flag.
ValueRef::ValueRef(const ValueRef& ref) : value(NULL), insn(ref.insn)
{
   this->set(ref);
   this->usedAsPtr = ref.usedAsPtr;
}
// Drop the referenced value so this reference is removed from its use list.
ValueRef::~ValueRef()
{
   set(static_cast<Value *>(NULL));
}
// Try to resolve this reference to an immediate.
//
// Walks from this reference through unique defining OP_MOV instructions,
// accumulating source modifiers on the way. On reaching an immediate, copies
// it into @imm, retypes it to the source type of the starting instruction,
// applies the accumulated modifier and returns true. Returns false when the
// chain ends elsewhere or a modified source has a different source type.
bool ValueRef::getImmediate(ImmediateValue &imm) const
{
   const DataType type = this->insn->sType;
   Modifier accum;

   for (const ValueRef *cur = this; cur != NULL; ) {
      if (cur->mod) {
         if (cur->insn->sType != type)
            return false;
         accum *= cur->mod;
      }

      if (cur->getFile() == FILE_IMMEDIATE) {
         imm = *(cur->value->asImm());
         // The immediate's type isn't required to match its use, it's
         // more of a hint; applying a modifier makes use of that hint.
         imm.reg.type = type;
         accum.applyTo(imm);
         return true;
      }

      Instruction *def = cur->value->getUniqueInsn();
      if (!def || def->op != OP_MOV)
         return false;

      cur = &def->src(0);
      if (cur->mod)
         WARN("OP_MOV with modifier encountered !\n");
   }
   return false;
}
// Create a definition of @v (may be NULL) that is not yet owned by an
// instruction; set() links it into v's defs list.
ValueDef::ValueDef(Value *v) : value(NULL), insn(NULL)
{
   this->set(v);
}
// Copy constructor: defines the same value as @def but starts out without an
// owning instruction (insn is NULL, not copied).
ValueDef::ValueDef(const ValueDef& def) : value(NULL), insn(NULL)
{
   Value *const defined = def.get();
   set(defined);
}
// Unlink this definition from the defs list of the value it defines.
ValueDef::~ValueDef()
{
   set(NULL);
}
void |
ValueRef::set(const ValueRef &ref) |
{ |
this->set(ref.get()); |
mod = ref.mod; |
indirect[0] = ref.indirect[0]; |
indirect[1] = ref.indirect[1]; |
} |
void |
ValueRef::set(Value *refVal) |
{ |
if (value == refVal) |
return; |
if (value) |
value->uses.erase(this); |
if (refVal) |
refVal->uses.insert(this); |
value = refVal; |
} |
void |
ValueDef::set(Value *defVal) |
{ |
if (value == defVal) |
return; |
if (value) |
value->defs.remove(this); |
if (defVal) |
defVal->defs.push_back(this); |
value = defVal; |
} |
// Check if we can replace this definition's value by the value in @rep, |
// including the source modifiers, i.e. make sure that all uses support |
// @rep.mod. |
bool |
ValueDef::mayReplace(const ValueRef &rep) |
{ |
if (!rep.mod) |
return true; |
if (!insn || !insn->bb) // Unbound instruction ? |
return false; |
const Target *target = insn->bb->getProgram()->getTarget(); |
for (Value::UseIterator it = value->uses.begin(); it != value->uses.end(); |
++it) { |
Instruction *insn = (*it)->getInsn(); |
int s = -1; |
for (int i = 0; insn->srcExists(i); ++i) { |
if (insn->src(i).get() == value) { |
// If there are multiple references to us we'd have to check if the |
// combination of mods is still supported, but just bail for now. |
if (&insn->src(i) != (*it)) |
return false; |
s = i; |
} |
} |
assert(s >= 0); // integrity of uses list |
if (!target->isModSupported(insn, s, rep.mod)) |
return false; |
} |
return true; |
} |
void |
ValueDef::replace(const ValueRef &repVal, bool doSet) |
{ |
assert(mayReplace(repVal)); |
if (value == repVal.get()) |
return; |
while (!value->uses.empty()) { |
ValueRef *ref = *value->uses.begin(); |
ref->set(repVal.get()); |
ref->mod *= repVal.mod; |
} |
if (doSet) |
set(repVal.get()); |
} |
// Default-construct a value: it is its own join representative and its
// storage descriptor is zeroed except for a size of 4 bytes.
Value::Value()
{
   join = this;
   // The argument had been mangled into the single character U+00AE (the
   // "&reg" HTML entity); memset needs the address of reg.
   memset(&reg, 0, sizeof(reg));
   reg.size = 4;
}
// Create an unassigned l-value in register file @file and register it with
// @fn. Predicates are 1 byte wide, everything else 4; the register id starts
// at -1 and all flags are cleared.
LValue::LValue(Function *fn, DataFile file)
{
   reg.file = file;
   reg.size = (file == FILE_PREDICATE) ? 1 : 4;
   reg.data.id = -1;

   ssa = 0;
   fixedReg = 0;
   noSpill = 0;
   compound = 0;
   compMask = 0;

   fn->add(this, this->id);
}
// Create an unassigned l-value with the same register file and size as
// @lval (must not be NULL) and register it with @fn. The register id starts
// at -1 and all flags are cleared.
LValue::LValue(Function *fn, LValue *lval)
{
   assert(lval);

   reg.file = lval->reg.file;
   reg.size = lval->reg.size;
   reg.data.id = -1;

   ssa = 0;
   fixedReg = 0;
   noSpill = 0;
   compound = 0;
   compMask = 0;

   fn->add(this, this->id);
}
// Clone this l-value into the function given by the clone policy: the copy
// is recorded in @pol and receives this value's size, type and data.
LValue *
LValue::clone(ClonePolicy<Function>& pol) const
{
   LValue *copy = new_LValue(pol.context(), reg.file);

   pol.set<Value>(this, copy);

   copy->reg.data = this->reg.data;
   copy->reg.type = this->reg.type;
   copy->reg.size = this->reg.size;

   return copy;
}
bool |
LValue::isUniform() const |
{ |
if (defs.size() > 1) |
return false; |
Instruction *insn = getInsn(); |
// let's not try too hard here for now ... |
return !insn->srcExists(1) && insn->getSrc(0)->isUniform(); |
} |
// Create a symbol in file @f / file index @fidx at offset 0 with no base
// symbol, and register it with @prog.
Symbol::Symbol(Program *prog, DataFile f, ubyte fidx)
{
   reg.file = f;
   reg.fileIndex = fidx;
   reg.data.offset = 0;

   baseSym = NULL;

   prog->add(this, this->id);
}
// Clone this symbol into the program of the clone policy's function: the
// copy is recorded in @pol and receives this symbol's size, type, data and
// base symbol.
Symbol *
Symbol::clone(ClonePolicy<Function>& pol) const
{
   Program *const owner = pol.context()->getProgram();
   Symbol *copy = new_Symbol(owner, reg.file, reg.fileIndex);

   pol.set<Value>(this, copy);

   copy->baseSym = this->baseSym;
   copy->reg.data = this->reg.data;
   copy->reg.type = this->reg.type;
   copy->reg.size = this->reg.size;

   return copy;
}
bool |
Symbol::isUniform() const |
{ |
return |
reg.file != FILE_SYSTEM_VALUE && |
reg.file != FILE_MEMORY_LOCAL && |
reg.file != FILE_SHADER_INPUT; |
} |
// Create a 4-byte TYPE_U32 immediate holding @uval and register it with
// @prog. The storage descriptor is zeroed first so unused bytes are 0.
ImmediateValue::ImmediateValue(Program *prog, uint32_t uval)
{
   // The argument had been mangled into the single character U+00AE (the
   // "&reg" HTML entity); memset needs the address of reg.
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.size = 4;
   reg.type = TYPE_U32;

   reg.data.u32 = uval;

   prog->add(this, this->id);
}
// Create a 4-byte TYPE_F32 immediate holding @fval and register it with
// @prog. The storage descriptor is zeroed first so unused bytes are 0.
ImmediateValue::ImmediateValue(Program *prog, float fval)
{
   // The argument had been mangled into the single character U+00AE (the
   // "&reg" HTML entity); memset needs the address of reg.
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.size = 4;
   reg.type = TYPE_F32;

   reg.data.f32 = fval;

   prog->add(this, this->id);
}
// Create an 8-byte TYPE_F64 immediate holding @dval and register it with
// @prog. The storage descriptor is zeroed first.
ImmediateValue::ImmediateValue(Program *prog, double dval)
{
   // The argument had been mangled into the single character U+00AE (the
   // "&reg" HTML entity); memset needs the address of reg.
   memset(&reg, 0, sizeof(reg));

   reg.file = FILE_IMMEDIATE;
   reg.size = 8;
   reg.type = TYPE_F64;

   reg.data.f64 = dval;

   prog->add(this, this->id);
}
// Create a retyped copy of @proto: same storage contents, but with type @ty
// and the size that typeSizeof() reports for it. No Program is involved in
// this constructor.
ImmediateValue::ImmediateValue(const ImmediateValue *proto, DataType ty)
{
   this->reg = proto->reg;

   this->reg.size = typeSizeof(ty);
   this->reg.type = ty;
}
// Clone this immediate into the program of the clone policy's function. The
// copy starts as a zero u32 immediate and then takes over this value's size,
// type and data; it is recorded in @pol.
ImmediateValue *
ImmediateValue::clone(ClonePolicy<Function>& pol) const
{
   Program *const owner = pol.context()->getProgram();
   ImmediateValue *copy = new_ImmediateValue(owner, 0u);

   pol.set<Value>(this, copy);

   copy->reg.data = this->reg.data;
   copy->reg.type = this->reg.type;
   copy->reg.size = this->reg.size;

   return copy;
}
bool |
ImmediateValue::isInteger(const int i) const |
{ |
switch (reg.type) { |
case TYPE_S8: |
return reg.data.s8 == i; |
case TYPE_U8: |
return reg.data.u8 == i; |
case TYPE_S16: |
return reg.data.s16 == i; |
case TYPE_U16: |
return reg.data.u16 == i; |
case TYPE_S32: |
case TYPE_U32: |
return reg.data.s32 == i; // as if ... |
case TYPE_F32: |
return reg.data.f32 == static_cast<float>(i); |
case TYPE_F64: |
return reg.data.f64 == static_cast<double>(i); |
default: |
return false; |
} |
} |
bool |
ImmediateValue::isNegative() const |
{ |
switch (reg.type) { |
case TYPE_S8: return reg.data.s8 < 0; |
case TYPE_S16: return reg.data.s16 < 0; |
case TYPE_S32: |
case TYPE_U32: return reg.data.s32 < 0; |
case TYPE_F32: return reg.data.u32 & (1 << 31); |
case TYPE_F64: return reg.data.u64 & (1ULL << 63); |
default: |
return false; |
} |
} |
bool |
ImmediateValue::isPow2() const |
{ |
switch (reg.type) { |
case TYPE_U8: |
case TYPE_U16: |
case TYPE_U32: return util_is_power_of_two(reg.data.u32); |
default: |
return false; |
} |
} |
void |
ImmediateValue::applyLog2() |
{ |
switch (reg.type) { |
case TYPE_S8: |
case TYPE_S16: |
case TYPE_S32: |
assert(!this->isNegative()); |
// fall through |
case TYPE_U8: |
case TYPE_U16: |
case TYPE_U32: |
reg.data.u32 = util_logbase2(reg.data.u32); |
break; |
case TYPE_F32: |
reg.data.f32 = log2f(reg.data.f32); |
break; |
case TYPE_F64: |
reg.data.f64 = log2(reg.data.f64); |
break; |
default: |
assert(0); |
break; |
} |
} |
bool |
ImmediateValue::compare(CondCode cc, float fval) const |
{ |
if (reg.type != TYPE_F32) |
ERROR("immediate value is not of type f32"); |
switch (static_cast<CondCode>(cc & 7)) { |
case CC_TR: return true; |
case CC_FL: return false; |
case CC_LT: return reg.data.f32 < fval; |
case CC_LE: return reg.data.f32 <= fval; |
case CC_GT: return reg.data.f32 > fval; |
case CC_GE: return reg.data.f32 >= fval; |
case CC_EQ: return reg.data.f32 == fval; |
case CC_NE: return reg.data.f32 != fval; |
default: |
assert(0); |
return false; |
} |
} |
// Assignment copies only the storage descriptor (reg) of @that.
ImmediateValue&
ImmediateValue::operator=(const ImmediateValue &that)
{
   reg = that.reg;
   return *this;
}
bool |
Value::interfers(const Value *that) const |
{ |
uint32_t idA, idB; |
if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) |
return false; |
if (this->asImm()) |
return false; |
if (this->asSym()) { |
idA = this->join->reg.data.offset; |
idB = that->join->reg.data.offset; |
} else { |
idA = this->join->reg.data.id * MIN2(this->reg.size, 4); |
idB = that->join->reg.data.id * MIN2(that->reg.size, 4); |
} |
if (idA < idB) |
return (idA + this->reg.size > idB); |
else |
if (idA > idB) |
return (idB + that->reg.size > idA); |
else |
return (idA == idB); |
} |
bool |
Value::equals(const Value *that, bool strict) const |
{ |
if (strict) |
return this == that; |
if (that->reg.file != reg.file || that->reg.fileIndex != reg.fileIndex) |
return false; |
if (that->reg.size != this->reg.size) |
return false; |
if (that->reg.data.id != this->reg.data.id) |
return false; |
return true; |
} |
bool |
ImmediateValue::equals(const Value *that, bool strict) const |
{ |
const ImmediateValue *imm = that->asImm(); |
if (!imm) |
return false; |
return reg.data.u64 == imm->reg.data.u64; |
} |
bool |
Symbol::equals(const Value *that, bool strict) const |
{ |
if (reg.file != that->reg.file || reg.fileIndex != that->reg.fileIndex) |
return false; |
assert(that->asSym()); |
if (this->baseSym != that->asSym()->baseSym) |
return false; |
if (reg.file == FILE_SYSTEM_VALUE) |
return (this->reg.data.sv.sv == that->reg.data.sv.sv && |
this->reg.data.sv.index == that->reg.data.sv.index); |
return this->reg.data.offset == that->reg.data.offset; |
} |
void Instruction::init() |
{ |
next = prev = 0; |
cc = CC_ALWAYS; |
rnd = ROUND_N; |
cache = CACHE_CA; |
subOp = 0; |
saturate = 0; |
join = 0; |
exit = 0; |
terminator = 0; |
ftz = 0; |
dnz = 0; |
perPatch = 0; |
fixed = 0; |
encSize = 0; |
ipa = 0; |
mask = 0; |
lanes = 0xf; |
postFactor = 0; |
predSrc = -1; |
flagsDef = -1; |
flagsSrc = -1; |
} |
// Default instruction: an OP_NOP with f32 source/destination types that is
// not part of any basic block and has id -1.
Instruction::Instruction()
{
   init();

   id = -1;
   bb = 0;

   op = OP_NOP;
   sType = TYPE_F32;
   dType = TYPE_F32;
}
// Create an instruction with operation @opr whose source and destination
// types are both @ty, and register it with @fn (which assigns the id).
Instruction::Instruction(Function *fn, operation opr, DataType ty)
{
   init();

   op = opr;
   sType = ty;
   dType = ty;

   fn->add(this, id);
}
// Detach the instruction from its basic block and function (if any) and
// release all source and definition links.
Instruction::~Instruction()
{
   if (bb != NULL) {
      // fetch the function first, then unlink from the block
      Function *const owner = bb->getFunction();
      bb->remove(this);
      owner->allInsns.remove(id);
   }

   int s = 0;
   while (srcExists(s))
      setSrc(s++, NULL);

   // must unlink defs too since the list pointers will get deallocated
   int d = 0;
   while (defExists(d))
      setDef(d++, NULL);
}
void |
Instruction::setDef(int i, Value *val) |
{ |
int size = defs.size(); |
if (i >= size) { |
defs.resize(i + 1); |
while (size <= i) |
defs[size++].setInsn(this); |
} |
defs[i].set(val); |
} |
void |
Instruction::setSrc(int s, Value *val) |
{ |
int size = srcs.size(); |
if (s >= size) { |
srcs.resize(s + 1); |
while (size <= s) |
srcs[size++].setInsn(this); |
} |
srcs[s].set(val); |
} |
void |
Instruction::setSrc(int s, const ValueRef& ref) |
{ |
setSrc(s, ref.get()); |
srcs[s].mod = ref.mod; |
} |
void |
Instruction::swapSources(int a, int b) |
{ |
Value *value = srcs[a].get(); |
Modifier m = srcs[a].mod; |
setSrc(a, srcs[b]); |
srcs[b].set(value); |
srcs[b].mod = m; |
} |
// Fix up a source index after the sources starting at @s were shifted by
// @delta: indices at or above @s move along, indices that fall inside the
// erased window [@s + @delta, @s) of a negative shift become -1, and all
// other indices stay as they are.
static inline void moveSourcesAdjustIndex(int8_t &index, int s, int delta)
{
   if (index < s) {
      const bool erased = (delta < 0) && (index >= (s + delta));
      if (erased)
         index = -1;
      return;
   }
   index += delta;
}
// Moves sources [@s,last_source] by @delta. |
// If @delta < 0, sources [@s - abs(@delta), @s) are erased. |
void |
Instruction::moveSources(const int s, const int delta) |
{ |
if (delta == 0) |
return; |
assert(s + delta >= 0); |
int k; |
for (k = 0; srcExists(k); ++k) { |
for (int i = 0; i < 2; ++i) |
moveSourcesAdjustIndex(src(k).indirect[i], s, delta); |
} |
moveSourcesAdjustIndex(predSrc, s, delta); |
moveSourcesAdjustIndex(flagsSrc, s, delta); |
if (asTex()) { |
TexInstruction *tex = asTex(); |
moveSourcesAdjustIndex(tex->tex.rIndirectSrc, s, delta); |
moveSourcesAdjustIndex(tex->tex.sIndirectSrc, s, delta); |
} |
if (delta > 0) { |
--k; |
for (int p = k + delta; k >= s; --k, --p) |
setSrc(p, src(k)); |
} else { |
int p; |
for (p = s; p < k; ++p) |
setSrc(p + delta, src(p)); |
for (; (p + delta) < k; ++p) |
setSrc(p + delta, NULL); |
} |
} |
void |
Instruction::takeExtraSources(int s, Value *values[3]) |
{ |
values[0] = getIndirect(s, 0); |
if (values[0]) |
setIndirect(s, 0, NULL); |
values[1] = getIndirect(s, 1); |
if (values[1]) |
setIndirect(s, 1, NULL); |
values[2] = getPredicate(); |
if (values[2]) |
setPredicate(cc, NULL); |
} |
void |
Instruction::putExtraSources(int s, Value *values[3]) |
{ |
if (values[0]) |
setIndirect(s, 0, values[0]); |
if (values[1]) |
setIndirect(s, 1, values[1]); |
if (values[2]) |
setPredicate(cc, values[2]); |
} |
// Copy this instruction into @i, or into a freshly allocated instruction
// when @i is NULL, mapping every def and source through @pol.
// Returns the copy.
Instruction *
Instruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   Instruction *const copy = i ? i : new_Instruction(pol.context(), op, dType);

#ifndef NDEBUG // non-conformant assert, so this is required
   assert(typeid(*copy) == typeid(*this));
#endif

   // Register the mapping original -> copy with the policy.
   pol.set<Instruction>(this, copy);

   // Plain attributes.
   copy->sType = sType;
   copy->rnd = rnd;
   copy->cache = cache;
   copy->subOp = subOp;

   copy->saturate = saturate;
   copy->join = join;
   copy->exit = exit;
   copy->mask = mask;
   copy->ftz = ftz;
   copy->dnz = dnz;
   copy->ipa = ipa;
   copy->lanes = lanes;
   copy->perPatch = perPatch;

   copy->postFactor = postFactor;

   // Definitions and sources are translated through the clone policy.
   for (int d = 0; defExists(d); ++d)
      copy->setDef(d, pol.get(getDef(d)));

   for (int s = 0; srcExists(s); ++s) {
      copy->setSrc(s, pol.get(getSrc(s)));
      copy->src(s).mod = src(s).mod;
   }

   // Indices into the source/def arrays carry over unchanged.
   copy->cc = cc;
   copy->predSrc = predSrc;
   copy->flagsDef = flagsDef;
   copy->flagsSrc = flagsSrc;

   return copy;
}
unsigned int |
Instruction::defCount(unsigned int mask, bool singleFile) const |
{ |
unsigned int i, n; |
if (singleFile) { |
unsigned int d = ffs(mask); |
if (!d) |
return 0; |
for (i = d--; defExists(i); ++i) |
if (getDef(i)->reg.file != getDef(d)->reg.file) |
mask &= ~(1 << i); |
} |
for (n = 0, i = 0; this->defExists(i); ++i, mask >>= 1) |
n += mask & 1; |
return n; |
} |
unsigned int |
Instruction::srcCount(unsigned int mask, bool singleFile) const |
{ |
unsigned int i, n; |
if (singleFile) { |
unsigned int s = ffs(mask); |
if (!s) |
return 0; |
for (i = s--; srcExists(i); ++i) |
if (getSrc(i)->reg.file != getSrc(s)->reg.file) |
mask &= ~(1 << i); |
} |
for (n = 0, i = 0; this->srcExists(i); ++i, mask >>= 1) |
n += mask & 1; |
return n; |
} |
bool |
Instruction::setIndirect(int s, int dim, Value *value) |
{ |
assert(this->srcExists(s)); |
int p = srcs[s].indirect[dim]; |
if (p < 0) { |
if (!value) |
return true; |
p = srcs.size(); |
while (p > 0 && !srcExists(p - 1)) |
--p; |
} |
setSrc(p, value); |
srcs[p].usedAsPtr = (value != 0); |
srcs[s].indirect[dim] = value ? p : -1; |
return true; |
} |
bool |
Instruction::setPredicate(CondCode ccode, Value *value) |
{ |
cc = ccode; |
if (!value) { |
if (predSrc >= 0) { |
srcs[predSrc].set(NULL); |
predSrc = -1; |
} |
return true; |
} |
if (predSrc < 0) { |
predSrc = srcs.size(); |
while (predSrc > 0 && !srcExists(predSrc - 1)) |
--predSrc; |
} |
setSrc(predSrc, value); |
return true; |
} |
bool |
Instruction::writesPredicate() const |
{ |
for (int d = 0; defExists(d); ++d) |
if (getDef(d)->inFile(FILE_PREDICATE) || getDef(d)->inFile(FILE_FLAGS)) |
return true; |
return false; |
} |
static bool |
insnCheckCommutationDefSrc(const Instruction *a, const Instruction *b) |
{ |
for (int d = 0; a->defExists(d); ++d) |
for (int s = 0; b->srcExists(s); ++s) |
if (a->getDef(d)->interfers(b->getSrc(s))) |
return false; |
return true; |
} |
static bool |
insnCheckCommutationDefDef(const Instruction *a, const Instruction *b) |
{ |
for (int d = 0; a->defExists(d); ++d) |
for (int c = 0; b->defExists(c); ++c) |
if (a->getDef(d)->interfers(b->getDef(c))) |
return false; |
return true; |
} |
bool |
Instruction::isCommutationLegal(const Instruction *i) const |
{ |
bool ret = insnCheckCommutationDefDef(this, i); |
ret = ret && insnCheckCommutationDefSrc(this, i); |
ret = ret && insnCheckCommutationDefSrc(i, this); |
return ret; |
} |
// Construct a texture instruction with F32 type. The tex descriptor is
// zeroed and both indirect source indices are marked as unset (-1).
TexInstruction::TexInstruction(Function *fn, operation op)
   : Instruction(fn, op, TYPE_F32)
{
   memset(&tex, 0, sizeof(tex));

   tex.rIndirectSrc = tex.sIndirectSrc = -1;
}
// Detach every derivative and offset reference from its value.
TexInstruction::~TexInstruction()
{
   for (int c = 0; c < 3; ++c) {
      dPdx[c].set(NULL);
      dPdy[c].set(NULL);
   }

   // 4 offset vectors of 3 components each, visited in row-major order.
   for (int idx = 0; idx < 4 * 3; ++idx)
      offset[idx / 3][idx % 3].set(NULL);
}
// Clone this texture instruction into @i (or a new TexInstruction when @i is
// NULL): base-class state first, then the tex descriptor, then the
// derivatives (OP_TXD only) and the offsets in use.
TexInstruction *
TexInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   TexInstruction *copy;
   if (i)
      copy = static_cast<TexInstruction *>(i);
   else
      copy = new_TexInstruction(pol.context(), op);

   Instruction::clone(pol, copy);

   copy->tex = this->tex;

   if (op == OP_TXD) {
      const unsigned int dims = copy->tex.target.getDim();
      for (unsigned int c = 0; c < dims; ++c) {
         copy->dPdx[c].set(dPdx[c]);
         copy->dPdy[c].set(dPdy[c]);
      }
   }

   for (int n = 0; n < copy->tex.useOffsets; ++n) {
      for (int c = 0; c < 3; ++c)
         copy->offset[n][c].set(offset[n][c]);
   }

   return copy;
}
const struct TexInstruction::Target::Desc TexInstruction::Target::descTable[] = |
{ |
{ "1D", 1, 1, false, false, false }, |
{ "2D", 2, 2, false, false, false }, |
{ "2D_MS", 2, 3, false, false, false }, |
{ "3D", 3, 3, false, false, false }, |
{ "CUBE", 2, 3, false, true, false }, |
{ "1D_SHADOW", 1, 1, false, false, true }, |
{ "2D_SHADOW", 2, 2, false, false, true }, |
{ "CUBE_SHADOW", 2, 3, false, true, true }, |
{ "1D_ARRAY", 1, 2, true, false, false }, |
{ "2D_ARRAY", 2, 3, true, false, false }, |
{ "2D_MS_ARRAY", 2, 4, true, false, false }, |
{ "CUBE_ARRAY", 2, 4, true, true, false }, |
{ "1D_ARRAY_SHADOW", 1, 2, true, false, true }, |
{ "2D_ARRAY_SHADOW", 2, 3, true, false, true }, |
{ "RECT", 2, 2, false, false, false }, |
{ "RECT_SHADOW", 2, 2, false, false, true }, |
{ "CUBE_ARRAY_SHADOW", 2, 4, true, true, true }, |
{ "BUFFER", 1, 1, false, false, false }, |
}; |
void |
TexInstruction::setIndirectR(Value *v) |
{ |
int p = ((tex.rIndirectSrc < 0) && v) ? srcs.size() : tex.rIndirectSrc; |
if (p >= 0) { |
tex.rIndirectSrc = p; |
setSrc(p, v); |
srcs[p].usedAsPtr = !!v; |
} |
} |
void |
TexInstruction::setIndirectS(Value *v) |
{ |
int p = ((tex.sIndirectSrc < 0) && v) ? srcs.size() : tex.sIndirectSrc; |
if (p >= 0) { |
tex.sIndirectSrc = p; |
setSrc(p, v); |
srcs[p].usedAsPtr = !!v; |
} |
} |
// Construct a compare instruction with F32 type; the set condition starts
// out as CC_ALWAYS.
CmpInstruction::CmpInstruction(Function *fn, operation op)
   : Instruction(fn, op, TYPE_F32)
{
   this->setCond = CC_ALWAYS;
}
// Clone this compare instruction into @i (or a new CmpInstruction when @i is
// NULL). dType is copied before the base-class clone runs; the set
// condition is copied afterwards.
CmpInstruction *
CmpInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   CmpInstruction *copy;
   if (i)
      copy = static_cast<CmpInstruction *>(i);
   else
      copy = new_CmpInstruction(pol.context(), op);

   copy->dType = dType;
   Instruction::clone(pol, copy);
   copy->setCond = setCond;

   return copy;
}
// Construct a control-flow instruction. @targ is interpreted as a Function
// for OP_CALL and as a BasicBlock for every other opcode.
FlowInstruction::FlowInstruction(Function *fn, operation op, void *targ)
   : Instruction(fn, op, TYPE_NONE)
{
   if (op == OP_CALL)
      target.fn = reinterpret_cast<Function *>(targ);
   else
      target.bb = reinterpret_cast<BasicBlock *>(targ);

   switch (op) {
   case OP_BRA:
   case OP_CONT:
   case OP_BREAK:
   case OP_RET:
   case OP_EXIT:
      terminator = 1;
      break;
   case OP_JOIN:
      // A JOIN only ends the block when it carries a target.
      terminator = targ ? 1 : 0;
      break;
   default:
      break;
   }

   allWarp = 0;
   absolute = 0;
   limit = 0;
   builtin = 0;
   indirect = 0;
}
// Clone this flow instruction into @i (or a new FlowInstruction when @i is
// NULL). Builtin and call targets are copied as they are; a basic-block
// target is mapped through @pol.
// NOTE(review): the `indirect` flag that the constructor initialises is not
// copied here; confirm whether that is intended.
FlowInstruction *
FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
{
   FlowInstruction *copy;
   if (i)
      copy = static_cast<FlowInstruction *>(i);
   else
      copy = new_FlowInstruction(pol.context(), op, NULL);

   Instruction::clone(pol, copy);

   copy->allWarp = allWarp;
   copy->absolute = absolute;
   copy->limit = limit;
   copy->builtin = builtin;

   if (builtin) {
      copy->target.builtin = target.builtin;
      return copy;
   }

   if (op == OP_CALL) {
      copy->target.fn = target.fn;
      return copy;
   }

   if (target.bb)
      copy->target.bb = pol.get<BasicBlock>(target.bb);

   return copy;
}
// Create an empty program of the given type for target @arch. One memory
// pool is set up per instruction/value class, and the "MAIN" function is
// created and added to the call set.
Program::Program(Type type, Target *arch)
   : progType(type),
     target(arch),
     mem_Instruction(sizeof(Instruction), 6),
     mem_CmpInstruction(sizeof(CmpInstruction), 4),
     mem_TexInstruction(sizeof(TexInstruction), 4),
     mem_FlowInstruction(sizeof(FlowInstruction), 4),
     mem_LValue(sizeof(LValue), 8),
     mem_Symbol(sizeof(Symbol), 7),
     mem_ImmediateValue(sizeof(ImmediateValue), 7)
{
   // No binary has been produced yet.
   code = NULL;
   binSize = 0;

   maxGPR = -1;

   main = new Function(this, "MAIN", ~0);
   calls.insert(&main->call);

   dbgFlags = 0;
   optLevel = 0;

   targetPriv = NULL;
}
// Delete every function of the program, then release every value recorded
// in allRValues.
Program::~Program()
{
   ArrayList::Iterator funcIt = allFuncs.iterator();
   while (!funcIt.end()) {
      delete reinterpret_cast<Function *>(funcIt.get());
      funcIt.next();
   }

   ArrayList::Iterator valIt = allRValues.iterator();
   while (!valIt.end()) {
      releaseValue(reinterpret_cast<Value *>(valIt.get()));
      valIt.next();
   }
}
// Destroy @insn and hand its storage back to the pool matching its class.
//
// The class is determined BEFORE the destructor runs: asCmp()/asTex()/
// asFlow() are member functions, and calling them on an object whose
// lifetime has already ended is undefined behaviour.
void Program::releaseInstruction(Instruction *insn)
{
   // TODO: make this not suck so much

   const bool isCmp = insn->asCmp() != NULL;
   const bool isTex = !isCmp && insn->asTex() != NULL;
   const bool isFlow = !isCmp && !isTex && insn->asFlow() != NULL;

   insn->~Instruction();

   if (isCmp)
      mem_CmpInstruction.release(insn);
   else
   if (isTex)
      mem_TexInstruction.release(insn);
   else
   if (isFlow)
      mem_FlowInstruction.release(insn);
   else
      mem_Instruction.release(insn);
}
// Destroy @value and hand its storage back to the pool matching its class.
// A value that is neither an LValue, an immediate nor a symbol is destroyed
// but not returned to any pool.
//
// The class is determined BEFORE the destructor runs: asLValue()/asImm()/
// asSym() are member functions, and calling them on an object whose lifetime
// has already ended is undefined behaviour.
void Program::releaseValue(Value *value)
{
   const bool isLValue = value->asLValue() != NULL;
   const bool isImm = !isLValue && value->asImm() != NULL;
   const bool isSym = !isLValue && !isImm && value->asSym() != NULL;

   value->~Value();

   if (isLValue)
      mem_LValue.release(value);
   else
   if (isImm)
      mem_ImmediateValue.release(value);
   else
   if (isSym)
      mem_Symbol.release(value);
}
} // namespace nv50_ir |
extern "C" { |
static void |
nv50_ir_init_prog_info(struct nv50_ir_prog_info *info) |
{ |
#if defined(PIPE_SHADER_HULL) && defined(PIPE_SHADER_DOMAIN) |
if (info->type == PIPE_SHADER_HULL || info->type == PIPE_SHADER_DOMAIN) { |
info->prop.tp.domain = PIPE_PRIM_MAX; |
info->prop.tp.outputPrim = PIPE_PRIM_MAX; |
} |
#endif |
if (info->type == PIPE_SHADER_GEOMETRY) { |
info->prop.gp.instanceCount = 1; |
info->prop.gp.maxVertices = 1; |
} |
info->io.clipDistance = 0xff; |
info->io.pointSize = 0xff; |
info->io.instanceId = 0xff; |
info->io.vertexId = 0xff; |
info->io.edgeFlagIn = 0xff; |
info->io.edgeFlagOut = 0xff; |
info->io.fragDepth = 0xff; |
info->io.sampleMask = 0xff; |
info->io.backFaceColor[0] = info->io.backFaceColor[1] = 0xff; |
} |
int |
nv50_ir_generate_code(struct nv50_ir_prog_info *info) |
{ |
int ret = 0; |
nv50_ir::Program::Type type; |
nv50_ir_init_prog_info(info); |
#define PROG_TYPE_CASE(a, b) \ |
case PIPE_SHADER_##a: type = nv50_ir::Program::TYPE_##b; break |
switch (info->type) { |
PROG_TYPE_CASE(VERTEX, VERTEX); |
// PROG_TYPE_CASE(HULL, TESSELLATION_CONTROL); |
// PROG_TYPE_CASE(DOMAIN, TESSELLATION_EVAL); |
PROG_TYPE_CASE(GEOMETRY, GEOMETRY); |
PROG_TYPE_CASE(FRAGMENT, FRAGMENT); |
PROG_TYPE_CASE(COMPUTE, COMPUTE); |
default: |
type = nv50_ir::Program::TYPE_COMPUTE; |
break; |
} |
INFO_DBG(info->dbgFlags, VERBOSE, "translating program of type %u\n", type); |
nv50_ir::Target *targ = nv50_ir::Target::create(info->target); |
if (!targ) |
return -1; |
nv50_ir::Program *prog = new nv50_ir::Program(type, targ); |
if (!prog) |
return -1; |
prog->driver = info; |
prog->dbgFlags = info->dbgFlags; |
prog->optLevel = info->optLevel; |
switch (info->bin.sourceRep) { |
#if 0 |
case PIPE_IR_LLVM: |
case PIPE_IR_GLSL: |
return -1; |
case PIPE_IR_SM4: |
ret = prog->makeFromSM4(info) ? 0 : -2; |
break; |
case PIPE_IR_TGSI: |
#endif |
default: |
ret = prog->makeFromTGSI(info) ? 0 : -2; |
break; |
} |
if (ret < 0) |
goto out; |
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) |
prog->print(); |
targ->parseDriverInfo(info); |
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA); |
prog->convertToSSA(); |
if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE) |
prog->print(); |
prog->optimizeSSA(info->optLevel); |
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA); |
if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) |
prog->print(); |
if (!prog->registerAllocation()) { |
ret = -4; |
goto out; |
} |
prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA); |
prog->optimizePostRA(info->optLevel); |
if (!prog->emitBinary(info)) { |
ret = -5; |
goto out; |
} |
out: |
INFO_DBG(prog->dbgFlags, VERBOSE, "nv50_ir_generate_code: ret = %i\n", ret); |
info->bin.maxGPR = prog->maxGPR; |
info->bin.code = prog->code; |
info->bin.codeSize = prog->binSize; |
info->bin.tlsSpace = prog->tlsSize; |
delete prog; |
nv50_ir::Target::destroy(targ); |
return ret; |
} |
} // extern "C" |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir.h |
---|
0,0 → 1,1227 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_H__ |
#define __NV50_IR_H__ |
#include <stdio.h> |
#include <stdlib.h> |
#include <stdint.h> |
#include <deque> |
#include <list> |
#include <vector> |
#include <tr1/unordered_set> |
#include "codegen/nv50_ir_util.h" |
#include "codegen/nv50_ir_graph.h" |
#include "codegen/nv50_ir_driver.h" |
namespace nv50_ir { |
// IR opcodes. The numeric order is significant: everything below OP_MOV is
// treated as a pseudo instruction (see Instruction::isPseudo()).
enum operation
{
   OP_NOP = 0,
   OP_PHI,
   OP_UNION,      // unify a new definition and several source values
   OP_SPLIT,      // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced)
   OP_MERGE,      // opposite of split, e.g. combine 2 32 bit into a 64 bit value
   OP_CONSTRAINT, // copy values into consecutive registers
   OP_MOV,        // simple copy, no modifiers allowed
   OP_LOAD,
   OP_STORE,
   OP_ADD,        // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds
   OP_SUB,
   OP_MUL,
   OP_DIV,
   OP_MOD,
   OP_MAD,
   OP_FMA,
   OP_SAD,        // abs(src0 - src1) + src2
   OP_ABS,
   OP_NEG,
   OP_NOT,
   OP_AND,
   OP_OR,
   OP_XOR,
   OP_SHL,
   OP_SHR,
   OP_MAX,
   OP_MIN,
   OP_SAT,        // CLAMP(f32, 0.0, 1.0)
   OP_CEIL,
   OP_FLOOR,
   OP_TRUNC,
   OP_CVT,
   OP_SET_AND,    // dst = (src0 CMP src1) & src2
   OP_SET_OR,
   OP_SET_XOR,
   OP_SET,
   OP_SELP,       // dst = src2 ? src0 : src1
   OP_SLCT,       // dst = (src2 CMP 0) ? src0 : src1
   OP_RCP,
   OP_RSQ,
   OP_LG2,
   OP_SIN,
   OP_COS,
   OP_EX2,
   OP_EXP,        // exponential (base M_E)
   OP_LOG,        // natural logarithm
   OP_PRESIN,
   OP_PREEX2,
   OP_SQRT,
   OP_POW,
   OP_BRA,
   OP_CALL,
   OP_RET,
   OP_CONT,
   OP_BREAK,
   OP_PRERET,
   OP_PRECONT,
   OP_PREBREAK,
   OP_BRKPT,      // breakpoint (not related to loops)
   OP_JOINAT,     // push control flow convergence point
   OP_JOIN,       // converge
   OP_DISCARD,
   OP_EXIT,
   OP_MEMBAR,     // memory barrier (mfence, lfence, sfence)
   OP_VFETCH,     // indirection 0 in attribute space, indirection 1 is vertex base
   OP_PFETCH,     // fetch base address of vertex src0 (immediate) [+ src1]
   OP_EXPORT,
   OP_LINTERP,
   OP_PINTERP,
   OP_EMIT,       // emit vertex
   OP_RESTART,    // restart primitive
   OP_TEX,
   OP_TXB,        // texture bias
   OP_TXL,        // texture lod
   OP_TXF,        // texel fetch
   OP_TXQ,        // texture size query
   OP_TXD,        // texture derivatives
   OP_TXG,        // texture gather
   OP_TXLQ,       // texture query lod
   OP_TEXCSAA,    // texture op for coverage sampling
   OP_TEXPREP,    // turn cube map array into 2d array coordinates
   OP_SULDB,      // surface load (raw)
   OP_SULDP,      // surface load (formatted)
   OP_SUSTB,      // surface store (raw)
   OP_SUSTP,      // surface store (formatted)
   OP_SUREDB,
   OP_SUREDP,     // surface reduction (atomic op)
   OP_SULEA,      // surface load effective address
   OP_SUBFM,      // surface bitfield manipulation
   OP_SUCLAMP,    // clamp surface coordinates
   OP_SUEAU,      // surface effective address
   OP_MADSP,      // special integer multiply-add
   OP_TEXBAR,     // texture dependency barrier
   OP_DFDX,
   OP_DFDY,
   OP_RDSV,       // read system value
   OP_WRSV,       // write system value
   OP_PIXLD,      // get info about raster object or surfaces
   OP_QUADOP,
   OP_QUADON,
   OP_QUADPOP,
   OP_POPCNT,     // bitcount(src0 & src1)
   OP_INSBF,      // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
   OP_EXTBF,      // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
   OP_BFIND,      // find highest/lowest set bit
   OP_PERMT,      // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
   OP_ATOM,
   OP_BAR,        // execution barrier, sources = { id, thread count, predicate }
   OP_VADD,       // byte/word vector operations
   OP_VAVG,
   OP_VMIN,
   OP_VMAX,
   OP_VSAD,
   OP_VSET,
   OP_VSHR,
   OP_VSHL,
   OP_VSEL,
   OP_CCTL,       // cache control
   OP_SHFL,       // warp shuffle
   OP_LAST
};
// various instruction-specific modifier definitions Instruction::subOp
// MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs)
//
// Every macro argument is parenthesized in the expansion so that compound
// argument expressions cannot be re-associated by operator precedence.
#define NV50_IR_SUBOP_MUL_HIGH     1
#define NV50_IR_SUBOP_EMIT_RESTART 1
#define NV50_IR_SUBOP_LDC_IL       1
#define NV50_IR_SUBOP_LDC_IS       2
#define NV50_IR_SUBOP_LDC_ISL      3
#define NV50_IR_SUBOP_SHIFT_WRAP   1
#define NV50_IR_SUBOP_EMU_PRERET   1
#define NV50_IR_SUBOP_TEXBAR(n)    (n)
#define NV50_IR_SUBOP_MOV_FINAL    1
#define NV50_IR_SUBOP_EXTBF_REV    1
#define NV50_IR_SUBOP_BFIND_SAMT   1
#define NV50_IR_SUBOP_RCPRSQ_64H   1
#define NV50_IR_SUBOP_PERMT_F4E    1
#define NV50_IR_SUBOP_PERMT_B4E    2
#define NV50_IR_SUBOP_PERMT_RC8    3
#define NV50_IR_SUBOP_PERMT_ECL    4
#define NV50_IR_SUBOP_PERMT_ECR    5
#define NV50_IR_SUBOP_PERMT_RC16   6
#define NV50_IR_SUBOP_BAR_SYNC     0
#define NV50_IR_SUBOP_BAR_ARRIVE   1
#define NV50_IR_SUBOP_BAR_RED_AND  2
#define NV50_IR_SUBOP_BAR_RED_OR   3
#define NV50_IR_SUBOP_BAR_RED_POPC 4
// MEMBAR subOp: direction in bits [1:0], scope in the bits above.
#define NV50_IR_SUBOP_MEMBAR_L     1
#define NV50_IR_SUBOP_MEMBAR_S     2
#define NV50_IR_SUBOP_MEMBAR_M     3
#define NV50_IR_SUBOP_MEMBAR_CTA  (0 << 2)
#define NV50_IR_SUBOP_MEMBAR_GL   (1 << 2)
#define NV50_IR_SUBOP_MEMBAR_SYS  (2 << 2)
#define NV50_IR_SUBOP_MEMBAR_DIR(m)   ((m) & 0x3)
#define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3)
#define NV50_IR_SUBOP_MEMBAR(d,s) \
   (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s)
#define NV50_IR_SUBOP_ATOM_ADD      0
#define NV50_IR_SUBOP_ATOM_MIN      1
#define NV50_IR_SUBOP_ATOM_MAX      2
#define NV50_IR_SUBOP_ATOM_INC      3
#define NV50_IR_SUBOP_ATOM_DEC      4
#define NV50_IR_SUBOP_ATOM_AND      5
#define NV50_IR_SUBOP_ATOM_OR       6
#define NV50_IR_SUBOP_ATOM_XOR      7
#define NV50_IR_SUBOP_ATOM_CAS      8
#define NV50_IR_SUBOP_ATOM_EXCH     9
#define NV50_IR_SUBOP_CCTL_IV       5
#define NV50_IR_SUBOP_CCTL_IVALL    6
#define NV50_IR_SUBOP_SUST_IGN      0
#define NV50_IR_SUBOP_SUST_TRAP     1
#define NV50_IR_SUBOP_SUST_SDCL     3
#define NV50_IR_SUBOP_SULD_ZERO     0
#define NV50_IR_SUBOP_SULD_TRAP     1
#define NV50_IR_SUBOP_SULD_SDCL     3
#define NV50_IR_SUBOP_SUBFM_3D      1
#define NV50_IR_SUBOP_SUCLAMP_2D    0x10
#define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | (((d) == 2) ? 0x10 : 0))
#define NV50_IR_SUBOP_PIXLD_COUNT       0
#define NV50_IR_SUBOP_PIXLD_COVMASK     1
#define NV50_IR_SUBOP_PIXLD_COVERED     2
#define NV50_IR_SUBOP_PIXLD_OFFSET      3
#define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4
#define NV50_IR_SUBOP_PIXLD_SAMPLEID    5
#define NV50_IR_SUBOP_SHFL_IDX  0
#define NV50_IR_SUBOP_SHFL_UP   1
#define NV50_IR_SUBOP_SHFL_DOWN 2
#define NV50_IR_SUBOP_SHFL_BFLY 3
#define NV50_IR_SUBOP_MADSP_SD     0xffff
// Yes, we could represent those with DataType.
// Or put the type into operation and have a couple 1000 values in that enum.
// This will have to do for now.
// The bitfields are supposed to correspond to nve4 ISA.
#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
#define NV50_IR_SUBOP_V1(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x0000)
#define NV50_IR_SUBOP_V2(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x4000)
#define NV50_IR_SUBOP_V4(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x8000)
#define NV50_IR_SUBOP_Vn(n)        ((n) >> 14)
// Operand / result data types, ordered from TYPE_NONE to the 128-bit type.
enum DataType
{
   TYPE_NONE = 0,
   TYPE_U8,
   TYPE_S8,
   TYPE_U16,
   TYPE_S16,
   TYPE_U32,
   TYPE_S32,
   TYPE_U64,   // 64 bit operations are only lowered after register allocation
   TYPE_S64,
   TYPE_F16,
   TYPE_F32,
   TYPE_F64,
   TYPE_B96,
   TYPE_B128
};
// Condition codes. Several names alias the same value; which alias applies
// depends on whether the condition reads FILE_FLAGS or FILE_PREDICATE.
enum CondCode
{
   CC_FL     = 0,
   CC_NEVER  = CC_FL,   // when used with FILE_FLAGS
   CC_LT     = 1,
   CC_EQ     = 2,
   CC_NOT_P  = CC_EQ,   // when used with FILE_PREDICATE
   CC_LE     = 3,
   CC_GT     = 4,
   CC_NE     = 5,
   CC_P      = CC_NE,
   CC_GE     = 6,
   CC_TR     = 7,
   CC_ALWAYS = CC_TR,
   CC_U      = 8,
   CC_LTU    = 9,
   CC_EQU    = 10,
   CC_LEU    = 11,
   CC_GTU    = 12,
   CC_NEU    = 13,
   CC_GEU    = 14,
   CC_NO     = 0x10,
   CC_NC     = 0x11,
   CC_NS     = 0x12,
   CC_NA     = 0x13,
   CC_A      = 0x14,
   CC_S      = 0x15,
   CC_C      = 0x16,
   CC_O      = 0x17
};
// Rounding modes; the *I variants round to an integer value.
enum RoundMode
{
   ROUND_N = 0, // nearest
   ROUND_M,     // towards -inf
   ROUND_Z,     // towards 0
   ROUND_P,     // towards +inf
   ROUND_NI,    // nearest integer
   ROUND_MI,    // to integer towards -inf
   ROUND_ZI,    // to integer towards 0
   ROUND_PI     // to integer towards +inf
};
// Cache modes. CACHE_WB and CACHE_WT are aliases of CACHE_CA and CACHE_CV.
enum CacheMode
{
   CACHE_CA = 0,         // cache at all levels
   CACHE_WB = CACHE_CA,  // cache write back
   CACHE_CG,             // cache at global level
   CACHE_CS,             // cache streaming
   CACHE_CV,             // cache as volatile
   CACHE_WT = CACHE_CV   // cache write-through
};
// Storage files a value can live in. Everything up to LAST_REGISTER_FILE is
// a register file; the remaining entries are immediates and memory spaces.
enum DataFile
{
   FILE_NULL = 0,
   FILE_GPR,
   FILE_PREDICATE,       // boolean predicate
   FILE_FLAGS,           // zero/sign/carry/overflow bits
   FILE_ADDRESS,
   LAST_REGISTER_FILE = FILE_ADDRESS,
   FILE_IMMEDIATE,
   FILE_MEMORY_CONST,
   FILE_SHADER_INPUT,
   FILE_SHADER_OUTPUT,
   FILE_MEMORY_GLOBAL,
   FILE_MEMORY_SHARED,
   FILE_MEMORY_LOCAL,
   FILE_SYSTEM_VALUE,
   DATA_FILE_COUNT
};
// Texture target kinds; TEX_TARGET_COUNT is the number of targets.
enum TexTarget
{
   TEX_TARGET_1D = 0,
   TEX_TARGET_2D,
   TEX_TARGET_2D_MS,
   TEX_TARGET_3D,
   TEX_TARGET_CUBE,
   TEX_TARGET_1D_SHADOW,
   TEX_TARGET_2D_SHADOW,
   TEX_TARGET_CUBE_SHADOW,
   TEX_TARGET_1D_ARRAY,
   TEX_TARGET_2D_ARRAY,
   TEX_TARGET_2D_MS_ARRAY,
   TEX_TARGET_CUBE_ARRAY,
   TEX_TARGET_1D_ARRAY_SHADOW,
   TEX_TARGET_2D_ARRAY_SHADOW,
   TEX_TARGET_RECT,
   TEX_TARGET_RECT_SHADOW,
   TEX_TARGET_CUBE_ARRAY_SHADOW,
   TEX_TARGET_BUFFER,
   TEX_TARGET_COUNT
};
// System value semantics; SV_LAST is the number of entries.
enum SVSemantic
{
   SV_POSITION = 0,   // WPOS
   SV_VERTEX_ID,
   SV_INSTANCE_ID,
   SV_INVOCATION_ID,
   SV_PRIMITIVE_ID,
   SV_VERTEX_COUNT,   // gl_PatchVerticesIn
   SV_LAYER,
   SV_VIEWPORT_INDEX,
   SV_YDIR,
   SV_FACE,
   SV_POINT_SIZE,
   SV_POINT_COORD,
   SV_CLIP_DISTANCE,
   SV_SAMPLE_INDEX,
   SV_SAMPLE_POS,
   SV_SAMPLE_MASK,
   SV_TESS_FACTOR,
   SV_TESS_COORD,
   SV_TID,
   SV_CTAID,
   SV_NTID,
   SV_GRIDID,
   SV_NCTAID,
   SV_LANEID,
   SV_PHYSID,
   SV_NPHYSID,
   SV_CLOCK,
   SV_LBASE,
   SV_SBASE,
   SV_VERTEX_STRIDE,
   SV_INVOCATION_INFO,
   SV_UNDEFINED,
   SV_LAST
};
class Program; |
class Function; |
class BasicBlock; |
class Target; |
class Instruction; |
class CmpInstruction; |
class TexInstruction; |
class FlowInstruction; |
class Value; |
class LValue; |
class Symbol; |
class ImmediateValue; |
struct Storage |
{ |
DataFile file; |
int8_t fileIndex; // signed, may be indirect for CONST[] |
uint8_t size; // this should match the Instruction type's size |
DataType type; // mainly for pretty printing |
union { |
uint64_t u64; // immediate values |
uint32_t u32; |
uint16_t u16; |
uint8_t u8; |
int64_t s64; |
int32_t s32; |
int16_t s16; |
int8_t s8; |
float f32; |
double f64; |
int32_t offset; // offset from 0 (base of address space) |
int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4) |
struct { |
SVSemantic sv; |
int index; |
} sv; |
} data; |
}; |
// Source modifier bits.
// precedence: NOT after SAT after NEG after ABS
#define NV50_IR_MOD_ABS (1 << 0)
#define NV50_IR_MOD_NEG (1 << 1)
#define NV50_IR_MOD_SAT (1 << 2)
#define NV50_IR_MOD_NOT (1 << 3)
#define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)

// Interpolation mode: bits [1:0].
#define NV50_IR_INTERP_MODE_MASK   0x3
#define NV50_IR_INTERP_LINEAR      (0 << 0)
#define NV50_IR_INTERP_PERSPECTIVE (1 << 0)
#define NV50_IR_INTERP_FLAT        (2 << 0)
#define NV50_IR_INTERP_SC          (3 << 0) // what exactly is that ?

// Interpolation sample location: bits [3:2].
#define NV50_IR_INTERP_SAMPLE_MASK 0xc
#define NV50_IR_INTERP_DEFAULT     (0 << 2)
#define NV50_IR_INTERP_CENTROID    (1 << 2)
#define NV50_IR_INTERP_OFFSET      (2 << 2)
#define NV50_IR_INTERP_SAMPLEID    (3 << 2)
// do we really want this to be a class ? |
class Modifier |
{ |
public: |
Modifier() : bits(0) { } |
Modifier(unsigned int m) : bits(m) { } |
Modifier(operation op); |
// @return new Modifier applying a after b (asserts if unrepresentable) |
Modifier operator*(const Modifier) const; |
Modifier operator*=(const Modifier m) { *this = *this * m; return *this; } |
Modifier operator==(const Modifier m) const { return m.bits == bits; } |
Modifier operator!=(const Modifier m) const { return m.bits != bits; } |
inline Modifier operator&(const Modifier m) const { return bits & m.bits; } |
inline Modifier operator|(const Modifier m) const { return bits | m.bits; } |
inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; } |
operation getOp() const; |
inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } |
inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } |
inline operator bool() const { return bits ? true : false; } |
void applyTo(ImmediateValue &imm) const; |
int print(char *buf, size_t size) const; |
private: |
uint8_t bits; |
}; |
class ValueRef |
{ |
public: |
ValueRef(Value * = NULL); |
ValueRef(const ValueRef&); |
~ValueRef(); |
inline bool exists() const { return value != NULL; } |
void set(Value *); |
void set(const ValueRef&); |
inline Value *get() const { return value; } |
inline Value *rep() const; |
inline Instruction *getInsn() const { return insn; } |
inline void setInsn(Instruction *inst) { insn = inst; } |
inline bool isIndirect(int dim) const { return indirect[dim] >= 0; } |
inline const ValueRef *getIndirect(int dim) const; |
inline DataFile getFile() const; |
inline unsigned getSize() const; |
// SSA: return eventual (traverse MOVs) literal value, if it exists |
bool getImmediate(ImmediateValue&) const; |
public: |
Modifier mod; |
int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i]) |
uint8_t swizzle; |
bool usedAsPtr; // for printing |
private: |
Value *value; |
Instruction *insn; |
}; |
class ValueDef |
{ |
public: |
ValueDef(Value * = NULL); |
ValueDef(const ValueDef&); |
~ValueDef(); |
inline bool exists() const { return value != NULL; } |
inline Value *get() const { return value; } |
inline Value *rep() const; |
void set(Value *); |
bool mayReplace(const ValueRef &); |
void replace(const ValueRef &, bool doSet); // replace all uses of the old value |
inline Instruction *getInsn() const { return insn; } |
inline void setInsn(Instruction *inst) { insn = inst; } |
inline DataFile getFile() const; |
inline unsigned getSize() const; |
inline void setSSA(LValue *); |
inline const LValue *preSSA() const; |
private: |
Value *value; // should make this LValue * ... |
LValue *origin; // pre SSA value |
Instruction *insn; |
}; |
class Value |
{ |
public: |
Value(); |
virtual ~Value() { } |
virtual Value *clone(ClonePolicy<Function>&) const = 0; |
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0; |
virtual bool equals(const Value *, bool strict = false) const; |
virtual bool interfers(const Value *) const; |
virtual bool isUniform() const { return true; } |
inline Value *rep() const { return join; } |
inline Instruction *getUniqueInsn() const; |
inline Instruction *getInsn() const; // use when uniqueness is certain |
inline int refCount() { return uses.size(); } |
inline LValue *asLValue(); |
inline Symbol *asSym(); |
inline ImmediateValue *asImm(); |
inline const Symbol *asSym() const; |
inline const ImmediateValue *asImm() const; |
inline bool inFile(DataFile f) { return reg.file == f; } |
static inline Value *get(Iterator&); |
std::tr1::unordered_set<ValueRef *> uses; |
std::list<ValueDef *> defs; |
typedef std::tr1::unordered_set<ValueRef *>::iterator UseIterator; |
typedef std::tr1::unordered_set<ValueRef *>::const_iterator UseCIterator; |
typedef std::list<ValueDef *>::iterator DefIterator; |
typedef std::list<ValueDef *>::const_iterator DefCIterator; |
int id; |
Storage reg; |
// TODO: these should be in LValue: |
Interval livei; |
Value *join; |
}; |
class LValue : public Value |
{ |
public: |
LValue(Function *, DataFile file); |
LValue(Function *, LValue *); |
~LValue() { } |
virtual bool isUniform() const; |
virtual LValue *clone(ClonePolicy<Function>&) const; |
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
public: |
unsigned compMask : 8; // compound/component mask |
unsigned compound : 1; // used by RA, value involved in split/merge |
unsigned ssa : 1; |
unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0) |
unsigned noSpill : 1; // do not spill (e.g. if spill temporary already) |
}; |
class Symbol : public Value |
{ |
public: |
Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0); |
~Symbol() { } |
virtual Symbol *clone(ClonePolicy<Function>&) const; |
virtual bool equals(const Value *that, bool strict) const; |
virtual bool isUniform() const; |
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
// print with indirect values |
int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const; |
inline void setFile(DataFile file, ubyte fileIndex = 0) |
{ |
reg.file = file; |
reg.fileIndex = fileIndex; |
} |
inline void setOffset(int32_t offset); |
inline void setAddress(Symbol *base, int32_t offset); |
inline void setSV(SVSemantic sv, uint32_t idx = 0); |
inline const Symbol *getBase() const { return baseSym; } |
private: |
Symbol *baseSym; // array base for Symbols representing array elements |
}; |
class ImmediateValue : public Value |
{ |
public: |
ImmediateValue() { } |
ImmediateValue(Program *, uint32_t); |
ImmediateValue(Program *, float); |
ImmediateValue(Program *, double); |
// NOTE: not added to program with |
ImmediateValue(const ImmediateValue *, DataType ty); |
~ImmediateValue() { }; |
virtual ImmediateValue *clone(ClonePolicy<Function>&) const; |
virtual bool equals(const Value *that, bool strict) const; |
// these only work if 'type' is valid (we mostly use untyped literals): |
bool isInteger(const int ival) const; // ival is cast to this' type |
bool isNegative() const; |
bool isPow2() const; |
void applyLog2(); |
// for constant folding: |
ImmediateValue operator+(const ImmediateValue&) const; |
ImmediateValue operator-(const ImmediateValue&) const; |
ImmediateValue operator*(const ImmediateValue&) const; |
ImmediateValue operator/(const ImmediateValue&) const; |
ImmediateValue& operator=(const ImmediateValue&); // only sets value ! |
bool compare(CondCode cc, float fval) const; |
virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; |
}; |
// Generic IR instruction: an operation (op) with a list of definitions (defs) |
// and a list of sources (srcs). Instructions are chained through next/prev |
// and record the BasicBlock they currently belong to in 'bb'. |
class Instruction |
{ |
public: |
Instruction(); |
Instruction(Function *, operation, DataType); |
virtual ~Instruction(); |
virtual Instruction *clone(ClonePolicy<Function>&, |
Instruction * = NULL) const; |
void setDef(int i, Value *); |
void setSrc(int s, Value *); |
void setSrc(int s, const ValueRef&); |
void swapSources(int a, int b); |
void moveSources(int s, int delta); |
bool setIndirect(int s, int dim, Value *); |
// unchecked accessors: the index is passed straight to the deque |
inline ValueRef& src(int s) { return srcs[s]; } |
inline ValueDef& def(int s) { return defs[s]; } |
inline const ValueRef& src(int s) const { return srcs[s]; } |
inline const ValueDef& def(int s) const { return defs[s]; } |
inline Value *getDef(int d) const { return defs[d].get(); } |
inline Value *getSrc(int s) const { return srcs[s].get(); } |
inline Value *getIndirect(int s, int dim) const; |
// bounds-checked existence tests for def / src slots |
inline bool defExists(unsigned d) const |
{ |
return d < defs.size() && defs[d].exists(); |
} |
inline bool srcExists(unsigned s) const |
{ |
return s < srcs.size() && srcs[s].exists(); |
} |
inline bool constrainedDefs() const; |
bool setPredicate(CondCode ccode, Value *); |
inline Value *getPredicate() const; |
bool writesPredicate() const; |
// predSrc / flagsSrc are negative when no predicate / flags source is set |
inline bool isPredicated() const { return predSrc >= 0; } |
inline void setFlagsSrc(int s, Value *); |
inline void setFlagsDef(int d, Value *); |
inline bool usesFlags() const { return flagsSrc >= 0; } |
unsigned int defCount() const { return defs.size(); }; |
unsigned int defCount(unsigned int mask, bool singleFile = false) const; |
unsigned int srcCount() const { return srcs.size(); }; |
unsigned int srcCount(unsigned int mask, bool singleFile = false) const; |
// save & remove / set indirect[0,1] and predicate source |
void takeExtraSources(int s, Value *[3]); |
void putExtraSources(int s, Value *[3]); |
// single-argument form sets both the destination and the source type |
inline void setType(DataType type) { dType = sType = type; } |
inline void setType(DataType dtype, DataType stype) |
{ |
dType = dtype; |
sType = stype; |
} |
// operations ordered before OP_MOV in the 'operation' enum count as pseudo |
inline bool isPseudo() const { return op < OP_MOV; } |
bool isDead() const; |
bool isNop() const; |
bool isCommutationLegal(const Instruction *) const; // must be adjacent ! |
bool isActionEqual(const Instruction *) const; |
bool isResultEqual(const Instruction *) const; |
void print() const; |
inline CmpInstruction *asCmp(); |
inline TexInstruction *asTex(); |
inline FlowInstruction *asFlow(); |
inline const TexInstruction *asTex() const; |
inline const CmpInstruction *asCmp() const; |
inline const FlowInstruction *asFlow() const; |
public: |
Instruction *next; |
Instruction *prev; |
int id; |
int serial; // CFG order |
operation op; |
DataType dType; // destination or defining type |
DataType sType; // source or secondary type |
CondCode cc; |
RoundMode rnd; |
CacheMode cache; |
uint16_t subOp; // quadop, 1 for mul-high, etc. |
unsigned encSize : 4; // encoding size in bytes |
unsigned saturate : 1; // to [0.0f, 1.0f] |
unsigned join : 1; // converge control flow (use OP_JOIN until end) |
unsigned fixed : 1; // prevent dead code elimination |
unsigned terminator : 1; // end of basic block |
unsigned ftz : 1; // flush denormal to zero |
unsigned dnz : 1; // denormals, NaN are zero |
unsigned ipa : 4; // interpolation mode |
unsigned lanes : 4; |
unsigned perPatch : 1; |
unsigned exit : 1; // terminate program after insn |
unsigned mask : 4; // for vector ops |
int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor |
int8_t predSrc; |
int8_t flagsDef; |
int8_t flagsSrc; |
uint32_t sched; // scheduling data (NOTE: maybe move to separate storage) |
BasicBlock *bb; |
protected: |
std::deque<ValueDef> defs; // no gaps ! |
std::deque<ValueRef> srcs; // no gaps ! |
// instruction specific methods: |
// (don't want to subclass, would need more constructors and memory pools) |
public: |
inline void setInterpolate(unsigned int mode) { ipa = mode; } |
// the 4-bit 'ipa' field is split: low two bits / high two bits |
unsigned int getInterpMode() const { return ipa & 0x3; } |
unsigned int getSampleMode() const { return ipa & 0xc; } |
private: |
void init(); |
}; |
// Selector for texture query instructions; stored in TexInstruction::tex.query. |
enum TexQuery |
{ |
TXQ_DIMS, |
TXQ_TYPE, |
TXQ_SAMPLE_POSITION, |
TXQ_FILTER, |
TXQ_LOD, |
TXQ_WRAP, |
TXQ_BORDER_COLOUR |
}; |
// Instruction subclass carrying texture state (the 'tex' struct) plus |
// derivative and offset references. |
class TexInstruction : public Instruction |
{ |
public: |
// Thin wrapper around a TexTarget enum value; every property query is a |
// lookup in the static descTable indexed by that value. |
class Target |
{ |
public: |
Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { } |
const char *getName() const { return descTable[target].name; } |
unsigned int getArgCount() const { return descTable[target].argc; } |
unsigned int getDim() const { return descTable[target].dim; } |
int isArray() const { return descTable[target].array ? 1 : 0; } |
int isCube() const { return descTable[target].cube ? 1 : 0; } |
int isShadow() const { return descTable[target].shadow ? 1 : 0; } |
int isMS() const { |
return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; } |
// replace a multisample target by its non-multisample 2D counterpart |
void clearMS() { |
if (isMS()) { |
if (isArray()) |
target = TEX_TARGET_2D_ARRAY; |
else |
target = TEX_TARGET_2D; |
} |
} |
Target& operator=(TexTarget targ) |
{ |
assert(targ < TEX_TARGET_COUNT); |
target = targ; |
return *this; |
} |
inline bool operator==(TexTarget targ) const { return target == targ; } |
inline bool operator!=(TexTarget targ) const { return target != targ; } |
enum TexTarget getEnum() const { return target; } |
private: |
// one descTable row per TexTarget value |
struct Desc |
{ |
char name[19]; |
uint8_t dim; |
uint8_t argc; |
bool array; |
bool cube; |
bool shadow; |
}; |
static const struct Desc descTable[TEX_TARGET_COUNT]; |
private: |
enum TexTarget target; |
}; |
public: |
TexInstruction(Function *, operation); |
virtual ~TexInstruction(); |
virtual TexInstruction *clone(ClonePolicy<Function>&, |
Instruction * = NULL) const; |
// set target plus the r / s indices in one call |
inline void setTexture(Target targ, uint8_t r, uint8_t s) |
{ |
tex.r = r; |
tex.s = s; |
tex.target = targ; |
} |
void setIndirectR(Value *); |
void setIndirectS(Value *); |
inline Value *getIndirectR() const; |
inline Value *getIndirectS() const; |
public: |
struct { |
Target target; |
uint16_t r; |
uint16_t s; |
int8_t rIndirectSrc; |
int8_t sIndirectSrc; |
uint8_t mask; |
uint8_t gatherComp; |
bool liveOnly; // only execute on live pixels of a quad (optimization) |
bool levelZero; |
bool derivAll; |
int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets |
int8_t offset[3]; // only used on nv50 |
enum TexQuery query; |
} tex; |
ValueRef dPdx[3]; |
ValueRef dPdy[3]; |
ValueRef offset[4][3]; |
}; |
// Instruction subclass that additionally stores a condition code (setCond), |
// accessed through setCondition() / getCondition(). |
class CmpInstruction : public Instruction |
{ |
public: |
CmpInstruction(Function *, operation); |
virtual CmpInstruction *clone(ClonePolicy<Function>&, |
Instruction * = NULL) const; |
void setCondition(CondCode cond) { setCond = cond; } |
CondCode getCondition() const { return setCond; } |
public: |
CondCode setCond; |
}; |
// Instruction subclass for control flow; 'target' holds the destination as a |
// basic block, a builtin index or a function. |
class FlowInstruction : public Instruction |
{ |
public: |
FlowInstruction(Function *, operation, void *target); |
virtual FlowInstruction *clone(ClonePolicy<Function>&, |
Instruction * = NULL) const; |
public: |
unsigned allWarp : 1; |
unsigned absolute : 1; |
unsigned limit : 1; |
unsigned builtin : 1; // true for calls to emulation code |
unsigned indirect : 1; // target in src(0) |
// NOTE(review): which member is active presumably depends on op and the |
// 'builtin' flag above - confirm against the constructor. |
union { |
BasicBlock *bb; |
int builtin; |
Function *fn; |
} target; |
}; |
// A basic block: a linked list of instructions (phi instructions first, |
// tracked by 'phi'; then the others, starting at 'entry'; last one is 'exit') |
// together with its nodes in the control flow graph and the dominator graph. |
class BasicBlock |
{ |
public: |
BasicBlock(Function *); |
~BasicBlock(); |
BasicBlock *clone(ClonePolicy<Function>&) const; |
inline int getId() const { return id; } |
inline unsigned int getInsnCount() const { return numInsns; } |
// true if the last instruction has its 'terminator' bit set |
inline bool isTerminated() const { return exit && exit->terminator; } |
bool dominatedBy(BasicBlock *bb); |
inline bool reachableBy(const BasicBlock *by, const BasicBlock *term); |
// returns mask of conditional out blocks |
// e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF |
unsigned int initiatesSimpleConditional() const; |
public: |
Function *getFunction() const { return func; } |
Program *getProgram() const { return program; } |
Instruction *getEntry() const { return entry; } // first non-phi instruction |
Instruction *getPhi() const { return phi; } |
// first instruction of the block, phi instructions included |
Instruction *getFirst() const { return phi ? phi : entry; } |
Instruction *getExit() const { return exit; } |
// the instruction being inserted must be unlinked (next == prev == NULL) |
void insertHead(Instruction *); |
void insertTail(Instruction *); |
void insertBefore(Instruction *, Instruction *); |
void insertAfter(Instruction *, Instruction *); |
void remove(Instruction *); |
void permuteAdjacent(Instruction *, Instruction *); |
BasicBlock *idom() const; |
// NOTE: currently does not rebuild the dominator tree |
BasicBlock *splitBefore(Instruction *, bool attach = true); |
BasicBlock *splitAfter(Instruction *, bool attach = true); |
DLList& getDF() { return df; } |
DLList::Iterator iterDF() { return df.iterator(); } |
static inline BasicBlock *get(Iterator&); |
static inline BasicBlock *get(Graph::Node *); |
public: |
Graph::Node cfg; // first edge is branch *taken* (the ELSE branch) |
Graph::Node dom; |
BitSet liveSet; |
BitSet defSet; |
uint32_t binPos; |
uint32_t binSize; |
Instruction *joinAt; // for quick reference |
bool explicitCont; // loop headers: true if loop contains continue stmts |
private: |
int id; |
DLList df; |
Instruction *phi; |
Instruction *entry; |
Instruction *exit; |
unsigned int numInsns; |
private: |
Function *func; |
Program *program; |
// shared implementation of splitBefore() / splitAfter() |
void splitCommon(Instruction *, BasicBlock *, bool attach); |
}; |
// A function of a Program. It keeps lists of all its basic blocks, |
// instructions and LValues (released again in ~Function) and the root of its |
// control flow graph. |
class Function |
{ |
public: |
Function(Program *, const char *name, uint32_t label); |
~Function(); |
static inline Function *get(Graph::Node *node); |
inline Program *getProgram() const { return prog; } |
inline const char *getName() const { return name; } |
inline int getId() const { return id; } |
inline uint32_t getLabel() const { return label; } |
void print(); |
void printLiveIntervals() const; |
void printCFGraph(const char *filePath); |
// both setters return false if an entry / exit block was already set |
bool setEntry(BasicBlock *); |
bool setExit(BasicBlock *); |
unsigned int orderInstructions(ArrayList&); |
// registration helpers: 'id' is an output filled in by the list insert |
inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); } |
inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); } |
inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); } |
inline LValue *getLValue(int id); |
void buildLiveSets(); |
void buildDefSets(); |
bool convertToSSA(); |
public: |
std::deque<ValueDef> ins; |
std::deque<ValueRef> outs; |
std::deque<Value *> clobbers; |
Graph cfg; |
Graph::Node *cfgExit; |
Graph *domTree; |
Graph::Node call; // node in the call graph |
BasicBlock **bbArray; // BBs in emission order |
int bbCount; |
unsigned int loopNestingBound; |
int regClobberMax; |
uint32_t binPos; |
uint32_t binSize; |
Value *stackPtr; |
uint32_t tlsBase; // base address for l[] space (if no stack pointer is used) |
uint32_t tlsSize; |
ArrayList allBBlocks; |
ArrayList allInsns; |
ArrayList allLValues; |
private: |
void buildLiveSetsPreSSA(BasicBlock *, const int sequence); |
void buildDefSetsPreSSA(BasicBlock *bb, const int seq); |
private: |
uint32_t label; |
int id; |
const char *const name; |
Program *prog; |
}; |
// Code generation stages, listed in the order pre-SSA, SSA, post register |
// allocation. |
enum CGStage |
{ |
CG_STAGE_PRE_SSA, |
CG_STAGE_SSA, // expected directly before register allocation |
CG_STAGE_POST_RA |
}; |
// Top-level compilation unit: a program of one of the Type kinds, holding its |
// functions, the call graph, the per-class memory pools and the emitted code. |
class Program |
{ |
public: |
enum Type |
{ |
TYPE_VERTEX, |
TYPE_TESSELLATION_CONTROL, |
TYPE_TESSELLATION_EVAL, |
TYPE_GEOMETRY, |
TYPE_FRAGMENT, |
TYPE_COMPUTE |
}; |
Program(Type type, Target *targ); |
~Program(); |
void print(); |
Type getType() const { return progType; } |
// registration helpers; note that del() removes by id and ignores 'fn' |
inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); } |
inline void del(Function *fn, int& id) { allFuncs.remove(id); } |
inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } |
bool makeFromTGSI(struct nv50_ir_prog_info *); |
bool makeFromSM4(struct nv50_ir_prog_info *); |
bool convertToSSA(); |
bool optimizeSSA(int level); |
bool optimizePostRA(int level); |
bool registerAllocation(); |
bool emitBinary(struct nv50_ir_prog_info *); |
const Target *getTarget() const { return target; } |
private: |
void emitSymbolTable(struct nv50_ir_prog_info *); |
Type progType; |
Target *target; |
public: |
Function *main; |
Graph calls; |
ArrayList allFuncs; |
ArrayList allRValues; |
uint32_t *code; |
uint32_t binSize; |
uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL |
int maxGPR; |
// one pool per allocated IR class |
MemoryPool mem_Instruction; |
MemoryPool mem_CmpInstruction; |
MemoryPool mem_TexInstruction; |
MemoryPool mem_FlowInstruction; |
MemoryPool mem_LValue; |
MemoryPool mem_Symbol; |
MemoryPool mem_ImmediateValue; |
uint32_t dbgFlags; |
uint8_t optLevel; |
void *targetPriv; // e.g. to carry information between passes |
const struct nv50_ir_prog_info *driver; // for driver configuration |
void releaseInstruction(Instruction *); |
void releaseValue(Value *); |
}; |
// TODO: add const version |
// Base class for passes over the IR. run() resets 'err' and walks the |
// program / function, calling the virtual visit() hooks for each function, |
// basic block and instruction; run() returns !err unless a visit(Function *) |
// hook returned false. |
class Pass |
{ |
public: |
// ordered: iterate basic blocks with iteratorCFG() instead of iteratorDFS() |
// skipPhi: start each block at getEntry() rather than getFirst() |
bool run(Program *, bool ordered = false, bool skipPhi = false); |
bool run(Function *, bool ordered = false, bool skipPhi = false); |
private: |
// return false to continue with next entity on next higher level |
virtual bool visit(Function *) { return true; } |
virtual bool visit(BasicBlock *) { return true; } |
virtual bool visit(Instruction *) { return false; } |
bool doRun(Program *, bool ordered, bool skipPhi); |
bool doRun(Function *, bool ordered, bool skipPhi); |
protected: |
bool err; |
Function *func; |
Program *prog; |
}; |
// ============================================================================= |
#include "codegen/nv50_ir_inlines.h" |
} // namespace nv50_ir |
#endif // __NV50_IR_H__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_bb.cpp |
---|
0,0 → 1,550 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
namespace nv50_ir { |
// Create an empty function named fnName with the given label inside program p.
// All bookkeeping fields start out zeroed / NULL.
Function::Function(Program *p, const char *fnName, uint32_t label)
   : cfgExit(NULL),
     domTree(NULL),
     call(this),
     bbArray(NULL),
     bbCount(0),
     loopNestingBound(0),
     regClobberMax(0),
     binPos(0),
     binSize(0),
     stackPtr(NULL),
     tlsBase(0),
     tlsSize(0),
     label(label),
     name(fnName),
     prog(p)
{
   // 'id' is handed over by reference and filled in by the program's list.
   prog->add(this, id);
}
// Unregister from the program and release everything this function tracks:
// dominator tree, block array, instructions, LValues and basic blocks.
Function::~Function()
{
   prog->del(this, id);

   // deleting a NULL pointer is a no-op, so no guards are needed
   delete domTree;
   delete[] bbArray;

   // clear value refs and defs
   ins.clear();
   outs.clear();

   ArrayList::Iterator insnIt = allInsns.iterator();
   for (; !insnIt.end(); insnIt.next())
      delete_Instruction(prog, reinterpret_cast<Instruction *>(insnIt.get()));

   ArrayList::Iterator lvalIt = allLValues.iterator();
   for (; !lvalIt.end(); lvalIt.next())
      delete_Value(prog, reinterpret_cast<LValue *>(lvalIt.get()));

   ArrayList::Iterator bbIt = allBBlocks.iterator();
   for (; !bbIt.end(); bbIt.next())
      delete reinterpret_cast<BasicBlock *>(bbIt.get());
}
// Create an empty basic block owned by fn and register it with that function.
BasicBlock::BasicBlock(Function *fn)
   : cfg(this),
     dom(this),
     binPos(0),
     binSize(0),
     joinAt(NULL),
     explicitCont(false),
     phi(NULL),
     entry(NULL),
     exit(NULL),
     numInsns(0),
     func(fn),
     program(fn->getProgram())
{
   // the function's block list fills in this->id through the reference
   func->add(this, this->id);
}

// No cleanup is performed here.
BasicBlock::~BasicBlock()
{
   // nothing yet
}
// Copy this block into the function given by pol.context(): clone every
// instruction, insert the new block into that function's CFG and recreate
// the outgoing edges towards the policy's mapping of each successor.
BasicBlock *
BasicBlock::clone(ClonePolicy<Function>& pol) const
{
   BasicBlock *copy = new BasicBlock(pol.context());

   // record the mapping before cloning the contents
   pol.set(this, copy);

   Instruction *insn = getFirst();
   while (insn) {
      copy->insertTail(insn->clone(pol));
      insn = insn->next;
   }

   pol.context()->cfg.insert(&copy->cfg);

   Graph::EdgeIterator edge = cfg.outgoing();
   while (!edge.end()) {
      BasicBlock *succ = BasicBlock::get(edge.getNode());
      copy->cfg.attach(&pol.get(succ)->cfg, edge.getType());
      edge.next();
   }

   return copy;
}
// Return the block owning the parent of this block's dominator node, or NULL
// if that node has no parent.
BasicBlock *
BasicBlock::idom() const
{
   Graph::Node *parent = dom.parent();
   if (!parent)
      return NULL;
   return BasicBlock::get(parent);
}
void |
BasicBlock::insertHead(Instruction *inst) |
{ |
assert(inst->next == 0 && inst->prev == 0); |
if (inst->op == OP_PHI) { |
if (phi) { |
insertBefore(phi, inst); |
} else { |
if (entry) { |
insertBefore(entry, inst); |
} else { |
assert(!exit); |
phi = exit = inst; |
inst->bb = this; |
++numInsns; |
} |
} |
} else { |
if (entry) { |
insertBefore(entry, inst); |
} else { |
if (phi) { |
insertAfter(exit, inst); // after last phi |
} else { |
assert(!exit); |
entry = exit = inst; |
inst->bb = this; |
++numInsns; |
} |
} |
} |
} |
void |
BasicBlock::insertTail(Instruction *inst) |
{ |
assert(inst->next == 0 && inst->prev == 0); |
if (inst->op == OP_PHI) { |
if (entry) { |
insertBefore(entry, inst); |
} else |
if (exit) { |
assert(phi); |
insertAfter(exit, inst); |
} else { |
assert(!phi); |
phi = exit = inst; |
inst->bb = this; |
++numInsns; |
} |
} else { |
if (exit) { |
insertAfter(exit, inst); |
} else { |
assert(!phi); |
entry = exit = inst; |
inst->bb = this; |
++numInsns; |
} |
} |
} |
void |
BasicBlock::insertBefore(Instruction *q, Instruction *p) |
{ |
assert(p && q); |
assert(p->next == 0 && p->prev == 0); |
if (q == entry) { |
if (p->op == OP_PHI) { |
if (!phi) |
phi = p; |
} else { |
entry = p; |
} |
} else |
if (q == phi) { |
assert(p->op == OP_PHI); |
phi = p; |
} |
p->next = q; |
p->prev = q->prev; |
if (p->prev) |
p->prev->next = p; |
q->prev = p; |
p->bb = this; |
++numInsns; |
} |
void |
BasicBlock::insertAfter(Instruction *p, Instruction *q) |
{ |
assert(p && q); |
assert(q->op != OP_PHI || p->op == OP_PHI); |
assert(q->next == 0 && q->prev == 0); |
if (p == exit) |
exit = q; |
if (p->op == OP_PHI && q->op != OP_PHI) |
entry = q; |
q->prev = p; |
q->next = p->next; |
if (q->next) |
q->next->prev = q; |
p->next = q; |
q->bb = this; |
++numInsns; |
} |
void |
BasicBlock::remove(Instruction *insn) |
{ |
assert(insn->bb == this); |
if (insn->prev) |
insn->prev->next = insn->next; |
if (insn->next) |
insn->next->prev = insn->prev; |
else |
exit = insn->prev; |
if (insn == entry) { |
if (insn->next) |
entry = insn->next; |
else |
if (insn->prev && insn->prev->op != OP_PHI) |
entry = insn->prev; |
else |
entry = NULL; |
} |
if (insn == phi) |
phi = (insn->next && insn->next->op == OP_PHI) ? insn->next : 0; |
--numInsns; |
insn->bb = NULL; |
insn->next = |
insn->prev = NULL; |
} |
// Swap two adjacent non-phi instructions of the same block. The arguments may
// be given in either order; entry / exit are updated if one of them was the
// first or last instruction.
void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
{
   assert(a->bb == b->bb);

   // normalize so that a is directly followed by b
   if (a->next != b) {
      Instruction *i = a;
      a = b;
      b = i;
   }
   assert(a->next == b);
   assert(a->op != OP_PHI && b->op != OP_PHI);

   if (b == exit)
      exit = a;
   if (a == entry)
      entry = b;

   b->prev = a->prev;
   a->next = b->next;
   b->next = a;
   a->prev = b;

   if (b->prev)
      b->prev->next = b;
   // Guard on a->next, not a->prev: a->prev was just set to b and is never
   // NULL, whereas a->next is NULL when b used to be the last instruction.
   if (a->next)
      a->next->prev = a;
}
void |
BasicBlock::splitCommon(Instruction *insn, BasicBlock *bb, bool attach) |
{ |
bb->entry = insn; |
if (insn) { |
exit = insn->prev; |
insn->prev = NULL; |
} |
if (exit) |
exit->next = NULL; |
else |
entry = NULL; |
while (!cfg.outgoing(true).end()) { |
Graph::Edge *e = cfg.outgoing(true).getEdge(); |
bb->cfg.attach(e->getTarget(), e->getType()); |
this->cfg.detach(e->getTarget()); |
} |
for (; insn; insn = insn->next) { |
this->numInsns--; |
bb->numInsns++; |
insn->bb = bb; |
bb->exit = insn; |
} |
if (attach) |
this->cfg.attach(&bb->cfg, Graph::Edge::TREE); |
} |
// Split this block in front of insn (which must not be a phi); insn and all
// following instructions move to the newly created block, which is returned.
BasicBlock *
BasicBlock::splitBefore(Instruction *insn, bool attach)
{
   BasicBlock *tail = new BasicBlock(func);
   assert(!insn || insn->op != OP_PHI);

   splitCommon(insn, tail, attach);
   return tail;
}
// Split this block behind insn (which must not be a phi); the instructions
// following insn move to the newly created block, which also takes over this
// block's joinAt. With a NULL insn no instructions are moved.
BasicBlock *
BasicBlock::splitAfter(Instruction *insn, bool attach)
{
   BasicBlock *tail = new BasicBlock(func);
   assert(!insn || insn->op != OP_PHI);

   tail->joinAt = joinAt;
   joinAt = NULL;

   Instruction *first = NULL;
   if (insn)
      first = insn->next;
   splitCommon(first, tail, attach);
   return tail;
}
bool |
BasicBlock::dominatedBy(BasicBlock *that) |
{ |
Graph::Node *bn = &that->dom; |
Graph::Node *dn = &this->dom; |
while (dn && dn != bn) |
dn = dn->parent(); |
return dn != NULL; |
} |
unsigned int |
BasicBlock::initiatesSimpleConditional() const |
{ |
Graph::Node *out[2]; |
int n; |
Graph::Edge::Type eR; |
if (cfg.outgoingCount() != 2) // -> if and -> else/endif |
return false; |
n = 0; |
for (Graph::EdgeIterator ei = cfg.outgoing(); !ei.end(); ei.next()) |
out[n++] = ei.getNode(); |
eR = out[1]->outgoing().getType(); |
// IF block is out edge to the right |
if (eR == Graph::Edge::CROSS || eR == Graph::Edge::BACK) |
return 0x2; |
if (out[1]->outgoingCount() != 1) // 0 is IF { RET; }, >1 is more divergence |
return 0x0; |
// do they reconverge immediately ? |
if (out[1]->outgoing().getNode() == out[0]) |
return 0x1; |
if (out[0]->outgoingCount() == 1) |
if (out[0]->outgoing().getNode() == out[1]->outgoing().getNode()) |
return 0x3; |
return 0x0; |
} |
bool |
Function::setEntry(BasicBlock *bb) |
{ |
if (cfg.getRoot()) |
return false; |
cfg.insert(&bb->cfg); |
return true; |
} |
bool |
Function::setExit(BasicBlock *bb) |
{ |
if (cfgExit) |
return false; |
cfgExit = &bb->cfg; |
return true; |
} |
unsigned int |
Function::orderInstructions(ArrayList &result) |
{ |
result.clear(); |
for (IteratorRef it = cfg.iteratorCFG(); !it->end(); it->next()) { |
BasicBlock *bb = |
BasicBlock::get(reinterpret_cast<Graph::Node *>(it->get())); |
for (Instruction *insn = bb->getFirst(); insn; insn = insn->next) |
result.insert(insn, insn->serial); |
} |
return result.getSize(); |
} |
void |
Function::buildLiveSets() |
{ |
for (unsigned i = 0; i <= loopNestingBound; ++i) |
buildLiveSetsPreSSA(BasicBlock::get(cfg.getRoot()), cfg.nextSequence()); |
for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next()) |
BasicBlock::get(bi)->liveSet.marker = false; |
} |
void |
Function::buildDefSets() |
{ |
for (unsigned i = 0; i <= loopNestingBound; ++i) |
buildDefSetsPreSSA(BasicBlock::get(cfgExit), cfg.nextSequence()); |
for (ArrayList::Iterator bi = allBBlocks.iterator(); !bi.end(); bi.next()) |
BasicBlock::get(bi)->liveSet.marker = false; |
} |
bool |
Pass::run(Program *prog, bool ordered, bool skipPhi) |
{ |
this->prog = prog; |
err = false; |
return doRun(prog, ordered, skipPhi); |
} |
bool |
Pass::doRun(Program *prog, bool ordered, bool skipPhi) |
{ |
for (IteratorRef it = prog->calls.iteratorDFS(false); |
!it->end(); it->next()) { |
Graph::Node *n = reinterpret_cast<Graph::Node *>(it->get()); |
if (!doRun(Function::get(n), ordered, skipPhi)) |
return false; |
} |
return !err; |
} |
bool |
Pass::run(Function *func, bool ordered, bool skipPhi) |
{ |
prog = func->getProgram(); |
err = false; |
return doRun(func, ordered, skipPhi); |
} |
bool |
Pass::doRun(Function *func, bool ordered, bool skipPhi) |
{ |
IteratorRef bbIter; |
BasicBlock *bb; |
Instruction *insn, *next; |
this->func = func; |
if (!visit(func)) |
return false; |
bbIter = ordered ? func->cfg.iteratorCFG() : func->cfg.iteratorDFS(); |
for (; !bbIter->end(); bbIter->next()) { |
bb = BasicBlock::get(reinterpret_cast<Graph::Node *>(bbIter->get())); |
if (!visit(bb)) |
break; |
for (insn = skipPhi ? bb->getEntry() : bb->getFirst(); insn != NULL; |
insn = next) { |
next = insn->next; |
if (!visit(insn)) |
break; |
} |
} |
return !err; |
} |
void |
Function::printCFGraph(const char *filePath) |
{ |
FILE *out = fopen(filePath, "a"); |
if (!out) { |
ERROR("failed to open file: %s\n", filePath); |
return; |
} |
INFO("printing control flow graph to: %s\n", filePath); |
fprintf(out, "digraph G {\n"); |
for (IteratorRef it = cfg.iteratorDFS(); !it->end(); it->next()) { |
BasicBlock *bb = BasicBlock::get( |
reinterpret_cast<Graph::Node *>(it->get())); |
int idA = bb->getId(); |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
int idB = BasicBlock::get(ei.getNode())->getId(); |
switch (ei.getType()) { |
case Graph::Edge::TREE: |
fprintf(out, "\t%i -> %i;\n", idA, idB); |
break; |
case Graph::Edge::FORWARD: |
fprintf(out, "\t%i -> %i [color=green];\n", idA, idB); |
break; |
case Graph::Edge::CROSS: |
fprintf(out, "\t%i -> %i [color=red];\n", idA, idB); |
break; |
case Graph::Edge::BACK: |
fprintf(out, "\t%i -> %i;\n", idA, idB); |
break; |
case Graph::Edge::DUMMY: |
fprintf(out, "\t%i -> %i [style=dotted];\n", idA, idB); |
break; |
default: |
assert(0); |
break; |
} |
} |
} |
fprintf(out, "}\n"); |
fclose(out); |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp |
---|
0,0 → 1,615 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_build_util.h" |
namespace nv50_ir { |
BuildUtil::BuildUtil() |
{ |
init(NULL); |
} |
BuildUtil::BuildUtil(Program *prog) |
{ |
init(prog); |
} |
void |
BuildUtil::init(Program *prog) |
{ |
this->prog = prog; |
func = NULL; |
bb = NULL; |
pos = NULL; |
memset(imms, 0, sizeof(imms)); |
immCount = 0; |
} |
void |
BuildUtil::addImmediate(ImmediateValue *imm) |
{ |
if (immCount > (NV50_IR_BUILD_IMM_HT_SIZE * 3) / 4) |
return; |
unsigned int pos = u32Hash(imm->reg.data.u32); |
while (imms[pos]) |
pos = (pos + 1) % NV50_IR_BUILD_IMM_HT_SIZE; |
imms[pos] = imm; |
immCount++; |
} |
// Create "dst = op(src)" of type ty, insert it at the current position and
// return the new instruction.
Instruction *
BuildUtil::mkOp1(operation op, DataType ty, Value *dst, Value *src)
{
   Instruction *const unary = new_Instruction(func, op, ty);

   unary->setDef(0, dst);
   unary->setSrc(0, src);

   insert(unary);
   return unary;
}
// Create "dst = op(src0, src1)" of type ty, insert it at the current position
// and return the new instruction.
Instruction *
BuildUtil::mkOp2(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1)
{
   Instruction *const binary = new_Instruction(func, op, ty);

   binary->setDef(0, dst);
   binary->setSrc(0, src0);
   binary->setSrc(1, src1);

   insert(binary);
   return binary;
}
// Create "dst = op(src0, src1, src2)" of type ty, insert it at the current
// position and return the new instruction.
Instruction *
BuildUtil::mkOp3(operation op, DataType ty, Value *dst,
                 Value *src0, Value *src1, Value *src2)
{
   Instruction *const ternary = new_Instruction(func, op, ty);

   ternary->setDef(0, dst);
   ternary->setSrc(0, src0);
   ternary->setSrc(1, src1);
   ternary->setSrc(2, src2);

   insert(ternary);
   return ternary;
}
// Create and insert an OP_LOAD of type ty from symbol mem into dst. A
// non-NULL ptr is attached as indirect address (dimension 0) of source 0.
Instruction *
BuildUtil::mkLoad(DataType ty, Value *dst, Symbol *mem, Value *ptr)
{
   Instruction *const load = new_Instruction(func, OP_LOAD, ty);

   load->setDef(0, dst);
   load->setSrc(0, mem);
   if (ptr != NULL)
      load->setIndirect(0, 0, ptr);

   insert(load);
   return load;
}
// Create and insert a store-like instruction 'op' of type ty writing stVal
// to symbol mem. A non-NULL ptr is attached as indirect address
// (dimension 0) of source 0.
Instruction *
BuildUtil::mkStore(operation op, DataType ty, Symbol *mem, Value *ptr,
                   Value *stVal)
{
   Instruction *const store = new_Instruction(func, op, ty);

   store->setSrc(0, mem);
   store->setSrc(1, stVal);
   if (ptr != NULL)
      store->setIndirect(0, 0, ptr);

   insert(store);
   return store;
}
// Create and insert an OP_VFETCH of type ty from a fresh symbol in 'file' at
// 'offset'. attrRel and primRel become the indirect addresses of dimension 0
// and 1 respectively.
Instruction *
BuildUtil::mkFetch(Value *dst, DataType ty, DataFile file, int32_t offset,
                   Value *attrRel, Value *primRel)
{
   Symbol *const location = mkSymbol(file, 0, ty, offset);

   // mkOp1() inserts the instruction, so there is no insert() call here
   Instruction *const fetch = mkOp1(OP_VFETCH, ty, dst, location);

   fetch->setIndirect(0, 0, attrRel);
   fetch->setIndirect(0, 1, primRel);

   return fetch;
}
// Create and insert an interpolation instruction reading the shader input at
// 'offset'. The masked mode picks the variant: FLAT keeps OP_LINTERP but uses
// TYPE_U32, PERSPECTIVE uses OP_PINTERP with TYPE_F32, anything else is
// OP_LINTERP with TYPE_F32. 'rel' becomes the indirect address and the full
// mode is stored on the instruction.
Instruction *
BuildUtil::mkInterp(unsigned mode, Value *dst, int32_t offset, Value *rel)
{
   const unsigned interp = mode & NV50_IR_INTERP_MODE_MASK;

   DataType ty = TYPE_F32;
   operation op = OP_LINTERP;

   if (interp == NV50_IR_INTERP_FLAT) {
      ty = TYPE_U32;
   } else if (interp == NV50_IR_INTERP_PERSPECTIVE) {
      op = OP_PINTERP;
   }

   Symbol *const input = mkSymbol(FILE_SHADER_INPUT, 0, ty, offset);
   Instruction *const ipa = mkOp1(op, ty, dst, input);

   ipa->setIndirect(0, 0, rel);
   ipa->setInterpolate(mode);
   return ipa;
}
// Create and insert "dst = src" as an OP_MOV of type ty.
Instruction *
BuildUtil::mkMov(Value *dst, Value *src, DataType ty)
{
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   mov->setSrc(0, src);

   insert(mov);
   return mov;
}
// Create and insert an OP_MOV of src into a new GPR value whose register id
// is set to 'id'. The type is derived from the size of src.
Instruction *
BuildUtil::mkMovToReg(int id, Value *src)
{
   const DataType ty = typeOfSize(src->reg.size);
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, new_LValue(func, FILE_GPR));
   mov->getDef(0)->reg.data.id = id;
   mov->setSrc(0, src);

   insert(mov);
   return mov;
}
// Create and insert an OP_MOV into dst from a new GPR value whose register id
// is set to 'id'. The type is derived from the size of dst.
Instruction *
BuildUtil::mkMovFromReg(Value *dst, int id)
{
   const DataType ty = typeOfSize(dst->reg.size);
   Instruction *const mov = new_Instruction(func, OP_MOV, ty);

   mov->setDef(0, dst);
   mov->setSrc(0, new_LValue(func, FILE_GPR));
   mov->getSrc(0)->reg.data.id = id;

   insert(mov);
   return mov;
}
// Create and insert a conversion-style instruction 'op' with distinct
// destination type dstTy and source type srcTy.
Instruction *
BuildUtil::mkCvt(operation op,
                 DataType dstTy, Value *dst, DataType srcTy, Value *src)
{
   Instruction *const cvt = new_Instruction(func, op, dstTy);

   cvt->setType(dstTy, srcTy);
   cvt->setDef(0, dst);
   cvt->setSrc(0, src);

   insert(cvt);
   return cvt;
}
// Create and insert a compare instruction 'op' with condition cc. If dst
// lives in the predicate or flags file the destination type is forced to
// TYPE_U8; a flags destination is additionally recorded as flagsDef 0.
// src2 is optional.
CmpInstruction *
BuildUtil::mkCmp(operation op, CondCode cc, DataType dstTy, Value *dst,
                 DataType srcTy, Value *src0, Value *src1, Value *src2)
{
   CmpInstruction *const cmp = new_CmpInstruction(func, op);

   const bool toFlags = (dst->reg.file == FILE_FLAGS);
   const bool toPred = (dst->reg.file == FILE_PREDICATE);

   cmp->setType((toPred || toFlags) ? TYPE_U8 : dstTy, srcTy);
   cmp->setCondition(cc);

   cmp->setDef(0, dst);
   cmp->setSrc(0, src0);
   cmp->setSrc(1, src1);
   if (src2 != NULL)
      cmp->setSrc(2, src2);

   if (toFlags)
      cmp->flagsDef = 0;

   insert(cmp);
   return cmp;
}
// Create and insert a texture instruction 'op' for target targ with indices
// tic / tsc. Definitions and sources are taken from the vectors up to (not
// including) the first NULL entry.
TexInstruction *
BuildUtil::mkTex(operation op, TexTarget targ,
                 uint16_t tic, uint16_t tsc,
                 const std::vector<Value *> &def,
                 const std::vector<Value *> &src)
{
   TexInstruction *const texi = new_TexInstruction(func, op);

   size_t d = 0;
   while (d < def.size() && def[d]) {
      texi->setDef(d, def[d]);
      ++d;
   }

   size_t s = 0;
   while (s < src.size() && src[s]) {
      texi->setSrc(s, src[s]);
      ++s;
   }

   texi->setTexture(targ, tic, tsc);

   insert(texi);
   return texi;
}
// Create and insert an F32 OP_QUADOP "def = quadop(src0, src1)" with sub
// operation q and lane mask l.
Instruction *
BuildUtil::mkQuadop(uint8_t q, Value *def, uint8_t l, Value *src0, Value *src1)
{
   Instruction *const insn = mkOp2(OP_QUADOP, TYPE_F32, def, src0, src1);

   insn->lanes = l;
   insn->subOp = q;
   return insn;
}
// Build "dst = pred ? trSrc : flSrc" from two oppositely predicated moves
// into fresh SSA values that are merged by an OP_UNION; returns the union.
Instruction *
BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc)
{
   LValue *const onTrue = getSSA();
   LValue *const onFalse = getSSA();

   Instruction *movTrue = mkMov(onTrue, trSrc);
   movTrue->setPredicate(CC_P, pred);

   Instruction *movFalse = mkMov(onFalse, flSrc);
   movFalse->setPredicate(CC_NOT_P, pred);

   const DataType ty = typeOfSize(dst->reg.size);
   return mkOp2(OP_UNION, ty, dst, onTrue, onFalse);
}
// Split val into two halves of halfSize bytes, returned in h[0] and h[1].
// Immediates are first moved into a fresh SSA value. Memory values are split
// by shallow-cloning the value and adjusting size / offset, in which case no
// instruction is created and NULL is returned; otherwise an OP_SPLIT is
// inserted and returned.
Instruction *
BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val)
{
   const DataType fTy = typeOfSize(halfSize * 2);

   if (val->reg.file == FILE_IMMEDIATE)
      val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0);

   if (!isMemoryFile(val->reg.file)) {
      h[0] = getSSA(halfSize, val->reg.file);
      h[1] = getSSA(halfSize, val->reg.file);

      Instruction *const split = mkOp1(OP_SPLIT, fTy, h[0], val);
      split->setDef(1, h[1]);
      return split;
   }

   // memory operand: address the two halves directly
   h[0] = cloneShallow(getFunction(), val);
   h[1] = cloneShallow(getFunction(), val);
   h[0]->reg.size = halfSize;
   h[1]->reg.size = halfSize;
   h[1]->reg.data.offset += halfSize;
   return NULL;
}
// Create and insert a flow instruction 'op' towards targ; when pred is given
// the instruction is predicated on it with condition cc.
FlowInstruction *
BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred)
{
   FlowInstruction *const flow = new_FlowInstruction(func, op, targ);

   if (pred != NULL)
      flow->setPredicate(cc, pred);

   insert(flow);
   return flow;
}
void |
BuildUtil::mkClobber(DataFile f, uint32_t rMask, int unit) |
{ |
static const uint16_t baseSize2[16] = |
{ |
0x0000, 0x0010, 0x0011, 0x0020, 0x0012, 0x1210, 0x1211, 0x1220, |
0x0013, 0x1310, 0x1311, 0x1320, 0x0022, 0x2210, 0x2211, 0x0040, |
}; |
int base = 0; |
for (; rMask; rMask >>= 4, base += 4) { |
const uint32_t mask = rMask & 0xf; |
if (!mask) |
continue; |
int base1 = (baseSize2[mask] >> 0) & 0xf; |
int size1 = (baseSize2[mask] >> 4) & 0xf; |
int base2 = (baseSize2[mask] >> 8) & 0xf; |
int size2 = (baseSize2[mask] >> 12) & 0xf; |
Instruction *insn = mkOp(OP_NOP, TYPE_NONE, NULL); |
if (1) { // size1 can't be 0 |
LValue *reg = new_LValue(func, f); |
reg->reg.size = size1 << unit; |
reg->reg.data.id = base + base1; |
insn->setDef(0, reg); |
} |
if (size2) { |
LValue *reg = new_LValue(func, f); |
reg->reg.size = size2 << unit; |
reg->reg.data.id = base + base2; |
insn->setDef(1, reg); |
} |
} |
} |
// Return an immediate holding the 32-bit pattern u. The imms[] table is
// probed linearly starting at u32Hash(u); on a miss a new ImmediateValue is
// created and handed to addImmediate().
ImmediateValue *
BuildUtil::mkImm(uint32_t u)
{
   unsigned int slot = u32Hash(u);

   for (;;) {
      ImmediateValue *cached = imms[slot];
      if (!cached)
         break;
      if (cached->reg.data.u32 == u)
         return cached;
      slot = (slot + 1) % NV50_IR_BUILD_IMM_HT_SIZE;
   }

   ImmediateValue *fresh = new_ImmediateValue(prog, u);
   addImmediate(fresh);
   return fresh;
}
// Create a new 8-byte TYPE_U64 immediate holding u. Unlike the 32-bit
// overload this one does not consult the imms[] table.
ImmediateValue *
BuildUtil::mkImm(uint64_t u)
{
   ImmediateValue *wide = new_ImmediateValue(prog, (uint32_t)0);

   wide->reg.type = TYPE_U64;
   wide->reg.size = 8;
   wide->reg.data.u64 = u;
   return wide;
}
// Immediate for a float: reinterpret the bit pattern of f as a uint32_t
// and defer to the 32-bit overload.
ImmediateValue *
BuildUtil::mkImm(float f)
{
   union {
      uint32_t bits;
      float value;
   } pun;

   pun.value = f;
   return mkImm(pun.bits);
}
// Emit an F32 OP_MOV of the immediate f into dst, or into a new scratch
// value when dst is NULL; returns the destination.
Value *
BuildUtil::loadImm(Value *dst, float f)
{
   if (!dst)
      dst = getScratch();
   return mkOp1v(OP_MOV, TYPE_F32, dst, mkImm(f));
}
// Emit a U32 OP_MOV of the immediate u into dst, or into a new scratch
// value when dst is NULL; returns the destination.
Value *
BuildUtil::loadImm(Value *dst, uint32_t u)
{
   if (!dst)
      dst = getScratch();
   return mkOp1v(OP_MOV, TYPE_U32, dst, mkImm(u));
}
// Emit a U64 OP_MOV of the immediate u into dst, or into a new 8-byte
// scratch value when dst is NULL; returns the destination.
Value *
BuildUtil::loadImm(Value *dst, uint64_t u)
{
   if (!dst)
      dst = getScratch(8);
   return mkOp1v(OP_MOV, TYPE_U64, dst, mkImm(u));
}
// Create a symbol in (file, fileIndex) at offset baseAddr, typed ty and
// sized accordingly.
Symbol *
BuildUtil::mkSymbol(DataFile file, int8_t fileIndex, DataType ty,
                    uint32_t baseAddr)
{
   Symbol *const result = new_Symbol(prog, file, fileIndex);

   result->setOffset(baseAddr);

   result->reg.type = ty;
   result->reg.size = typeSizeof(ty);

   return result;
}
// Create a FILE_SYSTEM_VALUE symbol for (svName, svIndex). The listed
// float-valued semantics are typed F32, all others U32.
Symbol *
BuildUtil::mkSysVal(SVSemantic svName, uint32_t svIndex)
{
   Symbol *sv = new_Symbol(prog, FILE_SYSTEM_VALUE, 0);

   // only clip distances and tess factors may use an index beyond 3
   assert(svIndex < 4 ||
          (svName == SV_CLIP_DISTANCE || svName == SV_TESS_FACTOR));

   const bool isFloat =
      svName == SV_POSITION ||
      svName == SV_FACE ||
      svName == SV_YDIR ||
      svName == SV_POINT_SIZE ||
      svName == SV_POINT_COORD ||
      svName == SV_CLIP_DISTANCE ||
      svName == SV_TESS_FACTOR;

   if (isFloat)
      sv->reg.type = TYPE_F32;
   else
      sv->reg.type = TYPE_U32;
   sv->reg.size = typeSizeof(sv->reg.type);

   sv->reg.data.sv.sv = svName;
   sv->reg.data.sv.index = svIndex;

   return sv;
}
void |
BuildUtil::DataArray::setup(unsigned array, unsigned arrayIdx, |
uint32_t base, int len, int vecDim, int eltSize, |
DataFile file, int8_t fileIdx) |
{ |
this->array = array; |
this->arrayIdx = arrayIdx; |
this->baseAddr = base; |
this->arrayLen = len; |
this->vecDim = vecDim; |
this->eltSize = eltSize; |
this->file = file; |
this->regOnly = !isMemoryFile(file); |
if (!regOnly) { |
baseSym = new_Symbol(up->getProgram(), file, fileIdx); |
baseSym->setOffset(baseAddr); |
baseSym->reg.size = eltSize; |
} else { |
baseSym = NULL; |
} |
} |
// Value to write element (i, c) through: memory-backed arrays hand out a
// new scratch value; register-only arrays return the value mapped to the
// element, creating and mapping a new LValue on first use.
Value *
BuildUtil::DataArray::acquire(ValueMap &m, int i, int c)
{
   if (!regOnly)
      return up->getScratch();

   Value *mapped = lookup(m, i, c);
   if (mapped == NULL)
      mapped = insert(m, i, c, new_LValue(up->getFunction(), file));
   return mapped;
}
// Read element (i, c). Register-only arrays return the mapped value
// (created on first use). Memory-backed arrays map the element to a symbol
// and emit a load of eltSize bytes through it, indirected by ptr.
Value *
BuildUtil::DataArray::load(ValueMap &m, int i, int c, Value *ptr)
{
   Value *slot = lookup(m, i, c);

   if (regOnly) {
      if (slot == NULL)
         slot = insert(m, i, c, new_LValue(up->getFunction(), file));
      return slot;
   }

   if (slot == NULL)
      slot = insert(m, i, c, mkSymbol(i, c));
   return up->mkLoadv(typeOfSize(eltSize), static_cast<Symbol *>(slot), ptr);
}
void |
BuildUtil::DataArray::store(ValueMap &m, int i, int c, Value *ptr, Value *value) |
{ |
if (regOnly) { |
assert(!ptr); |
if (!lookup(m, i, c)) |
insert(m, i, c, value); |
assert(lookup(m, i, c) == value); |
} else { |
Value *sym = lookup(m, i, c); |
if (!sym) |
sym = insert(m, i, c, mkSymbol(i, c)); |
const DataType stTy = typeOfSize(value->reg.size); |
up->mkStore(OP_STORE, stTy, static_cast<Symbol *>(sym), ptr, value); |
} |
} |
// Create the symbol addressing element (i, c): eltSize bytes wide, located
// at baseAddr + (i * vecDim + c) * eltSize relative to baseSym.
Symbol *
BuildUtil::DataArray::mkSymbol(int i, int c)
{
   const unsigned int idx = i * vecDim + c;
   Symbol *elem = new_Symbol(up->getProgram(), file, 0);

   // bounds are only checked when there is no base symbol
   assert(baseSym || (idx < arrayLen && c < vecDim));

   elem->reg.type = typeOfSize(eltSize);
   elem->reg.size = eltSize;
   elem->setAddress(baseSym, baseAddr + idx * eltSize);
   return elem;
}
// Split a 64-bit integer instruction into two 32-bit ones in place.
//
// Only U64/S64 OP_MOV, OP_ADD and OP_SUB are handled; ADD/SUB additionally
// need a non-NULL carry value. In every other case NULL is returned and the
// instruction is left untouched.
//
// On success @i becomes the low half and a clone inserted right after it
// becomes the high half, which is returned. @zero is used as the high-half
// source for operands narrower than 8 bytes.
Instruction * |
BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i, |
Value *zero, |
Value *carry) |
{ |
DataType hTy; |
int srcNr; |
// type of each half |
switch (i->dType) { |
case TYPE_U64: hTy = TYPE_U32; break; |
case TYPE_S64: hTy = TYPE_S32; break; |
default: |
return NULL; |
} |
// number of sources that have to be split |
switch (i->op) { |
case OP_MOV: srcNr = 1; break; |
case OP_ADD: |
case OP_SUB: |
if (!carry) |
return NULL; |
srcNr = 2; |
break; |
default: |
// TODO when needed |
return NULL; |
} |
// narrow the original: half type, and a private 4-byte copy of its def |
i->setType(hTy); |
i->setDef(0, cloneShallow(fn, i->getDef(0))); |
i->getDef(0)->reg.size = 4; |
Instruction *lo = i; |
Instruction *hi = cloneForward(fn, i); |
lo->bb->insertAfter(lo, hi); |
// the high half writes the register following the low one |
hi->getDef(0)->reg.data.id++; |
for (int s = 0; s < srcNr; ++s) { |
if (lo->getSrc(s)->reg.size < 8) { |
// narrow operand: nothing to split, the high half reads zero |
hi->setSrc(s, zero); |
} else { |
// give lo a private copy before resizing a value that has other references |
if (lo->getSrc(s)->refCount() > 1) |
lo->setSrc(s, cloneShallow(fn, lo->getSrc(s))); |
lo->getSrc(s)->reg.size /= 2; |
hi->setSrc(s, cloneShallow(fn, lo->getSrc(s))); |
// advance hi's copy to the upper half, according to its file |
switch (hi->src(s).getFile()) { |
case FILE_IMMEDIATE: |
hi->getSrc(s)->reg.data.u64 >>= 32; |
break; |
case FILE_MEMORY_CONST: |
case FILE_MEMORY_SHARED: |
case FILE_SHADER_INPUT: |
hi->getSrc(s)->reg.data.offset += 4; |
break; |
default: |
assert(hi->src(s).getFile() == FILE_GPR); |
hi->getSrc(s)->reg.data.id++; |
break; |
} |
} |
} |
// two-source ops (ADD/SUB): lo defines carry, hi reads it as its flags source |
if (srcNr == 2) { |
lo->setDef(1, carry); |
hi->setFlagsSrc(hi->srcCount(), carry); |
} |
return hi; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h |
---|
0,0 → 1,324 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_BUILD_UTIL__ |
#define __NV50_IR_BUILD_UTIL__ |
namespace nv50_ir { |
// Helper for building nv50_ir code: keeps a current program, function and
// insertion position, and offers mk*() factories that create instructions
// and values and insert the instructions at that position.
class BuildUtil |
{ |
public: |
BuildUtil(); |
BuildUtil(Program *); |
inline void setProgram(Program *); |
inline Program *getProgram() const { return prog; } |
inline Function *getFunction() const { return func; } |
// keeps inserting at head/tail of block |
inline void setPosition(BasicBlock *, bool tail); |
// position advances only if @after is true |
inline void setPosition(Instruction *, bool after); |
inline BasicBlock *getBB() { return bb; } |
inline void insert(Instruction *); |
inline void remove(Instruction *i) { assert(i->bb == bb); bb->remove(i); } |
// new value of the given size in the given file, not marked as SSA |
inline LValue *getScratch(int size = 4, DataFile = FILE_GPR); |
// scratch value for a single assignment: |
inline LValue *getSSA(int size = 4, DataFile = FILE_GPR); |
// instruction factories; the *v variants return the destination value |
// instead of the instruction |
inline Instruction *mkOp(operation, DataType, Value *); |
Instruction *mkOp1(operation, DataType, Value *, Value *); |
Instruction *mkOp2(operation, DataType, Value *, Value *, Value *); |
Instruction *mkOp3(operation, DataType, Value *, Value *, Value *, Value *); |
LValue *mkOp1v(operation, DataType, Value *, Value *); |
LValue *mkOp2v(operation, DataType, Value *, Value *, Value *); |
LValue *mkOp3v(operation, DataType, Value *, Value *, Value *, Value *); |
Instruction *mkLoad(DataType, Value *dst, Symbol *, Value *ptr); |
Instruction *mkStore(operation, DataType, Symbol *, Value *ptr, Value *val); |
LValue *mkLoadv(DataType, Symbol *, Value *ptr); |
Instruction *mkMov(Value *, Value *, DataType = TYPE_U32); |
Instruction *mkMovToReg(int id, Value *); |
Instruction *mkMovFromReg(Value *, int id); |
Instruction *mkInterp(unsigned mode, Value *, int32_t offset, Value *rel); |
Instruction *mkFetch(Value *, DataType, DataFile, int32_t offset, |
Value *attrRel, Value *primRel); |
Instruction *mkCvt(operation, DataType, Value *, DataType, Value *); |
CmpInstruction *mkCmp(operation, CondCode, DataType, |
Value *, |
DataType, Value *, Value *, Value * = NULL); |
TexInstruction *mkTex(operation, TexTarget, |
uint16_t tic, uint16_t tsc, |
const std::vector<Value *> &def, |
const std::vector<Value *> &src); |
Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *); |
FlowInstruction *mkFlow(operation, void *target, CondCode, Value *pred); |
Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc); |
Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *); |
void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); |
// immediates; the int overloads forward to the uint32_t ones |
ImmediateValue *mkImm(float); |
ImmediateValue *mkImm(uint32_t); |
ImmediateValue *mkImm(uint64_t); |
ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); } |
Value *loadImm(Value *dst, float); |
Value *loadImm(Value *dst, uint32_t); |
Value *loadImm(Value *dst, uint64_t); |
Value *loadImm(Value *dst, int i) { return loadImm(dst, (uint32_t)i); } |
// returns high part of the operation |
static Instruction *split64BitOpPostRA(Function *, Instruction *, |
Value *zero, Value *carry); |
// Key identifying one element: (array, arrayIdx, i, c), compared and |
// ordered lexicographically in that order. |
struct Location |
{ |
Location(unsigned array, unsigned arrayIdx, unsigned i, unsigned c) |
: array(array), arrayIdx(arrayIdx), i(i), c(c) { } |
Location(const Location &l) |
: array(l.array), arrayIdx(l.arrayIdx), i(l.i), c(l.c) { } |
bool operator==(const Location &l) const |
{ |
return |
array == l.array && arrayIdx == l.arrayIdx && i == l.i && c == l.c; |
} |
bool operator<(const Location &l) const |
{ |
return array != l.array ? array < l.array : |
arrayIdx != l.arrayIdx ? arrayIdx < l.arrayIdx : |
i != l.i ? i < l.i : |
c != l.c ? c < l.c : |
false; |
} |
unsigned array, arrayIdx, i, c; |
}; |
typedef bimap<Location, Value *> ValueMap; |
// An array of vecDim-component elements that lives either in registers |
// (elements are values tracked in a ValueMap) or in a memory file |
// (elements are symbols accessed with loads and stores). |
class DataArray |
{ |
public: |
DataArray(BuildUtil *bld) : up(bld) { } |
void setup(unsigned array, unsigned arrayIdx, |
uint32_t base, int len, int vecDim, int eltSize, |
DataFile file, int8_t fileIdx); |
inline bool exists(ValueMap&, unsigned int i, unsigned int c); |
Value *load(ValueMap&, int i, int c, Value *ptr); |
void store(ValueMap&, int i, int c, Value *ptr, Value *value); |
Value *acquire(ValueMap&, int i, int c); |
private: |
inline Value *lookup(ValueMap&, unsigned i, unsigned c); |
inline Value *insert(ValueMap&, unsigned i, unsigned c, Value *v); |
Symbol *mkSymbol(int i, int c); |
private: |
BuildUtil *up; |
unsigned array, arrayIdx; |
uint32_t baseAddr; |
uint32_t arrayLen; |
Symbol *baseSym; |
uint8_t vecDim; |
uint8_t eltSize; // in bytes |
DataFile file; |
bool regOnly; |
}; |
Symbol *mkSymbol(DataFile file, int8_t fileIndex, |
DataType ty, uint32_t baseAddress); |
Symbol *mkSysVal(SVSemantic svName, uint32_t svIndex); |
private: |
void init(Program *); |
void addImmediate(ImmediateValue *); |
inline unsigned int u32Hash(uint32_t); |
protected: |
Program *prog; |
Function *func; |
// insertion state: with pos == NULL instructions go to the head/tail of |
// bb, otherwise before/after pos (see insert()) |
Instruction *pos; |
BasicBlock *bb; |
bool tail; |
// table of 32-bit immediates, indexed by u32Hash() and probed linearly |
#define NV50_IR_BUILD_IMM_HT_SIZE 256 |
ImmediateValue *imms[NV50_IR_BUILD_IMM_HT_SIZE]; |
unsigned int immCount; |
}; |
unsigned int BuildUtil::u32Hash(uint32_t u) |
{ |
return (u % 273) % NV50_IR_BUILD_IMM_HT_SIZE; |
} |
// Set the program used for newly created immediates and symbols.
void BuildUtil::setProgram(Program *program)
{
   this->prog = program;
}
void |
BuildUtil::setPosition(BasicBlock *block, bool atTail) |
{ |
bb = block; |
prog = bb->getProgram(); |
func = bb->getFunction(); |
pos = NULL; |
tail = atTail; |
} |
void |
BuildUtil::setPosition(Instruction *i, bool after) |
{ |
bb = i->bb; |
prog = bb->getProgram(); |
func = bb->getFunction(); |
pos = i; |
tail = after; |
assert(bb); |
} |
// New value of `size` bytes in file f of the current function.
LValue *
BuildUtil::getScratch(int size, DataFile f)
{
   LValue *const scratch = new_LValue(func, f);

   scratch->reg.size = size;

   return scratch;
}
// Like getScratch(), but the new value is flagged as SSA.
LValue *
BuildUtil::getSSA(int size, DataFile f)
{
   LValue *const ssaVal = new_LValue(func, f);

   ssaVal->reg.size = size;
   ssaVal->ssa = 1;

   return ssaVal;
}
// Insert i at the current position. Without a reference instruction it
// goes to the tail or head of bb. With one it goes after pos (which then
// advances to i) when `tail` is set, and before pos otherwise.
void BuildUtil::insert(Instruction *i)
{
   if (pos) {
      if (!tail) {
         bb->insertBefore(pos, i);
         return;
      }
      bb->insertAfter(pos, i);
      pos = i;
      return;
   }

   if (tail)
      bb->insertTail(i);
   else
      bb->insertHead(i);
}
// Create an instruction `op` of type ty defining dst and insert it.
// A fixed set of opcodes gets the instruction's `fixed` flag set.
Instruction *
BuildUtil::mkOp(operation op, DataType ty, Value *dst)
{
   Instruction *created = new_Instruction(func, op, ty);

   created->setDef(0, dst);
   insert(created);

   switch (op) {
   case OP_DISCARD:
   case OP_EXIT:
   case OP_JOIN:
   case OP_QUADON:
   case OP_QUADPOP:
   case OP_EMIT:
   case OP_RESTART:
      created->fixed = 1;
      break;
   default:
      break;
   }
   return created;
}
// mkOp1() that returns the destination value instead of the instruction.
inline LValue *
BuildUtil::mkOp1v(operation op, DataType ty, Value *dst, Value *src)
{
   mkOp1(op, ty, dst, src);

   LValue *const result = dst->asLValue();
   return result;
}
// mkOp2() that returns the destination value instead of the instruction.
inline LValue *
BuildUtil::mkOp2v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1)
{
   mkOp2(op, ty, dst, src0, src1);

   LValue *const result = dst->asLValue();
   return result;
}
// mkOp3() that returns the destination value instead of the instruction.
inline LValue *
BuildUtil::mkOp3v(operation op, DataType ty, Value *dst,
                  Value *src0, Value *src1, Value *src2)
{
   mkOp3(op, ty, dst, src0, src1, src2);

   LValue *const result = dst->asLValue();
   return result;
}
// Load a value of type ty from mem (indirected by ptr) into a new scratch
// value and return that value.
//
// The destination is sized from ty when ty is wider than 4 bytes, so that
// e.g. a 64-bit load no longer targets a 4-byte value. Types of 4 bytes or
// less keep the 4-byte destination used so far.
inline LValue *
BuildUtil::mkLoadv(DataType ty, Symbol *mem, Value *ptr)
{
   const int tySize = typeSizeof(ty);
   LValue *dst = getScratch(tySize > 4 ? tySize : 4);
   mkLoad(ty, dst, mem, ptr);
   return dst;
}
bool |
BuildUtil::DataArray::exists(ValueMap &m, unsigned int i, unsigned int c) |
{ |
assert(i < arrayLen && c < vecDim); |
return !regOnly || m.r.count(Location(array, arrayIdx, i, c)); |
} |
// Value mapped to element (i, c) of this array, or NULL if there is none.
Value *
BuildUtil::DataArray::lookup(ValueMap &m, unsigned i, unsigned c)
{
   const Location key(array, arrayIdx, i, c);
   ValueMap::r_iterator hit = m.r.find(key);

   if (hit == m.r.end())
      return NULL;
   return hit->second;
}
// Map element (i, c) of this array to v and return v.
Value *
BuildUtil::DataArray::insert(ValueMap &m, unsigned i, unsigned c, Value *v)
{
   const Location key(array, arrayIdx, i, c);

   m.insert(key, v);
   return v;
}
} // namespace nv50_ir |
#endif // __NV50_IR_BUILD_UTIL__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h |
---|
0,0 → 1,221 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_DRIVER_H__ |
#define __NV50_IR_DRIVER_H__ |
#include "pipe/p_shader_tokens.h" |
#include "tgsi/tgsi_util.h" |
#include "tgsi/tgsi_parse.h" |
#include "tgsi/tgsi_scan.h" |
/* |
* This struct constitutes linkage information in TGSI terminology. |
* |
* It is created by the code generator and handed to the pipe driver |
* for input/output slot assignment. |
*/ |
/* Description of one varying: per-component slots plus interpolation and
 * usage flags, tagged with its TGSI register index and semantic. */
struct nv50_ir_varying |
{ |
uint8_t slot[4]; /* native slots for xyzw (addresses in 32-bit words) */ |
unsigned mask : 4; /* vec4 mask */ |
unsigned linear : 1; /* linearly interpolated if true (and not flat) */ |
unsigned flat : 1; |
unsigned sc : 1; /* special colour interpolation mode (SHADE_MODEL) */ |
unsigned centroid : 1; |
unsigned patch : 1; /* patch constant value */ |
unsigned regular : 1; /* driver-specific meaning (e.g. input in sreg) */ |
unsigned input : 1; /* indicates direction of system values */ |
unsigned oread : 1; /* true if output is read from parallel TCP */ |
ubyte id; /* TGSI register index */ |
ubyte sn; /* TGSI semantic name */ |
ubyte si; /* TGSI semantic index */ |
}; |
/* Source representations; stored in nv50_ir_prog_info.bin.sourceRep. */
#define NV50_PROGRAM_IR_TGSI 0 |
#define NV50_PROGRAM_IR_SM4 1 |
#define NV50_PROGRAM_IR_GLSL 2 |
#define NV50_PROGRAM_IR_LLVM 3 |
/* Debug flag bits. All of them are 0 unless DEBUG is defined, so tests
 * against them are constant-false in non-debug builds. Note that VERBOSE
 * is written as (2 << 0), i.e. the value 2. */
#ifdef DEBUG |
# define NV50_IR_DEBUG_BASIC (1 << 0) |
# define NV50_IR_DEBUG_VERBOSE (2 << 0) |
# define NV50_IR_DEBUG_REG_ALLOC (1 << 2) |
#else |
# define NV50_IR_DEBUG_BASIC 0 |
# define NV50_IR_DEBUG_VERBOSE 0 |
# define NV50_IR_DEBUG_REG_ALLOC 0 |
#endif |
/* Semantics numbered after the TGSI ones; the offsets used here are not
 * contiguous (0, 7, 8, with the count at 10). */
#define NV50_SEMANTIC_CLIPDISTANCE (TGSI_SEMANTIC_COUNT + 0) |
#define NV50_SEMANTIC_TESSFACTOR (TGSI_SEMANTIC_COUNT + 7) |
#define NV50_SEMANTIC_TESSCOORD (TGSI_SEMANTIC_COUNT + 8) |
#define NV50_SEMANTIC_COUNT (TGSI_SEMANTIC_COUNT + 10) |
/* Tessellation partitioning modes. */
#define NV50_TESS_PART_FRACT_ODD 0 |
#define NV50_TESS_PART_FRACT_EVEN 1 |
#define NV50_TESS_PART_POW2 2 |
#define NV50_TESS_PART_INTEGER 3 |
/* Patches are numbered right after the last PIPE_PRIM_* value. */
#define NV50_PRIM_PATCHES PIPE_PRIM_MAX |
/* A (label, offset) pair; nv50_ir_prog_info.bin.syms points to an array
 * of these with bin.numSyms entries. */
struct nv50_ir_prog_symbol |
{ |
uint32_t label; |
uint32_t offset; |
}; |
/* Chipset identifiers, in the same numbering as nv50_ir_prog_info.target. */
#define NVISA_GF100_CHIPSET_C0 0xc0 |
#define NVISA_GF100_CHIPSET_D0 0xd0 |
#define NVISA_GK104_CHIPSET 0xe0 |
#define NVISA_GK20A_CHIPSET 0xea |
#define NVISA_GM107_CHIPSET 0x110 |
/* Per-program record passed to nv50_ir_generate_code() and to the
 * assignSlots callback: target and options, the program binary and its
 * source, varying tables, immediates, per-shader-type properties and
 * miscellaneous I/O information. */
struct nv50_ir_prog_info |
{ |
uint16_t target; /* chipset (0x50, 0x84, 0xc0, ...) */ |
uint8_t type; /* PIPE_SHADER */ |
uint8_t optLevel; /* optimization level (0 to 3) */ |
uint8_t dbgFlags; |
/* program binary, its source and related data */
struct { |
int16_t maxGPR; /* may be -1 if none used */ |
int16_t maxOutput; |
uint32_t tlsSpace; /* required local memory per thread */ |
uint32_t *code; |
uint32_t codeSize; |
uint8_t sourceRep; /* NV50_PROGRAM_IR */ |
const void *source; |
void *relocData; |
struct nv50_ir_prog_symbol *syms; |
uint16_t numSyms; |
} bin; |
/* system values, inputs and outputs */
struct nv50_ir_varying sv[PIPE_MAX_SHADER_INPUTS]; |
struct nv50_ir_varying in[PIPE_MAX_SHADER_INPUTS]; |
struct nv50_ir_varying out[PIPE_MAX_SHADER_OUTPUTS]; |
uint8_t numInputs; |
uint8_t numOutputs; |
uint8_t numPatchConstants; /* also included in numInputs/numOutputs */ |
uint8_t numSysVals; |
struct { |
uint32_t *buf; /* for IMMEDIATE_ARRAY */ |
uint16_t bufSize; /* size of immediate array */ |
uint16_t count; /* count of inline immediates */ |
uint32_t *data; /* inline immediate data */ |
uint8_t *type; /* for each vec4 (128 bit) */ |
} immd; |
/* properties specific to one shader type; the members share storage */
union { |
struct { |
uint32_t inputMask[4]; /* mask of attributes read (1 bit per scalar) */ |
} vp; |
struct { |
uint8_t inputPatchSize; |
uint8_t outputPatchSize; |
uint8_t partitioning; /* PIPE_TESS_PART */ |
int8_t winding; /* +1 (clockwise) / -1 (counter-clockwise) */ |
uint8_t domain; /* PIPE_PRIM_{QUADS,TRIANGLES,LINES} */ |
uint8_t outputPrim; /* PIPE_PRIM_{TRIANGLES,LINES,POINTS} */ |
} tp; |
struct { |
uint8_t inputPrim; |
uint8_t outputPrim; |
unsigned instanceCount; |
unsigned maxVertices; |
} gp; |
struct { |
unsigned numColourResults; |
boolean writesDepth; |
boolean earlyFragTests; |
boolean separateFragData; |
boolean usesDiscard; |
} fp; |
struct { |
uint32_t inputOffset; /* base address for user args */ |
uint32_t sharedOffset; /* reserved space in s[] */ |
uint32_t gridInfoBase; /* base address for NTID,NCTAID */ |
} cp; |
} prop; |
uint8_t numBarriers; |
struct { |
uint8_t clipDistance; /* index of first clip distance output */ |
uint8_t clipDistanceMask; /* mask of clip distances defined */ |
uint8_t cullDistanceMask; /* clip distance mode (1 bit per output) */ |
int8_t genUserClip; /* request user clip planes for ClipVertex */ |
uint16_t ucpBase; /* base address for UCPs */ |
uint8_t ucpCBSlot; /* constant buffer index of UCP data */ |
uint8_t pointSize; /* output index for PointSize */ |
uint8_t instanceId; /* system value index of InstanceID */ |
uint8_t vertexId; /* system value index of VertexID */ |
uint8_t edgeFlagIn; |
uint8_t edgeFlagOut; |
int8_t viewportId; /* output index of ViewportIndex */ |
uint8_t fragDepth; /* output index of FragDepth */ |
uint8_t sampleMask; /* output index of SampleMask */ |
boolean sampleInterp; /* perform sample interp on all fp inputs */ |
uint8_t backFaceColor[2]; /* input/output indices of back face colour */ |
uint8_t globalAccess; /* 1 for read, 2 for wr, 3 for rw */ |
boolean fp64; /* program uses fp64 math */ |
boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */ |
uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */ |
uint16_t texBindBase; /* base address for tex handles (nve4) */ |
uint16_t suInfoBase; /* base address for surface info (nve4) */ |
uint16_t sampleInfoBase; /* base address for sample positions */ |
uint8_t msInfoCBSlot; /* cX[] used for multisample info */ |
uint16_t msInfoBase; /* base address for multisample info */ |
} io; |
/* driver callback to assign input/output locations */ |
int (*assignSlots)(struct nv50_ir_prog_info *); |
void *driverPriv; |
}; |
/* Entry points of the code generator, declared with C linkage so that
 * both C and C++ translation units can use them. */
#ifdef __cplusplus |
extern "C" { |
#endif |
/* generate code for the program described by the given info record */
extern int nv50_ir_generate_code(struct nv50_ir_prog_info *); |
/* relocate code using relocData and the given code/library/data positions */
extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, |
uint32_t codePos, |
uint32_t libPos, |
uint32_t dataPos); |
/* obtain code that will be shared among programs */ |
extern void nv50_ir_get_target_library(uint32_t chipset, |
const uint32_t **code, uint32_t *size); |
#ifdef __cplusplus |
} |
#endif |
#endif // __NV50_IR_DRIVER_H__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp |
---|
0,0 → 1,1913 |
/* |
* Copyright 2012 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target_nvc0.h" |
// CodeEmitter for GK110 encoding of the Fermi/Kepler ISA. |
namespace nv50_ir { |
// CodeEmitter implementation producing the GK110 instruction encoding.
// Instructions are assembled by OR-ing fields into the 32-bit words of
// code[] at fixed bit positions.
class CodeEmitterGK110 : public CodeEmitter |
{ |
public: |
CodeEmitterGK110(const TargetNVC0 *); |
virtual bool emitInstruction(Instruction *); |
virtual uint32_t getMinEncodingSize(const Instruction *) const; |
virtual void prepareEmission(Function *); |
inline void setProgramType(Program::Type pType) { progType = pType; } |
private: |
const TargetNVC0 *targNVC0; |
Program::Type progType; |
const bool writeIssueDelays; |
private: |
// common instruction forms: opcode, predicate, destination and sources |
void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1); |
void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg); |
void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier); |
// helpers encoding a single field or operand |
void emitPredicate(const Instruction *); |
void setCAddress14(const ValueRef&); |
void setShortImmediate(const Instruction *, const int s); |
void setImmediate32(const Instruction *, const int s, Modifier); |
void modNegAbsF32_3b(const Instruction *, const int s); |
void emitCondCode(CondCode cc, int pos, uint8_t mask); |
void emitInterpMode(const Instruction *); |
void emitLoadStoreType(DataType ty, const int pos); |
void emitCachingMode(CacheMode c, const int pos); |
inline uint8_t getSRegEncoding(const ValueRef&); |
void emitRoundMode(RoundMode, const int pos, const int rintPos); |
void emitRoundModeF(RoundMode, const int pos); |
void emitRoundModeI(RoundMode, const int pos); |
void emitNegAbs12(const Instruction *); |
// per-operation emitters |
void emitNOP(const Instruction *); |
void emitLOAD(const Instruction *); |
void emitSTORE(const Instruction *); |
void emitMOV(const Instruction *); |
void emitINTERP(const Instruction *); |
void emitPFETCH(const Instruction *); |
void emitVFETCH(const Instruction *); |
void emitEXPORT(const Instruction *); |
void emitOUT(const Instruction *); |
void emitUADD(const Instruction *); |
void emitFADD(const Instruction *); |
void emitDADD(const Instruction *); |
void emitIMUL(const Instruction *); |
void emitFMUL(const Instruction *); |
void emitDMUL(const Instruction *); |
void emitIMAD(const Instruction *); |
void emitISAD(const Instruction *); |
void emitFMAD(const Instruction *); |
void emitDMAD(const Instruction *); |
void emitNOT(const Instruction *); |
void emitLogicOp(const Instruction *, uint8_t subOp); |
void emitPOPC(const Instruction *); |
void emitINSBF(const Instruction *); |
void emitEXTBF(const Instruction *); |
void emitBFIND(const Instruction *); |
void emitShift(const Instruction *); |
void emitSFnOp(const Instruction *, uint8_t subOp); |
void emitCVT(const Instruction *); |
void emitMINMAX(const Instruction *); |
void emitPreOp(const Instruction *); |
void emitSET(const CmpInstruction *); |
void emitSLCT(const CmpInstruction *); |
void emitSELP(const Instruction *); |
void emitTEXBAR(const Instruction *); |
void emitTEX(const TexInstruction *); |
void emitTEXCSAA(const TexInstruction *); |
void emitTXQ(const TexInstruction *); |
void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); |
void emitPIXLD(const Instruction *); |
void emitFlow(const Instruction *); |
// register id / address fields; pos is a bit position within code[] |
inline void defId(const ValueDef&, const int pos); |
inline void srcId(const ValueRef&, const int pos); |
inline void srcId(const ValueRef *, const int pos); |
inline void srcId(const Instruction *, int s, const int pos); |
inline void srcAddr32(const ValueRef&, const int pos); // address / 4 |
inline bool isLIMM(const ValueRef&, DataType ty, bool mod = false); |
}; |
// Register id encoded in place of a missing source or destination.
#define GK110_GPR_ZERO 255 |
// Modifier/flag macros. The argument b is a bit position written as hex
// digits WITHOUT the 0x prefix (it is pasted onto "0x"); the macro sets
// bit (b % 32) of code[b / 32] when the modifier or flag is present.
// They expand to a bare `if` statement and refer to a local named `i`, so
// use them only as complete statements inside emitters that have `i`.
#define NEG_(b, s) \ |
if (i->src(s).mod.neg()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
#define ABS_(b, s) \ |
if (i->src(s).mod.abs()) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
#define NOT_(b, s) if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) \ |
code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32) |
// RND_(b, t) calls emitRoundMode<t>(i->rnd, 0x<b>), e.g. t = F or I.
#define RND_(b, t) emitRoundMode##t(i->rnd, 0x##b) |
// Register data of the representative value of a source / definition.
#define SDATA(a) ((a).rep()->reg.data) |
#define DDATA(a) ((a).rep()->reg.data) |
void CodeEmitterGK110::srcId(const ValueRef& src, const int pos) |
{ |
code[pos / 32] |= (src.get() ? SDATA(src).id : GK110_GPR_ZERO) << (pos % 32); |
} |
void CodeEmitterGK110::srcId(const ValueRef *src, const int pos) |
{ |
code[pos / 32] |= (src ? SDATA(*src).id : GK110_GPR_ZERO) << (pos % 32); |
} |
void CodeEmitterGK110::srcId(const Instruction *insn, int s, int pos) |
{ |
int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : GK110_GPR_ZERO; |
code[pos / 32] |= r << (pos % 32); |
} |
void CodeEmitterGK110::srcAddr32(const ValueRef& src, const int pos) |
{ |
code[pos / 32] |= (SDATA(src).offset >> 2) << (pos % 32); |
} |
void CodeEmitterGK110::defId(const ValueDef& def, const int pos) |
{ |
code[pos / 32] |= (def.get() ? DDATA(def).id : GK110_GPR_ZERO) << (pos % 32); |
} |
bool CodeEmitterGK110::isLIMM(const ValueRef& ref, DataType ty, bool mod) |
{ |
const ImmediateValue *imm = ref.get()->asImm(); |
return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); |
} |
void |
CodeEmitterGK110::emitRoundMode(RoundMode rnd, const int pos, const int rintPos) |
{ |
bool rint = false; |
uint8_t n; |
switch (rnd) { |
case ROUND_MI: rint = true; /* fall through */ case ROUND_M: n = 1; break; |
case ROUND_PI: rint = true; /* fall through */ case ROUND_P: n = 2; break; |
case ROUND_ZI: rint = true; /* fall through */ case ROUND_Z: n = 3; break; |
default: |
rint = rnd == ROUND_NI; |
n = 0; |
assert(rnd == ROUND_N || rnd == ROUND_NI); |
break; |
} |
code[pos / 32] |= n << (pos % 32); |
if (rint && rintPos >= 0) |
code[rintPos / 32] |= 1 << (rintPos % 32); |
} |
void |
CodeEmitterGK110::emitRoundModeF(RoundMode rnd, const int pos) |
{ |
uint8_t n; |
switch (rnd) { |
case ROUND_M: n = 1; break; |
case ROUND_P: n = 2; break; |
case ROUND_Z: n = 3; break; |
default: |
n = 0; |
assert(rnd == ROUND_N); |
break; |
} |
code[pos / 32] |= n << (pos % 32); |
} |
void |
CodeEmitterGK110::emitRoundModeI(RoundMode rnd, const int pos) |
{ |
uint8_t n; |
switch (rnd) { |
case ROUND_MI: n = 1; break; |
case ROUND_PI: n = 2; break; |
case ROUND_ZI: n = 3; break; |
default: |
n = 0; |
assert(rnd == ROUND_NI); |
break; |
} |
code[pos / 32] |= n << (pos % 32); |
} |
// Encode condition code cc at bit position pos, keeping only the bits of
// the hardware value selected by mask. Unknown codes assert and encode 0.
void CodeEmitterGK110::emitCondCode(CondCode cc, int pos, uint8_t mask)
{
   uint8_t hw = 0;

   switch (cc) {
   // comparison results
   case CC_FL:
      hw = 0x00;
      break;
   case CC_LT:
      hw = 0x01;
      break;
   case CC_EQ:
      hw = 0x02;
      break;
   case CC_LE:
      hw = 0x03;
      break;
   case CC_GT:
      hw = 0x04;
      break;
   case CC_NE:
      hw = 0x05;
      break;
   case CC_GE:
      hw = 0x06;
      break;
   // U-suffixed variants: the base code with bit 3 set
   case CC_LTU:
      hw = 0x09;
      break;
   case CC_EQU:
      hw = 0x0a;
      break;
   case CC_LEU:
      hw = 0x0b;
      break;
   case CC_GTU:
      hw = 0x0c;
      break;
   case CC_NEU:
      hw = 0x0d;
      break;
   case CC_GEU:
      hw = 0x0e;
      break;
   case CC_TR:
      hw = 0x0f;
      break;
   // codes 0x10 and up
   case CC_NO:
      hw = 0x10;
      break;
   case CC_NC:
      hw = 0x11;
      break;
   case CC_NS:
      hw = 0x12;
      break;
   case CC_NA:
      hw = 0x13;
      break;
   case CC_A:
      hw = 0x14;
      break;
   case CC_S:
      hw = 0x15;
      break;
   case CC_C:
      hw = 0x16;
      break;
   case CC_O:
      hw = 0x17;
      break;
   default:
      assert(!"invalid condition code");
      break;
   }

   code[pos / 32] |= (hw & mask) << (pos % 32);
}
void |
CodeEmitterGK110::emitPredicate(const Instruction *i) |
{ |
if (i->predSrc >= 0) { |
srcId(i->src(i->predSrc), 18); |
if (i->cc == CC_NOT_P) |
code[0] |= 8 << 18; // negate |
assert(i->getPredicate()->reg.file == FILE_PREDICATE); |
} else { |
code[0] |= 7 << 18; |
} |
} |
void |
CodeEmitterGK110::setCAddress14(const ValueRef& src) |
{ |
const Storage& res = src.get()->asSym()->reg; |
const int32_t addr = res.data.offset / 4; |
code[0] |= (addr & 0x01ff) << 23; |
code[1] |= (addr & 0x3e00) >> 9; |
code[1] |= res.fileIndex << 5; |
} |
void |
CodeEmitterGK110::setShortImmediate(const Instruction *i, const int s) |
{ |
const uint32_t u32 = i->getSrc(s)->asImm()->reg.data.u32; |
const uint64_t u64 = i->getSrc(s)->asImm()->reg.data.u64; |
if (i->sType == TYPE_F32) { |
assert(!(u32 & 0x00000fff)); |
code[0] |= ((u32 & 0x001ff000) >> 12) << 23; |
code[1] |= ((u32 & 0x7fe00000) >> 21); |
code[1] |= ((u32 & 0x80000000) >> 4); |
} else |
if (i->sType == TYPE_F64) { |
assert(!(u64 & 0x00000fffffffffffULL)); |
code[0] |= ((u64 & 0x001ff00000000000ULL) >> 44) << 23; |
code[1] |= ((u64 & 0x7fe0000000000000ULL) >> 53); |
code[1] |= ((u64 & 0x8000000000000000ULL) >> 36); |
} else { |
assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); |
code[0] |= (u32 & 0x001ff) << 23; |
code[1] |= (u32 & 0x7fe00) >> 9; |
code[1] |= (u32 & 0x80000) << 8; |
} |
} |
// Encode the full 32-bit immediate of source s, after optionally applying modifier mod to |
// it. The low 9 bits go to code[0] from bit 23, the remaining 23 bits to code[1] from bit 0. |
void |
CodeEmitterGK110::setImmediate32(const Instruction *i, const int s, |
                                 Modifier mod) |
{ |
   uint32_t bits = i->getSrc(s)->asImm()->reg.data.u32; |
   if (mod) { |
      // Fold the modifier into a temporary copy typed as the instruction's source type. |
      ImmediateValue folded(i->getSrc(s)->asImm(), i->sType); |
      mod.applyTo(folded); |
      bits = folded.reg.data.u32; |
   } |
   const uint32_t lowPart = bits << 23; |
   const uint32_t highPart = bits >> 9; |
   code[0] |= lowPart; |
   code[1] |= highPart; |
} |
// Start an instruction in the form that carries a full 32-bit immediate (callers in this |
// file select it when isLIMM() holds). Initializes code[0] to ctg and code[1] to opc << 20, |
// then encodes predicate, destination 0 and up to three sources. mod is applied to any |
// immediate source via setImmediate32(). |
void |
CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg, |
Modifier mod) |
{ |
code[0] = ctg; |
code[1] = opc << 20; |
emitPredicate(i); |
defId(i->def(0), 2); |
for (int s = 0; s < 3 && i->srcExists(s); ++s) { |
switch (i->src(s).getFile()) { |
case FILE_GPR: |
// source 0 uses bit position 10, any later GPR source uses position 42 |
srcId(i->src(s), s ? 42 : 10); |
break; |
case FILE_IMMEDIATE: |
setImmediate32(i, s, mod); |
break; |
default: |
// other register files are not encoded by this form |
break; |
} |
} |
} |
// Start a single-source instruction whose source 0 is either a c[] location or a GPR. |
// Initializes code[0] to ctg and code[1] to opc << 20, encodes predicate and destination 0, |
// then ORs a source-kind selector into code[1] bits 28..31 (0x4 for c[], 0xc for GPR). |
// Any other source file trips the assert. |
void |
CodeEmitterGK110::emitForm_C(const Instruction *i, uint32_t opc, uint8_t ctg) |
{ |
code[0] = ctg; |
code[1] = opc << 20; |
emitPredicate(i); |
defId(i->def(0), 2); |
switch (i->src(0).getFile()) { |
case FILE_MEMORY_CONST: |
code[1] |= 0x4 << 28; |
setCAddress14(i->src(0)); |
break; |
case FILE_GPR: |
code[1] |= 0xc << 28; |
srcId(i->src(0), 23); |
break; |
default: |
assert(0); |
break; |
} |
} |
// 0x2 for GPR, c[] and 0x1 for short immediate |
// |
// Common encoder for instructions with up to three sources. If source 1 is an immediate |
// the short-immediate form is used (code[0] = 0x1, opcode opc1); otherwise the register / |
// c[] form is used (code[0] = 0x2, opcode opc2 with selector 0xc in code[1] bits 28..31). |
// A c[] source clears one selector bit: bit 30 when it is source 2, bit 31 otherwise. |
void |
CodeEmitterGK110::emitForm_21(const Instruction *i, uint32_t opc2, |
uint32_t opc1) |
{ |
const bool imm = i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE; |
// bit position for a GPR source 1: 23 normally, 42 when source 2 is a c[] location |
int s1 = 23; |
if (i->srcExists(2) && i->src(2).getFile() == FILE_MEMORY_CONST) |
s1 = 42; |
if (imm) { |
code[0] = 0x1; |
code[1] = opc1 << 20; |
} else { |
code[0] = 0x2; |
code[1] = (0xc << 28) | (opc2 << 20); |
} |
emitPredicate(i); |
defId(i->def(0), 2); |
for (int s = 0; s < 3 && i->srcExists(s); ++s) { |
switch (i->src(s).getFile()) { |
case FILE_MEMORY_CONST: |
code[1] &= (s == 2) ? ~(0x4 << 28) : ~(0x8 << 28); |
setCAddress14(i->src(s)); |
break; |
case FILE_IMMEDIATE: |
setShortImmediate(i, s); |
break; |
case FILE_GPR: |
// source 0 -> position 10, source 1 -> s1, source 2 -> position 42 |
srcId(i->src(s), s ? ((s == 2) ? 42 : s1) : 10); |
break; |
default: |
// ignore here, can be predicate or flags, but must not be address |
break; |
} |
} |
// 0x0 = invalid |
// 0xc = rrr |
// 0x8 = rrc |
// 0x4 = rcr |
assert(imm || (code[1] & (0xc << 28))); |
} |
// Fold the abs/neg modifiers of source s into bit 27 of code[1]: abs clears the bit, |
// then neg toggles it. |
inline void |
CodeEmitterGK110::modNegAbsF32_3b(const Instruction *i, const int s) |
{ |
   const uint32_t signBit = 1 << 27; |
   if (i->src(s).mod.abs()) |
      code[1] &= ~signBit; |
   if (i->src(s).mod.neg()) |
      code[1] ^= signBit; |
} |
// Write a NOP into code[0..1]. With an instruction, its predicate is encoded on top of the |
// base word; without one (i == NULL) a fixed first word is written instead. |
void |
CodeEmitterGK110::emitNOP(const Instruction *i) |
{ |
   code[1] = 0x85800000; |
   if (!i) { |
      code[0] = 0x001c3c02; |
      return; |
   } |
   code[0] = 0x00003c02; |
   emitPredicate(i); |
} |
// Emit a multiply-add; emitInstruction() routes OP_MAD/OP_FMA here for float types other |
// than F64. Source 1 must not need a 32-bit immediate. |
// NOTE(review): NEG_/SAT_/RND_/FTZ_/DNZ_ are macros defined outside this chunk; they |
// presumably set a modifier bit at the given (hex) bit position - confirm there. |
void |
CodeEmitterGK110::emitFMAD(const Instruction *i) |
{ |
assert(!isLIMM(i->src(1), TYPE_F32)); |
emitForm_21(i, 0x0c0, 0x940); |
NEG_(34, 2); |
SAT_(35); |
RND_(36, F); |
FTZ_(38); |
DNZ_(39); |
// sign of the product: negate if exactly one of src0/src1 is negated |
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
if (neg1) |
code[1] ^= 1 << 27; |
} else |
if (neg1) { |
code[1] |= 1 << 19; |
} |
} |
// Emit a multiply-add; emitInstruction() routes OP_MAD/OP_FMA here when dType is F64. |
// Saturate and flush-to-zero are not supported (asserted). |
void |
CodeEmitterGK110::emitDMAD(const Instruction *i) |
{ |
assert(!i->saturate); |
assert(!i->ftz); |
emitForm_21(i, 0x1b8, 0xb38); |
NEG_(34, 2); |
RND_(36, F); |
// sign of the product: negate if exactly one of src0/src1 is negated |
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
if (neg1) |
code[1] ^= 1 << 27; |
} else |
if (neg1) { |
code[1] |= 1 << 19; |
} |
} |
// Emit a multiply; emitInstruction() routes OP_MUL here for float types other than F64. |
// Two encodings exist: the 32-bit-immediate form (no postFactor allowed) and the common |
// emitForm_21 form, which also encodes postFactor at code[1] bit 12. |
void |
CodeEmitterGK110::emitFMUL(const Instruction *i) |
{ |
// result sign: negate if exactly one of the two sources is negated |
bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
assert(i->postFactor >= -3 && i->postFactor <= 3); |
if (isLIMM(i->src(1), TYPE_F32)) { |
emitForm_L(i, 0x200, 0x2, Modifier(0)); |
FTZ_(38); |
DNZ_(39); |
SAT_(3a); |
if (neg) |
code[1] ^= 1 << 22; |
assert(i->postFactor == 0); |
} else { |
emitForm_21(i, 0x234, 0xc34); |
// positive factors 1..3 are stored as 6..4, negative factors -1..-3 as 1..3 |
code[1] |= ((i->postFactor > 0) ? |
(7 - i->postFactor) : (0 - i->postFactor)) << 12; |
RND_(2a, F); |
FTZ_(2f); |
DNZ_(30); |
SAT_(35); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
if (neg) |
code[1] ^= 1 << 27; |
} else |
if (neg) { |
code[1] |= 1 << 19; |
} |
} |
} |
// Emit a multiply; emitInstruction() routes OP_MUL here when dType is F64. postFactor, |
// saturate, ftz and dnz are not supported (asserted). |
void |
CodeEmitterGK110::emitDMUL(const Instruction *i) |
{ |
// result sign: negate if exactly one of the two sources is negated |
bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
assert(!i->postFactor); |
assert(!i->saturate); |
assert(!i->ftz); |
assert(!i->dnz); |
emitForm_21(i, 0x240, 0xc40); |
RND_(2a, F); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
if (neg) |
code[1] ^= 1 << 27; |
} else |
if (neg) { |
code[1] |= 1 << 19; |
} |
} |
// Emit a multiply; emitInstruction() routes OP_MUL here for non-float types. Source |
// modifiers neg/abs are not allowed. The 32-bit-immediate form cannot express |
// NV50_IR_SUBOP_MUL_HIGH (asserted); the common form flags it at code[1] bit 10. |
void |
CodeEmitterGK110::emitIMUL(const Instruction *i) |
{ |
assert(!i->src(0).mod.neg() && !i->src(1).mod.neg()); |
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); |
if (isLIMM(i->src(1), TYPE_S32)) { |
emitForm_L(i, 0x280, 2, Modifier(0)); |
assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH); |
// two bits set together for a signed source type |
if (i->sType == TYPE_S32) |
code[1] |= 3 << 25; |
} else { |
emitForm_21(i, 0x21c, 0xc1c); |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) |
code[1] |= 1 << 10; |
// two bits set together for a signed source type |
if (i->sType == TYPE_S32) |
code[1] |= 3 << 11; |
} |
} |
// Emit an add/subtract; emitInstruction() routes OP_ADD/OP_SUB here for float types other |
// than F64. OP_SUB is expressed by negating source 1: folded into the immediate for the |
// 32-bit-immediate form, or by toggling a sign bit in the other forms. |
void |
CodeEmitterGK110::emitFADD(const Instruction *i) |
{ |
if (isLIMM(i->src(1), TYPE_F32)) { |
assert(i->rnd == ROUND_N); |
assert(!i->saturate); |
// combine source 1's own modifier with the negation implied by OP_SUB |
Modifier mod = i->src(1).mod ^ |
Modifier(i->op == OP_SUB ? NV50_IR_MOD_NEG : 0); |
emitForm_L(i, 0x400, 0, mod); |
FTZ_(3a); |
NEG_(3b, 0); |
ABS_(39, 0); |
} else { |
emitForm_21(i, 0x22c, 0xc2c); |
FTZ_(2f); |
RND_(2a, F); |
ABS_(31, 0); |
NEG_(33, 0); |
SAT_(35); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
modNegAbsF32_3b(i, 1); |
if (i->op == OP_SUB) code[1] ^= 1 << 27; |
} else { |
ABS_(34, 1); |
NEG_(30, 1); |
if (i->op == OP_SUB) code[1] ^= 1 << 16; |
} |
} |
} |
// Emit an add/subtract; emitInstruction() routes OP_ADD/OP_SUB here when dType is F64. |
// Saturate and flush-to-zero are not supported (asserted). OP_SUB toggles the sign bit |
// used for source 1 in the selected form. |
void |
CodeEmitterGK110::emitDADD(const Instruction *i) |
{ |
assert(!i->saturate); |
assert(!i->ftz); |
emitForm_21(i, 0x238, 0xc38); |
RND_(2a, F); |
ABS_(31, 0); |
NEG_(33, 0); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
modNegAbsF32_3b(i, 1); |
if (i->op == OP_SUB) code[1] ^= 1 << 27; |
} else { |
NEG_(30, 1); |
ABS_(34, 1); |
if (i->op == OP_SUB) code[1] ^= 1 << 16; |
} |
} |
// Emit an add/subtract; emitInstruction() routes OP_ADD/OP_SUB here for non-float types. |
// addOp bit 1 = negate source 0, bit 0 = negate source 1 (toggled for OP_SUB). |
// The 32-bit-immediate form supports neither a carry output nor a carry input (asserted). |
void |
CodeEmitterGK110::emitUADD(const Instruction *i) |
{ |
uint8_t addOp = (i->src(0).mod.neg() << 1) | i->src(1).mod.neg(); |
if (i->op == OP_SUB) |
addOp ^= 1; |
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); |
if (isLIMM(i->src(1), TYPE_S32)) { |
// negation of source 1 is folded into the immediate itself |
emitForm_L(i, 0x400, 1, Modifier((addOp & 1) ? NV50_IR_MOD_NEG : 0)); |
if (addOp & 2) |
code[1] |= 1 << 27; |
assert(!i->defExists(1)); |
assert(i->flagsSrc < 0); |
SAT_(39); |
} else { |
emitForm_21(i, 0x208, 0xc08); |
assert(addOp != 3); // would be add-plus-one |
code[1] |= addOp << 19; |
if (i->defExists(1)) |
code[1] |= 1 << 18; // write carry |
if (i->flagsSrc >= 0) |
code[1] |= 1 << 14; // add carry |
SAT_(35); |
} |
} |
// TODO: shl-add |
// |
// Emit a multiply-add; emitInstruction() routes OP_MAD/OP_FMA here for non-float types. |
// addOp bit 1 = negate source 2, bit 0 = negate the product (src0 neg XOR src1 neg); |
// both set at once is not encodable (asserted). |
void |
CodeEmitterGK110::emitIMAD(const Instruction *i) |
{ |
uint8_t addOp = |
(i->src(2).mod.neg() << 1) | (i->src(0).mod.neg() ^ i->src(1).mod.neg()); |
emitForm_21(i, 0x100, 0xa00); |
assert(addOp != 3); |
code[1] |= addOp << 26; |
// a signed source type sets two separate bits |
if (i->sType == TYPE_S32) |
code[1] |= (1 << 19) | (1 << 24); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
assert(!i->subOp); |
SAT_(39); |
} else { |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) |
code[1] |= 1 << 25; |
SAT_(35); |
} |
} |
// Emit OP_SAD. Only S32 and U32 destination types are accepted; S32 additionally sets |
// bit 19 of code[1]. |
void |
CodeEmitterGK110::emitISAD(const Instruction *i) |
{ |
   const bool isSigned = (i->dType == TYPE_S32); |
   assert(isSigned || i->dType == TYPE_U32); |
   emitForm_21(i, 0x1f4, 0xb74); |
   if (!isSigned) |
      return; |
   code[1] |= 1 << 19; |
} |
// Emit OP_NOT as a logic op of the form "dst = 0 <mov2> ~src" (see the fixed opcode |
// words below). The single source, source 0, may be a GPR or a c[] location; any other |
// file trips the assert. |
void |
CodeEmitterGK110::emitNOT(const Instruction *i) |
{ |
   code[0] = 0x0003fc02; // logop(mov2) dst, 0, not src |
   code[1] = 0x22003800; |
   emitPredicate(i); |
   defId(i->def(0), 2); |
   switch (i->src(0).getFile()) { |
   case FILE_GPR: |
      code[1] |= 0xc << 28; |
      srcId(i->src(0), 23); |
      break; |
   case FILE_MEMORY_CONST: |
      code[1] |= 0x4 << 28; |
      // The operand being encoded is source 0 (the one whose file was just tested); |
      // OP_NOT is unary, so there is no source 1 to take an address from. |
      setCAddress14(i->src(0)); |
      break; |
   default: |
      assert(0); |
      break; |
   } |
} |
// Emit a two-source logic operation. emitInstruction() passes subOp 0 for OP_AND, 1 for |
// OP_OR and 2 for OP_XOR. The 32-bit-immediate form places subOp at code[1] bit 24 and |
// applies source 1's modifier to the immediate; the common form places it at bit 12. |
// NOTE(review): NOT_ is a macro defined outside this chunk - presumably it sets the |
// bitwise-not flag of the given source at the given (hex) bit position. |
void |
CodeEmitterGK110::emitLogicOp(const Instruction *i, uint8_t subOp) |
{ |
if (isLIMM(i->src(1), TYPE_S32)) { |
emitForm_L(i, 0x200, 0, i->src(1).mod); |
code[1] |= subOp << 24; |
NOT_(3a, 0); |
} else { |
emitForm_21(i, 0x220, 0xc20); |
code[1] |= subOp << 12; |
NOT_(2a, 0); |
NOT_(2b, 1); |
} |
} |
// Emit OP_POPCNT. Source 1 must not require a 32-bit immediate. The NOT flag of source 1 |
// is only encoded when the register / c[] form was chosen (code[0] bit 0 clear). |
void |
CodeEmitterGK110::emitPOPC(const Instruction *i) |
{ |
assert(!isLIMM(i->src(1), TYPE_S32, true)); |
emitForm_21(i, 0x204, 0xc04); |
NOT_(2a, 0); |
if (!(code[0] & 0x1)) |
NOT_(2b, 1); |
} |
// Emit OP_INSBF: nothing beyond the common emitForm_21 encoding with this op's two |
// opcode values (register / c[] form first, short-immediate form second). |
void |
CodeEmitterGK110::emitINSBF(const Instruction *i) |
{ |
emitForm_21(i, 0x1f8, 0xb78); |
} |
// Emit OP_EXTBF. On top of the common encoding, a signed (S32) destination type sets |
// 0x80000 in code[1] and the NV50_IR_SUBOP_EXTBF_REV sub-operation sets 0x800. |
void |
CodeEmitterGK110::emitEXTBF(const Instruction *i) |
{ |
   emitForm_21(i, 0x600, 0xc00); |
   uint32_t extra = 0; |
   if (i->dType == TYPE_S32) |
      extra |= 0x80000; |
   if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) |
      extra |= 0x800; |
   code[1] |= extra; |
} |
// Emit OP_BFIND. Flags ORed into code[1] after the common single-source encoding: |
// 0x80000 for an S32 destination type, 0x800 when source 0 carries exactly the NOT |
// modifier, and 0x1000 for the NV50_IR_SUBOP_BFIND_SAMT sub-operation. |
void |
CodeEmitterGK110::emitBFIND(const Instruction *i) |
{ |
   emitForm_C(i, 0x218, 0x2); |
   uint32_t extra = 0; |
   if (i->dType == TYPE_S32) |
      extra |= 0x80000; |
   if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) |
      extra |= 0x800; |
   if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) |
      extra |= 0x1000; |
   code[1] |= extra; |
} |
// Emit OP_SHL / OP_SHR. A right shift with a signed destination type sets bit 19 of |
// code[1]; NV50_IR_SUBOP_SHIFT_WRAP sets bit 10 for either direction. |
void |
CodeEmitterGK110::emitShift(const Instruction *i) |
{ |
   const bool rightShift = (i->op == OP_SHR); |
   if (rightShift) |
      emitForm_21(i, 0x214, 0xc14); |
   else |
      emitForm_21(i, 0x224, 0xc24); |
   if (rightShift && isSignedType(i->dType)) |
      code[1] |= 1 << 19; |
   const bool wrap = (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP); |
   if (wrap) |
      code[1] |= 1 << 10; |
} |
// Emit OP_PRESIN / OP_PREEX2 (both dispatched here by emitInstruction()); OP_PREEX2 is |
// distinguished by bit 10 of code[1]. |
void |
CodeEmitterGK110::emitPreOp(const Instruction *i) |
{ |
emitForm_C(i, 0x248, 0x2); |
if (i->op == OP_PREEX2) |
code[1] |= 1 << 10; |
NEG_(30, 0); |
ABS_(34, 0); |
} |
// Emit a special-function operation on one GPR source. subOp selects the function and is |
// stored in code[0] from bit 23; emitInstruction() passes 0 = COS, 1 = SIN, 2 = EX2, |
// 3 = LG2, 4 + 2 * insn->subOp = RCP, 5 + 2 * insn->subOp = RSQ. |
void |
CodeEmitterGK110::emitSFnOp(const Instruction *i, uint8_t subOp) |
{ |
code[0] = 0x00000002 | (subOp << 23); |
code[1] = 0x84000000; |
emitPredicate(i); |
defId(i->def(0), 2); |
srcId(i->src(0), 10); |
NEG_(33, 0); |
ABS_(31, 0); |
SAT_(35); |
} |
// Emit OP_MIN / OP_MAX. The opcode pair depends on dType (U32/S32, F32 or F64; anything |
// else asserts). MIN and MAX share the opcode and differ only in the value ORed into |
// code[1] bits 10..13 (0x1c00 for MIN, 0x3c00 for MAX). |
void |
CodeEmitterGK110::emitMINMAX(const Instruction *i) |
{ |
uint32_t op2, op1; |
switch (i->dType) { |
case TYPE_U32: |
case TYPE_S32: |
op2 = 0x210; |
op1 = 0xc10; |
break; |
case TYPE_F32: |
op2 = 0x230; |
op1 = 0xc30; |
break; |
case TYPE_F64: |
op2 = 0x228; |
op1 = 0xc28; |
break; |
default: |
assert(0); |
op2 = 0; |
op1 = 0; |
break; |
} |
emitForm_21(i, op2, op1); |
if (i->dType == TYPE_S32) |
code[1] |= 1 << 19; |
code[1] |= (i->op == OP_MIN) ? 0x1c00 : 0x3c00; // [!]pt |
FTZ_(2f); |
ABS_(31, 0); |
NEG_(33, 0); |
// emitForm_21 sets code[0] to 0x1 for the short-immediate form |
if (code[0] & 0x1) { |
modNegAbsF32_3b(i, 1); |
} else { |
ABS_(34, 1); |
NEG_(30, 1); |
} |
} |
// Emit a conversion. emitInstruction() also routes OP_ABS, OP_NEG, OP_CEIL, OP_FLOOR, |
// OP_TRUNC and OP_SAT here; each is expressed as a conversion with an adjusted rounding |
// mode, saturate flag or abs/neg modifier. The opcode is chosen from whether source and |
// destination types are float or integer. |
void |
CodeEmitterGK110::emitCVT(const Instruction *i) |
{ |
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); |
const bool f2i = !isFloatType(i->dType) && isFloatType(i->sType); |
const bool i2f = isFloatType(i->dType) && !isFloatType(i->sType); |
bool sat = i->saturate; |
bool abs = i->src(0).mod.abs(); |
bool neg = i->src(0).mod.neg(); |
RoundMode rnd = i->rnd; |
// float-to-float uses the *I rounding-mode variants |
switch (i->op) { |
case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; |
case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; |
case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; |
case OP_SAT: sat = true; break; |
case OP_NEG: neg = !neg; break; |
case OP_ABS: abs = true; neg = false; break; |
default: |
break; |
} |
// negating a U32 is encoded with a signed destination type |
DataType dType; |
if (i->op == OP_NEG && i->dType == TYPE_U32) |
dType = TYPE_S32; |
else |
dType = i->dType; |
uint32_t op; |
if (f2f) op = 0x254; |
else if (f2i) op = 0x258; |
else if (i2f) op = 0x25c; |
else op = 0x260; |
emitForm_C(i, op, 0x2); |
FTZ_(2f); |
if (neg) code[1] |= 1 << 16; |
if (abs) code[1] |= 1 << 20; |
if (sat) code[1] |= 1 << 21; |
emitRoundMode(rnd, 32 + 10, f2f ? (32 + 13) : -1); |
// log2 of the type sizes: destination at bit 10, source at bit 12 |
code[0] |= typeSizeofLog2(dType) << 10; |
code[0] |= typeSizeofLog2(i->sType) << 12; |
// signedness flags: destination 0x4000, source 0x8000 |
if (isSignedIntType(dType)) |
code[0] |= 0x4000; |
if (isSignedIntType(i->sType)) |
code[0] |= 0x8000; |
} |
// Emit a comparison (OP_SET, OP_SET_AND, OP_SET_OR, OP_SET_XOR). Two families exist, |
// selected by the file of destination 0: one writing a predicate and one writing a |
// non-predicate destination. Opcodes within each family depend on sType (F32, F64, other). |
// For the combining variants the extra operand is source 2. |
void |
CodeEmitterGK110::emitSET(const CmpInstruction *i) |
{ |
uint16_t op1, op2; |
if (i->def(0).getFile() == FILE_PREDICATE) { |
switch (i->sType) { |
case TYPE_F32: op2 = 0x1d8; op1 = 0xb58; break; |
case TYPE_F64: op2 = 0x1c0; op1 = 0xb40; break; |
default: |
op2 = 0x1b0; |
op1 = 0xb30; |
break; |
} |
emitForm_21(i, op2, op1); |
NEG_(2e, 0); |
ABS_(9, 0); |
// code[0] bit 0 clear means the register / c[] form was chosen by emitForm_21 |
if (!(code[0] & 0x1)) { |
NEG_(8, 1); |
ABS_(2f, 1); |
} else { |
modNegAbsF32_3b(i, 1); |
} |
FTZ_(32); |
// normal DST field is negated predicate result |
// move the 3-bit value written at bits 2..4 up to bits 5..7 and clear bits 2..4 |
code[0] = (code[0] & ~0xfc) | ((code[0] << 3) & 0xe0); |
// bits 2..4 then take the second destination, or 7 (0x1c) when there is none |
if (i->defExists(1)) |
defId(i->def(1), 2); |
else |
code[0] |= 0x1c; |
} else { |
switch (i->sType) { |
case TYPE_F32: op2 = 0x000; op1 = 0x800; break; |
case TYPE_F64: op2 = 0x080; op1 = 0x900; break; |
default: |
op2 = 0x1a8; |
op1 = 0xb28; |
break; |
} |
emitForm_21(i, op2, op1); |
NEG_(2e, 0); |
ABS_(39, 0); |
if (!(code[0] & 0x1)) { |
NEG_(38, 1); |
ABS_(2f, 1); |
} else { |
modNegAbsF32_3b(i, 1); |
} |
FTZ_(3a); |
// flag set when the result itself is F32-typed |
if (i->dType == TYPE_F32) |
code[1] |= 1 << 23; |
} |
if (i->sType == TYPE_S32) |
code[1] |= 1 << 19; |
if (i->op != OP_SET) { |
// combining operation stored at code[1] bit 16: 0 = AND, 1 = OR, 2 = XOR |
switch (i->op) { |
case OP_SET_AND: code[1] |= 0x0 << 16; break; |
case OP_SET_OR: code[1] |= 0x1 << 16; break; |
case OP_SET_XOR: code[1] |= 0x2 << 16; break; |
default: |
assert(0); |
break; |
} |
srcId(i->src(2), 0x2a); |
} else { |
// plain OP_SET: fill the field at bit 10 of code[1] with 7 instead of a source |
code[1] |= 0x7 << 10; |
} |
// float compares use a 4-bit condition field at bit 0x33, others a 3-bit field at 0x34 |
emitCondCode(i->setCond, |
isFloatType(i->sType) ? 0x33 : 0x34, |
isFloatType(i->sType) ? 0xf : 0x7); |
} |
// Emit OP_SLCT. If source 2 is negated, the condition code is replaced by |
// reverseCondCode(cc) rather than encoding the negation. An F32 destination type uses |
// its own opcode pair with a 4-bit condition field; other types use a 3-bit field. |
void |
CodeEmitterGK110::emitSLCT(const CmpInstruction *i) |
{ |
CondCode cc = i->setCond; |
if (i->src(2).mod.neg()) |
cc = reverseCondCode(cc); |
if (i->dType == TYPE_F32) { |
emitForm_21(i, 0x1d0, 0xb50); |
FTZ_(32); |
emitCondCode(cc, 0x33, 0xf); |
} else { |
emitForm_21(i, 0x1a0, 0xb20); |
emitCondCode(cc, 0x34, 0x7); |
} |
} |
// Emit OP_SELP. Bit 13 of code[1] is set when exactly one of these holds: the |
// instruction's cc is CC_NOT_P, or source 2 carries the NOT modifier. |
void CodeEmitterGK110::emitSELP(const Instruction *i) |
{ |
   emitForm_21(i, 0x250, 0x050); |
   const bool ccNegated = (i->cc == CC_NOT_P); |
   const bool srcNegated = (bool)(i->src(2).mod & Modifier(NV50_IR_MOD_NOT)); |
   if (ccNegated != srcNegated) |
      code[1] |= 1 << 13; |
} |
// Emit OP_TEXBAR: fixed opcode words with i->subOp stored in code[0] from bit 23, plus |
// the instruction's predicate. |
void CodeEmitterGK110::emitTEXBAR(const Instruction *i) |
{ |
   code[1] = 0x77000000; |
   code[0] = (i->subOp << 23) | 0x0000003e; |
   emitPredicate(i); |
} |
// Encode a TEXCSAA instruction: fixed opcode words, i->tex.r at code[1] bit 9, the |
// liveOnly flag in the top bit of code[0], then destination 0 and source 0. |
// Note that no predicate is encoded by this function. |
void CodeEmitterGK110::emitTEXCSAA(const TexInstruction *i) |
{ |
code[0] = 0x00000002; |
code[1] = 0x76c00000; |
code[1] |= i->tex.r << 9; |
// code[1] |= i->tex.s << (9 + 8); |
if (i->tex.liveOnly) |
code[0] |= 0x80000000; |
defId(i->def(0), 2); |
srcId(i->src(0), 10); |
} |
// Return true when the instruction following i is a texture operation whose sources 0 |
// and (if present) 1 do not interfere with i's destination 0. |
static inline bool |
isNextIndependentTex(const TexInstruction *i) |
{ |
   if (i->next && isTextureOp(i->next->op)) { |
      const bool clash0 = i->getDef(0)->interfers(i->next->getSrc(0)); |
      if (!clash0) |
         return !(i->next->srcExists(1) && |
                  i->getDef(0)->interfers(i->next->getSrc(1))); |
   } |
   return false; |
} |
// Emit a texture instruction (OP_TEX, TXB, TXL, TXD, TXF, TXG, TXLQ). The opcode words |
// depend on the operation and on whether the texture is addressed indirectly |
// (tex.rIndirectSrc >= 0); in the direct case i->tex.r is stored in code[1] at an |
// operation-specific bit position. |
void |
CodeEmitterGK110::emitTEX(const TexInstruction *i) |
{ |
const bool ind = i->tex.rIndirectSrc >= 0; |
if (ind) { |
code[0] = 0x00000002; |
switch (i->op) { |
case OP_TXD: |
code[1] = 0x7e000000; |
break; |
case OP_TXLQ: |
code[1] = 0x7e800000; |
break; |
case OP_TXF: |
code[1] = 0x78000000; |
break; |
case OP_TXG: |
code[1] = 0x7dc00000; |
break; |
default: |
code[1] = 0x7d800000; |
break; |
} |
} else { |
switch (i->op) { |
case OP_TXD: |
code[0] = 0x00000002; |
code[1] = 0x76000000; |
code[1] |= i->tex.r << 9; |
break; |
case OP_TXLQ: |
code[0] = 0x00000002; |
code[1] = 0x76800000; |
code[1] |= i->tex.r << 9; |
break; |
case OP_TXF: |
code[0] = 0x00000002; |
code[1] = 0x70000000; |
code[1] |= i->tex.r << 13; |
break; |
case OP_TXG: |
code[0] = 0x00000001; |
code[1] = 0x70000000; |
code[1] |= i->tex.r << 15; |
break; |
default: |
code[0] = 0x00000001; |
code[1] = 0x60000000; |
code[1] |= i->tex.r << 15; |
break; |
} |
} |
code[1] |= isNextIndependentTex(i) ? 0x1 : 0x2; // t : p mode |
if (i->tex.liveOnly) |
code[0] |= 0x80000000; |
// per-operation mode bits; this switch also validates the opcode |
switch (i->op) { |
case OP_TEX: break; |
case OP_TXB: code[1] |= 0x2000; break; |
case OP_TXL: code[1] |= 0x3000; break; |
case OP_TXF: break; |
case OP_TXG: break; |
case OP_TXD: break; |
case OP_TXLQ: break; |
default: |
assert(!"invalid texture op"); |
break; |
} |
// bit 0x1000 has inverted meaning for TXF: set when levelZero is NOT requested |
if (i->op == OP_TXF) { |
if (!i->tex.levelZero) |
code[1] |= 0x1000; |
} else |
if (i->tex.levelZero) { |
code[1] |= 0x1000; |
} |
if (i->op != OP_TXD && i->tex.derivAll) |
code[1] |= 0x200; |
emitPredicate(i); |
code[1] |= i->tex.mask << 2; |
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) |
defId(i->def(0), 2); |
srcId(i->src(0), 10); |
srcId(i, src1, 23); |
if (i->op == OP_TXG) code[1] |= i->tex.gatherComp << 13; |
// texture target: |
code[1] |= (i->tex.target.isCube() ? 3 : (i->tex.target.getDim() - 1)) << 7; |
if (i->tex.target.isArray()) |
code[1] |= 0x40; |
if (i->tex.target.isShadow()) |
code[1] |= 0x400; |
if (i->tex.target == TEX_TARGET_2D_MS || |
i->tex.target == TEX_TARGET_2D_MS_ARRAY) |
code[1] |= 0x800; |
// placeholder left by the original author: nothing is encoded for an immediate src1 |
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { |
// ? |
} |
// offset flag position depends on the operation |
if (i->tex.useOffsets == 1) { |
switch (i->op) { |
case OP_TXF: code[1] |= 0x200; break; |
case OP_TXD: code[1] |= 0x00400000; break; |
default: code[1] |= 0x800; break; |
} |
} |
if (i->tex.useOffsets == 4) |
code[1] |= 0x1000; |
} |
// Emit OP_TXQ. The query kind is stored in code[0] from bit 25; an unknown kind asserts |
// and stores nothing. tex.mask and tex.r go to code[1] at bits 2 and 9, and an indirect |
// texture reference sets 0x08000000 in code[1]. |
void |
CodeEmitterGK110::emitTXQ(const TexInstruction *i) |
{ |
   code[0] = 0x00000002; |
   code[1] = 0x75400001; |
   uint32_t queryId = 0; |
   switch (i->tex.query) { |
   case TXQ_DIMS:            queryId = 0x01; break; |
   case TXQ_TYPE:            queryId = 0x02; break; |
   case TXQ_SAMPLE_POSITION: queryId = 0x05; break; |
   case TXQ_FILTER:          queryId = 0x10; break; |
   case TXQ_LOD:             queryId = 0x12; break; |
   case TXQ_BORDER_COLOUR:   queryId = 0x16; break; |
   default: |
      assert(!"invalid texture query"); |
      break; |
   } |
   code[0] |= queryId << 25; |
   code[1] |= (i->tex.mask << 2) | (i->tex.r << 9); |
   const bool indirect = /*i->tex.sIndirectSrc >= 0 || */i->tex.rIndirectSrc >= 0; |
   if (indirect) |
      code[1] |= 0x08000000; |
   defId(i->def(0), 2); |
   srcId(i->src(0), 10); |
   emitPredicate(i); |
} |
// Emit a quad operation. qOp is split across the two words (bit 0 into code[0] bit 31, |
// the remaining bits into code[1] from bit 0) and laneMask is stored in code[1] from |
// bit 12. If the instruction has no source 1, source 0 is used for both operands. |
// emitInstruction() also uses this for OP_DFDX / OP_DFDY with fixed qOp/laneMask values. |
void |
CodeEmitterGK110::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) |
{ |
code[0] = 0x00000002 | ((qOp & 1) << 31); |
code[1] = 0x7fc00000 | (qOp >> 1) | (laneMask << 12); |
defId(i->def(0), 2); |
srcId(i->src(0), 10); |
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 23); |
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) |
code[1] |= 1 << 9; // dall |
emitPredicate(i); |
} |
// Emit OP_PIXLD through the 32-bit-immediate form, then OR i->subOp (at bit 2) and the |
// constant 0x00070000 into code[1]. |
void |
CodeEmitterGK110::emitPIXLD(const Instruction *i) |
{ |
   emitForm_L(i, 0x7f4, 2, Modifier(0)); |
   code[1] |= (i->subOp << 2) | 0x00070000; |
} |
// Emit a control-flow instruction. The switch selects the opcode word and a mask saying |
// which optional fields apply (bit 0: predicate, bit 1: branch target). Targets are |
// encoded PC-relative to the end of this instruction (codeSize + 8), except calls to |
// builtins, which are absolute and resolved through two relocation entries. |
// NOTE(review): f is dereferenced in the OP_BRA / OP_CALL cases before the "if (!f)" |
// check below, so those ops presumably always come with a FlowInstruction - confirm. |
void |
CodeEmitterGK110::emitFlow(const Instruction *i) |
{ |
const FlowInstruction *f = i->asFlow(); |
unsigned mask; // bit 0: predicate, bit 1: target |
code[0] = 0x00000000; |
switch (i->op) { |
case OP_BRA: |
code[1] = f->absolute ? 0x10800000 : 0x12000000; |
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) |
code[0] |= 0x80; |
mask = 3; |
break; |
case OP_CALL: |
code[1] = f->absolute ? 0x11000000 : 0x13000000; |
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) |
code[0] |= 0x80; |
mask = 2; |
break; |
case OP_EXIT: code[1] = 0x18000000; mask = 1; break; |
case OP_RET: code[1] = 0x19000000; mask = 1; break; |
case OP_DISCARD: code[1] = 0x19800000; mask = 1; break; |
case OP_BREAK: code[1] = 0x1a000000; mask = 1; break; |
case OP_CONT: code[1] = 0x1a800000; mask = 1; break; |
case OP_JOINAT: code[1] = 0x14800000; mask = 2; break; |
case OP_PREBREAK: code[1] = 0x15000000; mask = 2; break; |
case OP_PRECONT: code[1] = 0x15800000; mask = 2; break; |
case OP_PRERET: code[1] = 0x13800000; mask = 2; break; |
case OP_QUADON: code[1] = 0x1b800000; mask = 0; break; |
case OP_QUADPOP: code[1] = 0x1c000000; mask = 0; break; |
case OP_BRKPT: code[1] = 0x00000000; mask = 0; break; |
default: |
assert(!"invalid flow operation"); |
return; |
} |
if (mask & 1) { |
emitPredicate(i); |
// no flags source: fill code[0] bits 2..5 |
if (i->flagsSrc < 0) |
code[0] |= 0x3c; |
} |
// everything below needs the FlowInstruction view |
if (!f) |
return; |
if (f->allWarp) |
code[0] |= 1 << 9; |
if (f->limit) |
code[0] |= 1 << 8; |
if (f->op == OP_CALL) { |
if (f->builtin) { |
assert(f->absolute); |
// absolute builtin address: patched in later via relocations on both words |
uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin); |
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xff800000, 23); |
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x007fffff, -9); |
} else { |
assert(!f->absolute); |
// low 9 bits of the offset in code[0] from bit 23, next 15 bits in code[1] |
int32_t pcRel = f->target.fn->binPos - (codeSize + 8); |
code[0] |= (pcRel & 0x1ff) << 23; |
code[1] |= (pcRel >> 9) & 0x7fff; |
} |
} else |
if (mask & 2) { |
int32_t pcRel = f->target.bb->binPos - (codeSize + 8); |
// currently we don't want absolute branches |
assert(!f->absolute); |
code[0] |= (pcRel & 0x1ff) << 23; |
code[1] |= (pcRel >> 9) & 0x7fff; |
} |
} |
// Emit OP_PFETCH. The low 8 bits of source 0's 32-bit value are stored in code[0] from |
// bit 23; the register operand is source 1, or source 2 when source 1 is the predicate. |
void |
CodeEmitterGK110::emitPFETCH(const Instruction *i) |
{ |
   const uint32_t primIdx = i->src(0).get()->reg.data.u32; |
   code[1] = 0x7f800000; |
   code[0] = ((primIdx & 0xff) << 23) | 0x00000002; |
   emitPredicate(i); |
   // if predSrc == 1, !srcExists(2) |
   int regSrc = 1; |
   if (i->predSrc == 1) |
      regSrc = 2; |
   defId(i->def(0), 2); |
   srcId(i, regSrc, 10); |
} |
// Emit OP_VFETCH. The byte offset of source 0 is split across both words (code[0] from |
// bit 23, remainder in code[1] from bit 0); the access size in 32-bit units minus one is |
// stored in code[1] from bit 18. Flags: 0x4 for perPatch, 0x8 when source 0 lives in |
// FILE_SHADER_OUTPUT. |
void |
CodeEmitterGK110::emitVFETCH(const Instruction *i) |
{ |
unsigned int size = typeSizeof(i->dType); |
uint32_t offset = i->src(0).get()->reg.data.offset; |
code[0] = 0x00000002 | (offset << 23); |
code[1] = 0x7ec00000 | (offset >> 9); |
code[1] |= (size / 4 - 1) << 18; |
if (i->perPatch) |
code[1] |= 0x4; |
if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) |
code[1] |= 0x8; // yes, TCPs can read from *outputs* of other threads |
emitPredicate(i); |
defId(i->def(0), 2); |
srcId(i->src(0).getIndirect(0), 10); |
srcId(i->src(0).getIndirect(1), 32 + 10); // vertex address |
} |
// Emit OP_EXPORT. Offset and size are encoded as in emitVFETCH (offset split across both |
// words, size / 4 - 1 at code[1] bit 18, 0x4 for perPatch). The value to store is |
// source 1, which must be a GPR and is placed in the field at bit 2. |
void |
CodeEmitterGK110::emitEXPORT(const Instruction *i) |
{ |
unsigned int size = typeSizeof(i->dType); |
uint32_t offset = i->src(0).get()->reg.data.offset; |
code[0] = 0x00000002 | (offset << 23); |
code[1] = 0x7f000000 | (offset >> 9); |
code[1] |= (size / 4 - 1) << 18; |
if (i->perPatch) |
code[1] |= 0x4; |
emitPredicate(i); |
assert(i->src(1).getFile() == FILE_GPR); |
srcId(i->src(0).getIndirect(0), 10); |
srcId(i->src(0).getIndirect(1), 32 + 10); // vertex base address |
srcId(i->src(1), 2); |
} |
// Emit OP_EMIT / OP_RESTART. Source 0 must be a GPR. Bit 10 of code[1] marks an emit; |
// bit 11 marks a restart, requested either by the opcode or by the |
// NV50_IR_SUBOP_EMIT_RESTART sub-operation. |
void |
CodeEmitterGK110::emitOUT(const Instruction *i) |
{ |
   assert(i->src(0).getFile() == FILE_GPR); |
   emitForm_21(i, 0x1f0, 0xb70); |
   const bool wantEmit = (i->op == OP_EMIT); |
   const bool wantRestart = |
      (i->op == OP_RESTART) || (i->subOp == NV50_IR_SUBOP_EMIT_RESTART); |
   if (wantEmit) |
      code[1] |= 1 << 10; |
   if (wantRestart) |
      code[1] |= 1 << 11; |
} |
// Store the interpolation mode: the low two bits of i->ipa go to code[1] bits 21..22 and |
// the next two bits (mask 0xc) to code[1] bits 19..20. |
void |
CodeEmitterGK110::emitInterpMode(const Instruction *i) |
{ |
   // TODO: INTERP_SAMPLEID |
   code[1] |= ((i->ipa & 0x3) << 21) | ((i->ipa & 0xc) << (19 - 2)); |
} |
// Emit OP_LINTERP / OP_PINTERP. The attribute offset of source 0 is split: bit 0 into |
// code[0] bit 31, the rest into code[1] from bit 0. OP_PINTERP takes an extra register |
// in source 1; when the sample mode is NV50_IR_INTERP_OFFSET one more source follows. |
// NOTE(review): the 0xff fill values presumably mean "no register" for an unused |
// operand field - confirm against the ISA documentation. |
void |
CodeEmitterGK110::emitINTERP(const Instruction *i) |
{ |
const uint32_t base = i->getSrc(0)->reg.data.offset; |
code[0] = 0x00000002 | (base << 31); |
code[1] = 0x74800000 | (base >> 1); |
if (i->saturate) |
code[1] |= 1 << 18; |
if (i->op == OP_PINTERP) |
srcId(i->src(1), 23); |
else |
code[0] |= 0xff << 23; |
srcId(i->src(0).getIndirect(0), 10); |
emitInterpMode(i); |
emitPredicate(i); |
defId(i->def(0), 2); |
// the offset operand is source 2 for PINTERP, source 1 otherwise |
if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) |
srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 10); |
else |
code[1] |= 0xff << 10; |
} |
// Translate a load/store data type into its numeric code and OR it into the instruction |
// at absolute bit position pos (word pos / 32, bit pos % 32). Unknown types assert and |
// encode 0. |
void |
CodeEmitterGK110::emitLoadStoreType(DataType ty, const int pos) |
{ |
   uint8_t typeCode = 0; |
   switch (ty) { |
   case TYPE_U8:   typeCode = 0; break; |
   case TYPE_S8:   typeCode = 1; break; |
   case TYPE_U16:  typeCode = 2; break; |
   case TYPE_S16:  typeCode = 3; break; |
   // all 32-bit types share one code |
   case TYPE_F32: |
   case TYPE_U32: |
   case TYPE_S32:  typeCode = 4; break; |
   // all 64-bit types share one code |
   case TYPE_F64: |
   case TYPE_U64: |
   case TYPE_S64:  typeCode = 5; break; |
   case TYPE_B128: typeCode = 6; break; |
   default: |
      assert(!"invalid ld/st type"); |
      break; |
   } |
   const int word = pos / 32; |
   const int shift = pos % 32; |
   code[word] |= typeCode << shift; |
} |
// Translate a cache mode into its numeric code and OR it into the instruction at |
// absolute bit position pos (word pos / 32, bit pos % 32). Unknown modes assert and |
// encode 0. |
void |
CodeEmitterGK110::emitCachingMode(CacheMode c, const int pos) |
{ |
   uint8_t modeCode = 0; |
   switch (c) { |
   case CACHE_CA: modeCode = 0; break; // case CACHE_WB: |
   case CACHE_CG: modeCode = 1; break; |
   case CACHE_CS: modeCode = 2; break; |
   case CACHE_CV: modeCode = 3; break; // case CACHE_WT: |
   default: |
      assert(!"invalid caching mode"); |
      break; |
   } |
   const int word = pos / 32; |
   const int shift = pos % 32; |
   code[word] |= modeCode << shift; |
} |
// Emit OP_STORE to global, local or shared memory. Global stores use an encoding with |
// code[0] bit 1 clear and their own type/cache field positions; local and shared stores |
// use the code[0] = 0x2 encoding with a 24-bit offset. The address register is the |
// indirect of source 0 and the stored value is source 1. |
void |
CodeEmitterGK110::emitSTORE(const Instruction *i) |
{ |
int32_t offset = SDATA(i->src(0)).offset; |
switch (i->src(0).getFile()) { |
case FILE_MEMORY_GLOBAL: code[1] = 0xe0000000; code[0] = 0x00000000; break; |
case FILE_MEMORY_LOCAL: code[1] = 0x7a800000; code[0] = 0x00000002; break; |
case FILE_MEMORY_SHARED: code[1] = 0x7ac00000; code[0] = 0x00000002; break; |
default: |
assert(!"invalid memory file"); |
break; |
} |
if (i->src(0).getFile() != FILE_MEMORY_GLOBAL) |
offset &= 0xffffff; |
if (code[0] & 0x2) { |
emitLoadStoreType(i->dType, 0x33); |
// only local stores carry a caching mode in this encoding |
if (i->src(0).getFile() == FILE_MEMORY_LOCAL) |
emitCachingMode(i->cache, 0x2f); |
} else { |
emitLoadStoreType(i->dType, 0x38); |
emitCachingMode(i->cache, 0x3b); |
} |
// offset: low 9 bits in code[0] from bit 23, the rest in code[1] from bit 0 |
code[0] |= offset << 23; |
code[1] |= offset >> 9; |
emitPredicate(i); |
srcId(i->src(1), 2); |
srcId(i->src(0).getIndirect(0), 10); |
} |
// Emit OP_LOAD from global, local, shared or constant memory. |
// - Global loads use the encoding with code[0] bit 1 clear and their own type/cache |
//   field positions. |
// - Local, shared and indirect/wide constant loads use the code[0] = 0x2 encoding with |
//   a 24-bit offset (16-bit for constants). |
// - A direct 4-byte constant load is emitted as a MOV instead. |
// The address register is the indirect of source 0; the result goes to destination 0. |
void |
CodeEmitterGK110::emitLOAD(const Instruction *i) |
{ |
   int32_t offset = SDATA(i->src(0)).offset; |
   switch (i->src(0).getFile()) { |
   case FILE_MEMORY_GLOBAL: code[1] = 0xc0000000; code[0] = 0x00000000; break; |
   case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break; |
   // Shared load opcode. This used to be 0x7ac00000, which is the word emitSTORE() |
   // uses for shared *stores*; the load slot in the ld/st local/shared sequence |
   // (0x7a0, 0x7a4, 0x7a8, 0x7ac) is 0x7a4. |
   case FILE_MEMORY_SHARED: code[1] = 0x7a400000; code[0] = 0x00000002; break; |
   case FILE_MEMORY_CONST: |
      if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { |
         emitMOV(i); |
         return; |
      } |
      offset &= 0xffff; |
      code[0] = 0x00000002; |
      code[1] = 0x7c800000 | (i->src(0).get()->reg.fileIndex << 7); |
      code[1] |= i->subOp << 15; |
      break; |
   default: |
      assert(!"invalid memory file"); |
      break; |
   } |
   if (code[0] & 0x2) { |
      offset &= 0xffffff; |
      emitLoadStoreType(i->dType, 0x33); |
      // only local loads carry a caching mode in this encoding |
      if (i->src(0).getFile() == FILE_MEMORY_LOCAL) |
         emitCachingMode(i->cache, 0x2f); |
   } else { |
      emitLoadStoreType(i->dType, 0x38); |
      emitCachingMode(i->cache, 0x3b); |
   } |
   // offset: low 9 bits in code[0] from bit 23, the rest in code[1] from bit 0 |
   code[0] |= offset << 23; |
   code[1] |= offset >> 9; |
   emitPredicate(i); |
   defId(i->def(0), 2); |
   srcId(i->src(0).getIndirect(0), 10); |
} |
// Map the system value named by ref to its special-register number. Values that come in |
// several components add the component index (sv.index) to a base number. Unknown |
// system values assert and yield 0. |
uint8_t |
CodeEmitterGK110::getSRegEncoding(const ValueRef& ref) |
{ |
   uint8_t sreg = 0; |
   switch (SDATA(ref).sv.sv) { |
   case SV_LANEID:        sreg = 0x00; break; |
   case SV_PHYSID:        sreg = 0x03; break; |
   case SV_VERTEX_COUNT:  sreg = 0x10; break; |
   case SV_INVOCATION_ID: sreg = 0x11; break; |
   case SV_YDIR:          sreg = 0x12; break; |
   case SV_TID:           sreg = 0x21 + SDATA(ref).sv.index; break; |
   case SV_CTAID:         sreg = 0x25 + SDATA(ref).sv.index; break; |
   case SV_NTID:          sreg = 0x29 + SDATA(ref).sv.index; break; |
   case SV_GRIDID:        sreg = 0x2c; break; |
   case SV_NCTAID:        sreg = 0x2d + SDATA(ref).sv.index; break; |
   case SV_LBASE:         sreg = 0x34; break; |
   case SV_SBASE:         sreg = 0x30; break; |
   case SV_CLOCK:         sreg = 0x50 + SDATA(ref).sv.index; break; |
   default: |
      assert(!"no sreg for system value"); |
      break; |
   } |
   return sreg; |
} |
// Emit a move (also used for OP_RDSV and for direct 4-byte constant loads). The encoding |
// depends on the file of source 0: system value, 32-bit immediate, predicate, or |
// anything emitForm_C accepts (GPR or c[]). i->lanes is encoded for the immediate and |
// the generic forms only. |
void |
CodeEmitterGK110::emitMOV(const Instruction *i) |
{ |
if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { |
// special-register number goes into code[0] from bit 23 |
code[0] = 0x00000002 | (getSRegEncoding(i->src(0)) << 23); |
code[1] = 0x86400000; |
emitPredicate(i); |
defId(i->def(0), 2); |
} else |
if (i->src(0).getFile() == FILE_IMMEDIATE) { |
code[0] = 0x00000002 | (i->lanes << 14); |
code[1] = 0x74000000; |
emitPredicate(i); |
defId(i->def(0), 2); |
setImmediate32(i, 0, Modifier(0)); |
} else |
if (i->src(0).getFile() == FILE_PREDICATE) { |
code[0] = 0x00000002; |
code[1] = 0x84401c07; |
emitPredicate(i); |
defId(i->def(0), 2); |
srcId(i->src(0), 14); |
} else { |
emitForm_C(i, 0x24c, 2); |
code[1] |= i->lanes << 10; |
} |
} |
// Encode one instruction at the current output position and advance code / codeSize by |
// 8 bytes. Returns false (after logging) if the instruction cannot be encoded, the |
// output buffer is too small, or the operation should not reach the emitter. |
// |
// When writeIssueDelays is set, every 64-byte group starts with an extra 8-byte |
// scheduling word; each of the 7 instructions that follow ORs its insn->sched value into |
// its own slot of that word. |
bool |
CodeEmitterGK110::emitInstruction(Instruction *insn) |
{ |
// 16 bytes are needed when a scheduling word must be emitted first |
const unsigned int size = (writeIssueDelays && !(codeSize & 0x3f)) ? 16 : 8; |
if (insn->encSize != 8) { |
ERROR("skipping unencodable instruction: "); |
insn->print(); |
return false; |
} else |
if (codeSize + size > codeSizeLimit) { |
ERROR("code emitter output buffer too small\n"); |
return false; |
} |
if (writeIssueDelays) { |
// id = index of this instruction within its group (0..6); -1 at a group start |
int id = (codeSize & 0x3f) / 8 - 1; |
if (id < 0) { |
id += 1; |
code[0] = 0x00000000; // cf issue delay "instruction" |
code[1] = 0x08000000; |
code += 2; |
codeSize += 8; |
} |
// step back to the scheduling word of the current group |
uint32_t *data = code - (id * 2 + 2); |
// slots are 8 bits apart starting at bit 2; slot 3 straddles both words |
switch (id) { |
case 0: data[0] |= insn->sched << 2; break; |
case 1: data[0] |= insn->sched << 10; break; |
case 2: data[0] |= insn->sched << 18; break; |
case 3: data[0] |= insn->sched << 26; data[1] |= insn->sched >> 6; break; |
case 4: data[1] |= insn->sched << 2; break; |
case 5: data[1] |= insn->sched << 10; break; |
case 6: data[1] |= insn->sched << 18; break; |
default: |
assert(0); |
break; |
} |
} |
// assert that instructions with multiple defs don't corrupt registers |
for (int d = 0; insn->defExists(d); ++d) |
assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); |
switch (insn->op) { |
case OP_MOV: |
case OP_RDSV: |
emitMOV(insn); |
break; |
case OP_NOP: |
// NOTE(review): nothing is written to code[] for OP_NOP here, yet code and codeSize |
// are still advanced below - confirm this is intended. |
break; |
case OP_LOAD: |
emitLOAD(insn); |
break; |
case OP_STORE: |
emitSTORE(insn); |
break; |
case OP_LINTERP: |
case OP_PINTERP: |
emitINTERP(insn); |
break; |
case OP_VFETCH: |
emitVFETCH(insn); |
break; |
case OP_EXPORT: |
emitEXPORT(insn); |
break; |
case OP_PFETCH: |
emitPFETCH(insn); |
break; |
case OP_EMIT: |
case OP_RESTART: |
emitOUT(insn); |
break; |
// arithmetic: F64 is tested first, then other float types, else the integer encoder |
case OP_ADD: |
case OP_SUB: |
if (insn->dType == TYPE_F64) |
emitDADD(insn); |
else if (isFloatType(insn->dType)) |
emitFADD(insn); |
else |
emitUADD(insn); |
break; |
case OP_MUL: |
if (insn->dType == TYPE_F64) |
emitDMUL(insn); |
else if (isFloatType(insn->dType)) |
emitFMUL(insn); |
else |
emitIMUL(insn); |
break; |
case OP_MAD: |
case OP_FMA: |
if (insn->dType == TYPE_F64) |
emitDMAD(insn); |
else if (isFloatType(insn->dType)) |
emitFMAD(insn); |
else |
emitIMAD(insn); |
break; |
case OP_SAD: |
emitISAD(insn); |
break; |
case OP_NOT: |
emitNOT(insn); |
break; |
// logic ops share one encoder; the second argument selects AND / OR / XOR |
case OP_AND: |
emitLogicOp(insn, 0); |
break; |
case OP_OR: |
emitLogicOp(insn, 1); |
break; |
case OP_XOR: |
emitLogicOp(insn, 2); |
break; |
case OP_SHL: |
case OP_SHR: |
emitShift(insn); |
break; |
case OP_SET: |
case OP_SET_AND: |
case OP_SET_OR: |
case OP_SET_XOR: |
emitSET(insn->asCmp()); |
break; |
case OP_SELP: |
emitSELP(insn); |
break; |
case OP_SLCT: |
emitSLCT(insn->asCmp()); |
break; |
case OP_MIN: |
case OP_MAX: |
emitMINMAX(insn); |
break; |
case OP_ABS: |
case OP_NEG: |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_CVT: |
case OP_SAT: |
emitCVT(insn); |
break; |
// special functions: the second argument is the function selector |
case OP_RSQ: |
emitSFnOp(insn, 5 + 2 * insn->subOp); |
break; |
case OP_RCP: |
emitSFnOp(insn, 4 + 2 * insn->subOp); |
break; |
case OP_LG2: |
emitSFnOp(insn, 3); |
break; |
case OP_EX2: |
emitSFnOp(insn, 2); |
break; |
case OP_SIN: |
emitSFnOp(insn, 1); |
break; |
case OP_COS: |
emitSFnOp(insn, 0); |
break; |
case OP_PRESIN: |
case OP_PREEX2: |
emitPreOp(insn); |
break; |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXD: |
case OP_TXF: |
case OP_TXG: |
case OP_TXLQ: |
emitTEX(insn->asTex()); |
break; |
case OP_TXQ: |
emitTXQ(insn->asTex()); |
break; |
case OP_TEXBAR: |
emitTEXBAR(insn); |
break; |
case OP_PIXLD: |
emitPIXLD(insn); |
break; |
case OP_BRA: |
case OP_CALL: |
case OP_PRERET: |
case OP_RET: |
case OP_DISCARD: |
case OP_EXIT: |
case OP_PRECONT: |
case OP_CONT: |
case OP_PREBREAK: |
case OP_BREAK: |
case OP_JOINAT: |
case OP_BRKPT: |
case OP_QUADON: |
case OP_QUADPOP: |
emitFlow(insn); |
break; |
case OP_QUADOP: |
emitQUADOP(insn, insn->subOp, insn->lanes); |
break; |
// derivatives are quad ops with fixed qOp / lane-mask constants |
case OP_DFDX: |
emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); |
break; |
case OP_DFDY: |
emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); |
break; |
case OP_POPCNT: |
emitPOPC(insn); |
break; |
case OP_INSBF: |
emitINSBF(insn); |
break; |
case OP_EXTBF: |
emitEXTBF(insn); |
break; |
case OP_BFIND: |
emitBFIND(insn); |
break; |
case OP_JOIN: |
// a JOIN is a NOP carrying the join flag, which is applied after the switch |
emitNOP(insn); |
insn->join = 1; |
break; |
case OP_PHI: |
case OP_UNION: |
case OP_CONSTRAINT: |
ERROR("operation should have been eliminated"); |
return false; |
case OP_EXP: |
case OP_LOG: |
case OP_SQRT: |
case OP_POW: |
ERROR("operation should have been lowered\n"); |
return false; |
default: |
ERROR("unknow op\n"); |
return false; |
} |
// the join flag lives in bit 22 of the first word |
if (insn->join) |
code[0] |= 1 << 22; |
code += 2; |
codeSize += 8; |
return true; |
} |
// Report the smallest encoding size, in bytes, for instruction i. This emitter has a |
// single 8-byte encoding, so the argument is not consulted. |
uint32_t |
CodeEmitterGK110::getMinEncodingSize(const Instruction *i) const |
{ |
   // No more short instruction encodings. |
   const uint32_t encodingBytes = 8; |
   return encodingBytes; |
} |
// Prepare func for emission: run the base-class preparation, then compute scheduling |
// data when the target reports software scheduling (hasSWSched). |
void |
CodeEmitterGK110::prepareEmission(Function *func) |
{ |
   const Target *const target = func->getProgram()->getTarget(); |
   CodeEmitter::prepareEmission(func); |
   if (!target->hasSWSched) |
      return; |
   calculateSchedDataNVC0(target, func); |
} |
/* Construct an emitter bound to target; the output buffer starts out unset
 * and empty, and no relocation info is attached yet.
 */
CodeEmitterGK110::CodeEmitterGK110(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
/* Allocate a GK110 code emitter for this target and tag it with the
 * program type it is going to emit. The caller receives the new object.
 */
CodeEmitter *
TargetNVC0::createCodeEmitterGK110(Program::Type type)
{
   CodeEmitterGK110 *const emitter = new CodeEmitterGK110(this);

   emitter->setProgramType(type);
   return emitter;
}
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |
---|
0,0 → 1,2910 |
/* |
* Copyright 2014 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
* |
* Authors: Ben Skeggs <bskeggs@redhat.com> |
*/ |
#include "codegen/nv50_ir_target_gm107.h" |
namespace nv50_ir { |
class CodeEmitterGM107 : public CodeEmitter |
{ |
public: |
CodeEmitterGM107(const TargetGM107 *); |
virtual bool emitInstruction(Instruction *); |
virtual uint32_t getMinEncodingSize(const Instruction *) const; |
virtual void prepareEmission(Program *); |
virtual void prepareEmission(Function *); |
inline void setProgramType(Program::Type pType) { progType = pType; } |
private: |
const TargetGM107 *targGM107; |
Program::Type progType; |
const Instruction *insn; |
const bool writeIssueDelays; |
uint32_t *data; |
private: |
inline void emitField(uint32_t *, int, int, uint32_t); |
inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); } |
inline void emitInsn(uint32_t, bool); |
inline void emitInsn(uint32_t o) { emitInsn(o, true); } |
inline void emitPred(); |
inline void emitGPR(int, const Value *); |
inline void emitGPR(int pos) { |
emitGPR(pos, (const Value *)NULL); |
} |
inline void emitGPR(int pos, const ValueRef &ref) { |
emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); |
} |
inline void emitGPR(int pos, const ValueRef *ref) { |
emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); |
} |
inline void emitGPR(int pos, const ValueDef &def) { |
emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL); |
} |
inline void emitSYS(int, const Value *); |
inline void emitSYS(int pos, const ValueRef &ref) { |
emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); |
} |
inline void emitPRED(int, const Value *); |
inline void emitPRED(int pos) { |
emitPRED(pos, (const Value *)NULL); |
} |
inline void emitPRED(int pos, const ValueRef &ref) { |
emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); |
} |
inline void emitPRED(int pos, const ValueDef &def) { |
emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); |
} |
inline void emitADDR(int, int, int, int, const ValueRef &); |
inline void emitCBUF(int, int, int, int, int, const ValueRef &); |
inline bool longIMMD(const ValueRef &); |
inline void emitIMMD(int, int, const ValueRef &); |
void emitCond3(int, CondCode); |
void emitCond4(int, CondCode); |
void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); } |
inline void emitO(int); |
inline void emitP(int); |
inline void emitSAT(int); |
inline void emitCC(int); |
inline void emitX(int); |
inline void emitABS(int, const ValueRef &); |
inline void emitNEG(int, const ValueRef &); |
inline void emitNEG2(int, const ValueRef &, const ValueRef &); |
inline void emitFMZ(int, int); |
inline void emitRND(int, RoundMode, int); |
inline void emitRND(int pos) { |
emitRND(pos, insn->rnd, -1); |
} |
inline void emitPDIV(int); |
inline void emitINV(int, const ValueRef &); |
void emitEXIT(); |
void emitBRA(); |
void emitCAL(); |
void emitPCNT(); |
void emitCONT(); |
void emitPBK(); |
void emitBRK(); |
void emitPRET(); |
void emitRET(); |
void emitSSY(); |
void emitSYNC(); |
void emitSAM(); |
void emitRAM(); |
void emitMOV(); |
void emitS2R(); |
void emitF2F(); |
void emitF2I(); |
void emitI2F(); |
void emitI2I(); |
void emitSHFL(); |
void emitDADD(); |
void emitDMUL(); |
void emitDFMA(); |
void emitDMNMX(); |
void emitDSET(); |
void emitDSETP(); |
void emitFADD(); |
void emitFMUL(); |
void emitFFMA(); |
void emitMUFU(); |
void emitFMNMX(); |
void emitRRO(); |
void emitFCMP(); |
void emitFSET(); |
void emitFSETP(); |
void emitFSWZADD(); |
void emitLOP(); |
void emitNOT(); |
void emitIADD(); |
void emitIMUL(); |
void emitIMAD(); |
void emitIMNMX(); |
void emitICMP(); |
void emitISET(); |
void emitISETP(); |
void emitSHL(); |
void emitSHR(); |
void emitPOPC(); |
void emitBFI(); |
void emitBFE(); |
void emitFLO(); |
void emitLDSTs(int, DataType); |
void emitLDSTc(int); |
void emitLDC(); |
void emitLDL(); |
void emitLDS(); |
void emitLD(); |
void emitSTL(); |
void emitSTS(); |
void emitST(); |
void emitALD(); |
void emitAST(); |
void emitISBERD(); |
void emitIPA(); |
void emitPIXLD(); |
void emitTEXs(int); |
void emitTEX(); |
void emitTLD(); |
void emitTLD4(); |
void emitTXD(); |
void emitTXQ(); |
void emitTMML(); |
void emitDEPBAR(); |
void emitNOP(); |
void emitKIL(); |
void emitOUT(); |
}; |
/******************************************************************************* |
* general instruction layout/fields |
******************************************************************************/ |
void |
CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v) |
{ |
if (b >= 0) { |
uint32_t m = ((1ULL << s) - 1); |
uint64_t d = (uint64_t)(v & m) << b; |
assert(!(v & ~m) || (v & ~m) == ~m); |
data[1] |= d >> 32; |
data[0] |= d; |
} |
} |
void |
CodeEmitterGM107::emitPred() |
{ |
if (insn->predSrc >= 0) { |
emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); |
emitField(19, 1, insn->cc == CC_NOT_P); |
} else { |
emitField(16, 3, 7); |
} |
} |
void |
CodeEmitterGM107::emitInsn(uint32_t hi, bool pred) |
{ |
code[0] = 0x00000000; |
code[1] = hi; |
if (pred) |
emitPred(); |
} |
void |
CodeEmitterGM107::emitGPR(int pos, const Value *val) |
{ |
emitField(pos, 8, val ? val->reg.data.id : 255); |
} |
void |
CodeEmitterGM107::emitSYS(int pos, const Value *val) |
{ |
int id = val ? val->reg.data.id : -1; |
switch (id) { |
case SV_LANEID : id = 0x00; break; |
case SV_VERTEX_COUNT : id = 0x10; break; |
case SV_INVOCATION_ID : id = 0x11; break; |
case SV_INVOCATION_INFO: id = 0x1d; break; |
default: |
assert(!"invalid system value"); |
id = 0; |
break; |
} |
emitField(pos, 8, id); |
} |
void |
CodeEmitterGM107::emitPRED(int pos, const Value *val) |
{ |
emitField(pos, 3, val ? val->reg.data.id : 7); |
} |
void |
CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr, |
const ValueRef &ref) |
{ |
const Value *v = ref.get(); |
assert(!(v->reg.data.offset & ((1 << shr) - 1))); |
if (gpr >= 0) |
emitGPR(gpr, ref.getIndirect(0)); |
emitField(off, len, v->reg.data.offset >> shr); |
} |
void |
CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr, |
const ValueRef &ref) |
{ |
const Value *v = ref.get(); |
const Symbol *s = v->asSym(); |
assert(!(s->reg.data.offset & ((1 << shr) - 1))); |
emitField(buf, 5, v->reg.fileIndex); |
if (gpr >= 0) |
emitGPR(gpr, ref.getIndirect(0)); |
emitField(off, 16, s->reg.data.offset >> shr); |
} |
bool |
CodeEmitterGM107::longIMMD(const ValueRef &ref) |
{ |
if (ref.getFile() == FILE_IMMEDIATE) { |
const ImmediateValue *imm = ref.get()->asImm(); |
if (isFloatType(insn->sType)) { |
if ((imm->reg.data.u32 & 0x00000fff) != 0x00000000) |
return true; |
} else { |
if ((imm->reg.data.u32 & 0xfff00000) != 0x00000000 && |
(imm->reg.data.u32 & 0xfff00000) != 0xfff00000) |
return true; |
} |
} |
return false; |
} |
void |
CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref) |
{ |
const ImmediateValue *imm = ref.get()->asImm(); |
uint32_t val = imm->reg.data.u32; |
if (len == 19) { |
if (isFloatType(insn->sType)) { |
assert(!(val & 0x00000fff)); |
val >>= 12; |
} |
assert(!(val & 0xfff00000) || (val & 0xfff00000) == 0xfff00000); |
emitField( 56, 1, (val & 0x80000) >> 19); |
emitField(pos, len, (val & 0x7ffff)); |
} else { |
emitField(pos, len, val); |
} |
} |
/******************************************************************************* |
* modifiers |
******************************************************************************/ |
void |
CodeEmitterGM107::emitCond3(int pos, CondCode code) |
{ |
int data = 0; |
switch (code) { |
case CC_FL : data = 0x00; break; |
case CC_LTU: |
case CC_LT : data = 0x01; break; |
case CC_EQU: |
case CC_EQ : data = 0x02; break; |
case CC_LEU: |
case CC_LE : data = 0x03; break; |
case CC_GTU: |
case CC_GT : data = 0x04; break; |
case CC_NEU: |
case CC_NE : data = 0x05; break; |
case CC_GEU: |
case CC_GE : data = 0x06; break; |
case CC_TR : data = 0x07; break; |
default: |
assert(!"invalid cond3"); |
break; |
} |
emitField(pos, 3, data); |
} |
void |
CodeEmitterGM107::emitCond4(int pos, CondCode code) |
{ |
int data = 0; |
switch (code) { |
case CC_FL: data = 0x00; break; |
case CC_LT: data = 0x01; break; |
case CC_EQ: data = 0x02; break; |
case CC_LE: data = 0x03; break; |
case CC_GT: data = 0x04; break; |
case CC_NE: data = 0x05; break; |
case CC_GE: data = 0x06; break; |
// case CC_NUM: data = 0x07; break; |
// case CC_NAN: data = 0x08; break; |
case CC_LTU: data = 0x09; break; |
case CC_EQU: data = 0x0a; break; |
case CC_LEU: data = 0x0b; break; |
case CC_GTU: data = 0x0c; break; |
case CC_NEU: data = 0x0d; break; |
case CC_GEU: data = 0x0e; break; |
case CC_TR: data = 0x0f; break; |
default: |
assert(!"invalid cond4"); |
break; |
} |
emitField(pos, 4, data); |
} |
void |
CodeEmitterGM107::emitO(int pos) |
{ |
emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); |
} |
void |
CodeEmitterGM107::emitP(int pos) |
{ |
emitField(pos, 1, insn->perPatch); |
} |
void |
CodeEmitterGM107::emitSAT(int pos) |
{ |
emitField(pos, 1, insn->saturate); |
} |
void |
CodeEmitterGM107::emitCC(int pos) |
{ |
emitField(pos, 1, insn->defExists(1)); |
} |
void |
CodeEmitterGM107::emitX(int pos) |
{ |
emitField(pos, 1, insn->flagsSrc >= 0); |
} |
void |
CodeEmitterGM107::emitABS(int pos, const ValueRef &ref) |
{ |
emitField(pos, 1, ref.mod.abs()); |
} |
void |
CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref) |
{ |
emitField(pos, 1, ref.mod.neg()); |
} |
void |
CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b) |
{ |
emitField(pos, 1, a.mod.neg() ^ b.mod.neg()); |
} |
void |
CodeEmitterGM107::emitFMZ(int pos, int len) |
{ |
emitField(pos, len, insn->dnz << 1 | insn->ftz); |
} |
void |
CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip) |
{ |
int rm = 0, ri = 0; |
switch (rnd) { |
case ROUND_NI: ri = 1; |
case ROUND_N : rm = 0; break; |
case ROUND_MI: ri = 1; |
case ROUND_M : rm = 1; break; |
case ROUND_PI: ri = 1; |
case ROUND_P : rm = 2; break; |
case ROUND_ZI: ri = 1; |
case ROUND_Z : rm = 3; break; |
default: |
assert(!"invalid round mode"); |
break; |
} |
emitField(rip, 1, ri); |
emitField(rmp, 2, rm); |
} |
void |
CodeEmitterGM107::emitPDIV(int pos) |
{ |
assert(insn->postFactor >= -3 && insn->postFactor <= 3); |
if (insn->postFactor > 0) |
emitField(pos, 3, 7 - insn->postFactor); |
else |
emitField(pos, 3, 0 - insn->postFactor); |
} |
void |
CodeEmitterGM107::emitINV(int pos, const ValueRef &ref) |
{ |
emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); |
} |
/******************************************************************************* |
* control flow |
******************************************************************************/ |
void |
CodeEmitterGM107::emitEXIT() |
{ |
emitInsn (0xe3000000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitBRA() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
int gpr = -1; |
if (insn->indirect) { |
if (insn->absolute) |
emitInsn(0xe2000000); // JMX |
else |
emitInsn(0xe2500000); // BRX |
gpr = 0x08; |
} else { |
if (insn->absolute) |
emitInsn(0xe2100000); // JMP |
else |
emitInsn(0xe2400000); // BRA |
emitField(0x07, 1, insn->allWarp); |
} |
emitField(0x06, 1, insn->limit); |
emitCond5(0x00, CC_TR); |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
if (!insn->absolute) |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
else |
emitField(0x14, 32, insn->target.bb->binPos); |
} else { |
emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitCAL() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
if (insn->absolute) { |
emitInsn(0xe2200000, 0); // JCAL |
} else { |
emitInsn(0xe2600000, 0); // CAL |
} |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
if (!insn->absolute) |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
else { |
if (insn->builtin) { |
int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin); |
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20); |
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12); |
} else { |
emitField(0x14, 32, insn->target.bb->binPos); |
} |
} |
} else { |
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitPCNT() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
emitInsn(0xe2b00000, 0); |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
} else { |
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitCONT() |
{ |
emitInsn (0xe3500000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitPBK() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
emitInsn(0xe2a00000, 0); |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
} else { |
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitBRK() |
{ |
emitInsn (0xe3400000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitPRET() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
emitInsn(0xe2700000, 0); |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
} else { |
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitRET() |
{ |
emitInsn (0xe3200000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitSSY() |
{ |
const FlowInstruction *insn = this->insn->asFlow(); |
emitInsn(0xe2900000, 0); |
if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) { |
emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8)); |
} else { |
emitCBUF (0x24, -1, 20, 16, 0, insn->src(0)); |
emitField(0x05, 1, 1); |
} |
} |
void |
CodeEmitterGM107::emitSYNC() |
{ |
emitInsn (0xf0f80000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitSAM() |
{ |
emitInsn(0xe3700000, 0); |
} |
void |
CodeEmitterGM107::emitRAM() |
{ |
emitInsn(0xe3800000, 0); |
} |
/******************************************************************************* |
* predicate/cc |
******************************************************************************/ |
/******************************************************************************* |
* movement / conversion |
******************************************************************************/ |
void |
CodeEmitterGM107::emitMOV() |
{ |
if ( insn->src(0).getFile() != FILE_IMMEDIATE || |
(insn->sType != TYPE_F32 && !longIMMD(insn->src(0)))) { |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c980000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c980000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38980000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src file"); |
break; |
} |
emitField(0x27, 4, insn->lanes); |
} else { |
emitInsn (0x01000000); |
emitIMMD (0x14, 32, insn->src(0)); |
emitField(0x0c, 4, insn->lanes); |
} |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitS2R() |
{ |
emitInsn(0xf0c80000); |
emitSYS (0x14, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitF2F() |
{ |
RoundMode rnd = insn->rnd; |
switch (insn->op) { |
case OP_FLOOR: rnd = ROUND_MI; break; |
case OP_CEIL : rnd = ROUND_PI; break; |
case OP_TRUNC: rnd = ROUND_ZI; break; |
default: |
break; |
} |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5ca80000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4ca80000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38a80000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src0 file"); |
break; |
} |
emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate); |
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); |
emitCC (0x2f); |
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); |
emitFMZ (0x2c, 1); |
emitRND (0x27, rnd, 0x2a); |
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); |
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitF2I() |
{ |
RoundMode rnd = insn->rnd; |
switch (insn->op) { |
case OP_FLOOR: rnd = ROUND_M; break; |
case OP_CEIL : rnd = ROUND_P; break; |
case OP_TRUNC: rnd = ROUND_Z; break; |
default: |
break; |
} |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5cb00000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4cb00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38b00000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src0 file"); |
break; |
} |
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); |
emitCC (0x2f); |
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); |
emitFMZ (0x2c, 1); |
emitRND (0x27, rnd, 0x2a); |
emitField(0x0c, 1, isSignedType(insn->dType)); |
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); |
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitI2F() |
{ |
RoundMode rnd = insn->rnd; |
switch (insn->op) { |
case OP_FLOOR: rnd = ROUND_M; break; |
case OP_CEIL : rnd = ROUND_P; break; |
case OP_TRUNC: rnd = ROUND_Z; break; |
default: |
break; |
} |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5cb80000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4cb80000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38b80000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src0 file"); |
break; |
} |
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); |
emitCC (0x2f); |
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); |
emitRND (0x27, rnd, -1); |
emitField(0x0d, 1, isSignedType(insn->sType)); |
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); |
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitI2I() |
{ |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5ce00000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4ce00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38e00000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src0 file"); |
break; |
} |
emitSAT (0x32); |
emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs()); |
emitCC (0x2f); |
emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg()); |
emitField(0x0d, 1, isSignedType(insn->sType)); |
emitField(0x0c, 1, isSignedType(insn->dType)); |
emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType))); |
emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType))); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitSHFL() |
{ |
int type = 0; |
emitInsn (0xef100000); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitGPR(0x14, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitIMMD(0x14, 5, insn->src(1)); |
type |= 1; |
break; |
default: |
assert(!"invalid src1 file"); |
break; |
} |
/*XXX: what is this arg? hardcode immediate for now */ |
emitField(0x22, 13, 0x1c03); |
type |= 2; |
emitPRED (0x30); |
emitField(0x1e, 2, insn->subOp); |
emitField(0x1c, 2, type); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/******************************************************************************* |
* double |
******************************************************************************/ |
void |
CodeEmitterGM107::emitDADD() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c700000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c700000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38700000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitABS(0x31, insn->src(1)); |
emitNEG(0x30, insn->src(0)); |
emitCC (0x2f); |
emitABS(0x2e, insn->src(0)); |
emitNEG(0x2d, insn->src(1)); |
if (insn->op == OP_SUB) |
code[1] ^= 0x00002000; |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDMUL() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c800000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c800000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38800000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitNEG2(0x30, insn->src(0), insn->src(1)); |
emitCC (0x2f); |
emitRND (0x27); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDFMA() |
{ |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5b700000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4b700000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36700000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x53700000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitRND (0x32); |
emitNEG (0x31, insn->src(2)); |
emitNEG2(0x30, insn->src(0), insn->src(1)); |
emitCC (0x2f); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDMNMX() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c500000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c500000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38500000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitABS (0x31, insn->src(1)); |
emitNEG (0x30, insn->src(0)); |
emitCC (0x2f); |
emitABS (0x2e, insn->src(0)); |
emitNEG (0x2d, insn->src(1)); |
emitField(0x2a, 1, insn->op == OP_MAX); |
emitPRED (0x27); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDSET() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x59000000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x49000000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x32000000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitABS (0x36, insn->src(0)); |
emitNEG (0x35, insn->src(1)); |
emitField(0x34, 1, insn->dType == TYPE_F32); |
emitCond4(0x30, insn->setCond); |
emitCC (0x2f); |
emitABS (0x2c, insn->src(1)); |
emitNEG (0x2b, insn->src(0)); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDSETP() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5b800000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4b800000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36800000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitCond4(0x30, insn->setCond); |
emitABS (0x2c, insn->src(1)); |
emitNEG (0x2b, insn->src(0)); |
emitGPR (0x08, insn->src(0)); |
emitABS (0x07, insn->src(0)); |
emitNEG (0x06, insn->src(1)); |
emitPRED (0x03, insn->def(0)); |
if (insn->defExists(1)) |
emitPRED(0x00, insn->def(1)); |
else |
emitPRED(0x00); |
} |
/******************************************************************************* |
* float |
******************************************************************************/ |
void |
CodeEmitterGM107::emitFADD() |
{ |
if (!longIMMD(insn->src(1))) { |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c580000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c580000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38580000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitSAT(0x32); |
emitABS(0x31, insn->src(1)); |
emitNEG(0x30, insn->src(0)); |
emitCC (0x2f); |
emitABS(0x2e, insn->src(0)); |
emitNEG(0x2d, insn->src(1)); |
emitFMZ(0x2c, 1); |
} else { |
emitInsn(0x08000000); |
emitABS(0x39, insn->src(1)); |
emitNEG(0x38, insn->src(0)); |
emitFMZ(0x37, 1); |
emitABS(0x36, insn->src(0)); |
emitNEG(0x35, insn->src(1)); |
emitCC (0x34); |
emitIMMD(0x14, 32, insn->src(1)); |
} |
if (insn->op == OP_SUB) |
code[1] ^= 0x00002000; |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFMUL() |
{ |
if (!longIMMD(insn->src(1))) { |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c680000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c680000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38680000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitSAT (0x32); |
emitNEG2(0x30, insn->src(0), insn->src(1)); |
emitCC (0x2f); |
emitFMZ (0x2c, 2); |
emitPDIV(0x29); |
emitRND (0x27); |
} else { |
emitInsn(0x1e000000); |
emitSAT (0x37); |
emitFMZ (0x35, 2); |
emitCC (0x34); |
emitIMMD(0x14, 32, insn->src(1)); |
if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg()) |
code[1] ^= 0x00080000; /* flip immd sign bit */ |
} |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFFMA() |
{ |
/*XXX: ffma32i exists, but not using it as third src overlaps dst */ |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x59800000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x49800000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x32800000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x51800000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitRND (0x33); |
emitSAT (0x32); |
emitNEG (0x31, insn->src(2)); |
emitNEG2(0x30, insn->src(0), insn->src(1)); |
emitCC (0x2f); |
emitFMZ(0x35, 2); |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitMUFU() |
{ |
int mufu = 0; |
switch (insn->op) { |
case OP_COS: mufu = 0; break; |
case OP_SIN: mufu = 1; break; |
case OP_EX2: mufu = 2; break; |
case OP_LG2: mufu = 3; break; |
case OP_RCP: mufu = 4 + 2 * insn->subOp; break; |
case OP_RSQ: mufu = 5 + 2 * insn->subOp; break; |
default: |
assert(!"invalid mufu"); |
break; |
} |
emitInsn (0x50800000); |
emitSAT (0x32); |
emitNEG (0x30, insn->src(0)); |
emitABS (0x2e, insn->src(0)); |
emitField(0x14, 3, mufu); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFMNMX() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c600000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c600000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38600000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x2a, 1, insn->op == OP_MAX); |
emitPRED (0x27); |
emitABS(0x31, insn->src(1)); |
emitNEG(0x30, insn->src(0)); |
emitCC (0x2f); |
emitABS(0x2e, insn->src(0)); |
emitNEG(0x2d, insn->src(1)); |
emitFMZ(0x2c, 1); |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitRRO() |
{ |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c900000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c900000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38900000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src file"); |
break; |
} |
emitABS (0x31, insn->src(0)); |
emitNEG (0x2d, insn->src(0)); |
emitField(0x27, 1, insn->op == OP_PREEX2); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFCMP() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
CondCode cc = insn->setCond; |
if (insn->src(2).mod.neg()) |
cc = reverseCondCode(cc); |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5ba00000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4ba00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36a00000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x53a00000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitCond4(0x30, cc); |
emitFMZ (0x2f, 1); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFSET() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x58000000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x48000000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x30000000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitFMZ (0x37, 1); |
emitABS (0x36, insn->src(0)); |
emitNEG (0x35, insn->src(1)); |
emitField(0x34, 1, insn->dType == TYPE_F32); |
emitCond4(0x30, insn->setCond); |
emitCC (0x2f); |
emitABS (0x2c, insn->src(1)); |
emitNEG (0x2b, insn->src(0)); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFSETP() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5bb00000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4bb00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36b00000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitCond4(0x30, insn->setCond); |
emitFMZ (0x2f, 1); |
emitABS (0x2c, insn->src(1)); |
emitNEG (0x2b, insn->src(0)); |
emitGPR (0x08, insn->src(0)); |
emitABS (0x07, insn->src(0)); |
emitNEG (0x06, insn->src(1)); |
emitPRED (0x03, insn->def(0)); |
if (insn->defExists(1)) |
emitPRED(0x00, insn->def(1)); |
else |
emitPRED(0x00); |
} |
void |
CodeEmitterGM107::emitFSWZADD() |
{ |
emitInsn (0x50f80000); |
emitCC (0x2f); |
emitFMZ (0x2c, 1); |
emitRND (0x27); |
emitField(0x26, 1, insn->lanes); /* abused for .ndv */ |
emitField(0x1c, 8, insn->subOp); |
emitGPR (0x14, insn->src(1)); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/******************************************************************************* |
* integer |
******************************************************************************/ |
void |
CodeEmitterGM107::emitLOP() |
{ |
int lop = 0; |
switch (insn->op) { |
case OP_AND: lop = 0; break; |
case OP_OR : lop = 1; break; |
case OP_XOR: lop = 2; break; |
default: |
assert(!"invalid lop"); |
break; |
} |
if (!longIMMD(insn->src(1))) { |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c400000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c400000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38400000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitPRED (0x30); |
emitField(0x29, 2, lop); |
emitINV (0x28, insn->src(1)); |
emitINV (0x27, insn->src(0)); |
} else { |
emitInsn (0x04000000); |
emitINV (0x38, insn->src(1)); |
emitINV (0x37, insn->src(0)); |
emitField(0x35, 2, lop); |
emitIMMD (0x14, 32, insn->src(1)); |
} |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/* special-case of emitLOP(): lop pass_b dst 0 ~src */ |
void |
CodeEmitterGM107::emitNOT() |
{ |
if (!longIMMD(insn->src(0))) { |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c400700); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c400700); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38400700); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitPRED (0x30); |
} else { |
emitInsn (0x05600000); |
emitIMMD (0x14, 32, insn->src(1)); |
} |
emitGPR(0x08); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitIADD() |
{ |
if (!longIMMD(insn->src(1))) { |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c100000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c100000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38100000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitSAT(0x32); |
emitNEG(0x31, insn->src(0)); |
emitNEG(0x30, insn->src(1)); |
emitCC (0x2f); |
} else { |
emitInsn(0x1c000000); |
emitSAT (0x36); |
emitCC (0x34); |
emitIMMD(0x14, 32, insn->src(1)); |
} |
if (insn->op == OP_SUB) |
code[1] ^= 0x00010000; |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitIMUL() |
{ |
if (!longIMMD(insn->src(1))) { |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c380000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c380000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38380000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitCC (0x2f); |
emitField(0x29, 1, isSignedType(insn->sType)); |
emitField(0x28, 1, isSignedType(insn->dType)); |
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); |
} else { |
emitInsn (0x1f000000); |
emitField(0x37, 1, isSignedType(insn->sType)); |
emitField(0x36, 1, isSignedType(insn->dType)); |
emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); |
emitCC (0x34); |
emitIMMD (0x14, 32, insn->src(1)); |
} |
emitGPR(0x08, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitIMAD() |
{ |
/*XXX: imad32i exists, but not using it as third src overlaps dst */ |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5a000000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4a000000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x34000000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x52000000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH); |
emitField(0x35, 1, isSignedType(insn->sType)); |
emitNEG (0x34, insn->src(2)); |
emitNEG2 (0x33, insn->src(0), insn->src(1)); |
emitSAT (0x32); |
emitX (0x31); |
emitField(0x30, 1, isSignedType(insn->dType)); |
emitCC (0x2f); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitIMNMX() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c200000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c200000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38200000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x30, 1, isSignedType(insn->dType)); |
emitCC (0x2f); |
emitField(0x2a, 1, insn->op == OP_MAX); |
emitPRED (0x27); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitICMP() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
CondCode cc = insn->setCond; |
if (insn->src(2).mod.neg()) |
cc = reverseCondCode(cc); |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5b400000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4b400000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36400000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x53400000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitCond3(0x31, cc); |
emitField(0x30, 1, isSignedType(insn->sType)); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitISET() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5b500000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4b500000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36500000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitCond3(0x31, insn->setCond); |
emitField(0x30, 1, isSignedType(insn->sType)); |
emitCC (0x2f); |
emitX (0x2b); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitISETP() |
{ |
const CmpInstruction *insn = this->insn->asCmp(); |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5b600000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4b600000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36600000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
if (insn->op != OP_SET) { |
switch (insn->op) { |
case OP_SET_AND: emitField(0x2d, 2, 0); break; |
case OP_SET_OR : emitField(0x2d, 2, 1); break; |
case OP_SET_XOR: emitField(0x2d, 2, 2); break; |
default: |
assert(!"invalid set op"); |
break; |
} |
emitPRED(0x27, insn->src(2)); |
} else { |
emitPRED(0x27); |
} |
emitCond3(0x31, insn->setCond); |
emitField(0x30, 1, isSignedType(insn->sType)); |
emitX (0x2b); |
emitGPR (0x08, insn->src(0)); |
emitPRED (0x03, insn->def(0)); |
if (insn->defExists(1)) |
emitPRED(0x00, insn->def(1)); |
else |
emitPRED(0x00); |
} |
void |
CodeEmitterGM107::emitSHL() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c480000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c480000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38480000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitCC (0x2f); |
emitX (0x2b); |
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitSHR() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c280000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c280000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38280000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x30, 1, isSignedType(insn->dType)); |
emitCC (0x2f); |
emitX (0x2c); |
emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitPOPC() |
{ |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c080000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c080000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38080000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitINV(0x28, insn->src(0)); |
emitGPR(0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitBFI() |
{ |
switch(insn->src(2).getFile()) { |
case FILE_GPR: |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5bf00000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4bf00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x36f00000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitGPR (0x27, insn->src(2)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x53f00000); |
emitGPR (0x27, insn->src(1)); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2)); |
break; |
default: |
assert(!"bad src2 file"); |
break; |
} |
emitCC (0x2f); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitBFE() |
{ |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c000000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c000000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38000000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x30, 1, isSignedType(insn->dType)); |
emitCC (0x2f); |
emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitFLO() |
{ |
switch (insn->src(0).getFile()) { |
case FILE_GPR: |
emitInsn(0x5c300000); |
emitGPR (0x14, insn->src(0)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0x4c300000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0x38300000); |
emitIMMD(0x14, 19, insn->src(0)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x30, 1, isSignedType(insn->dType)); |
emitCC (0x2f); |
emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); |
emitINV (0x28, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/******************************************************************************* |
* memory |
******************************************************************************/ |
void |
CodeEmitterGM107::emitLDSTs(int pos, DataType type) |
{ |
int data = 0; |
switch (typeSizeof(type)) { |
case 1: data = isSignedType(type) ? 1 : 0; break; |
case 2: data = isSignedType(type) ? 3 : 2; break; |
case 4: data = 4; break; |
case 8: data = 5; break; |
case 16: data = 6; break; |
default: |
assert(!"bad type"); |
break; |
} |
emitField(pos, 3, data); |
} |
void |
CodeEmitterGM107::emitLDSTc(int pos) |
{ |
int mode = 0; |
switch (insn->cache) { |
case CACHE_CA: mode = 0; break; |
case CACHE_CG: mode = 1; break; |
case CACHE_CS: mode = 2; break; |
case CACHE_CV: mode = 3; break; |
default: |
assert(!"invalid caching mode"); |
break; |
} |
emitField(pos, 2, mode); |
} |
void |
CodeEmitterGM107::emitLDC() |
{ |
emitInsn (0xef900000); |
emitLDSTs(0x30, insn->dType); |
emitField(0x2c, 2, insn->subOp); |
emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitLDL() |
{ |
emitInsn (0xef400000); |
emitLDSTs(0x30, insn->dType); |
emitLDSTc(0x2c); |
emitADDR (0x08, 0x14, 24, 0, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitLDS() |
{ |
emitInsn (0xef480000); |
emitLDSTs(0x30, insn->dType); |
emitADDR (0x08, 0x14, 24, 0, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitLD() |
{ |
emitInsn (0x80000000); |
emitPRED (0x3a); |
emitLDSTc(0x38); |
emitLDSTs(0x35, insn->dType); |
emitADDR (0x08, 0x14, 32, 0, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitSTL() |
{ |
emitInsn (0xef500000); |
emitLDSTs(0x30, insn->dType); |
emitLDSTc(0x2c); |
emitADDR (0x08, 0x14, 24, 0, insn->src(0)); |
emitGPR (0x00, insn->src(1)); |
} |
void |
CodeEmitterGM107::emitSTS() |
{ |
emitInsn (0xef580000); |
emitLDSTs(0x30, insn->dType); |
emitADDR (0x08, 0x14, 24, 0, insn->src(0)); |
emitGPR (0x00, insn->src(1)); |
} |
void |
CodeEmitterGM107::emitST() |
{ |
emitInsn (0xa0000000); |
emitPRED (0x3a); |
emitLDSTc(0x38); |
emitLDSTs(0x35, insn->dType); |
emitADDR (0x08, 0x14, 32, 0, insn->src(0)); |
emitGPR (0x00, insn->src(1)); |
} |
void |
CodeEmitterGM107::emitALD() |
{ |
emitInsn (0xefd80000); |
emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1); |
emitGPR (0x27, insn->src(0).getIndirect(1)); |
emitO (0x20); |
emitP (0x1f); |
emitADDR (0x08, 20, 10, 0, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitAST() |
{ |
emitInsn (0xeff00000); |
emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1); |
emitGPR (0x27, insn->src(0).getIndirect(1)); |
emitP (0x1f); |
emitADDR (0x08, 20, 10, 0, insn->src(0)); |
emitGPR (0x00, insn->src(1)); |
} |
void |
CodeEmitterGM107::emitISBERD() |
{ |
emitInsn(0xefd00000); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitIPA() |
{ |
int ipam = 0, ipas = 0; |
switch (insn->getInterpMode()) { |
case NV50_IR_INTERP_LINEAR : ipam = 0; break; |
case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break; |
case NV50_IR_INTERP_FLAT : ipam = 2; break; |
case NV50_IR_INTERP_SC : ipam = 3; break; |
default: |
assert(!"invalid ipa mode"); |
break; |
} |
switch (insn->getSampleMode()) { |
case NV50_IR_INTERP_DEFAULT : ipas = 0; break; |
case NV50_IR_INTERP_CENTROID: ipas = 1; break; |
case NV50_IR_INTERP_OFFSET : ipas = 2; break; |
default: |
assert(!"invalid ipa sample mode"); |
break; |
} |
emitInsn (0xe0000000); |
emitField(0x36, 2, ipam); |
emitField(0x34, 2, ipas); |
emitSAT (0x33); |
emitField(0x2f, 3, 7); |
emitADDR (0x08, 0x1c, 10, 0, insn->src(0)); |
if ((code[0] & 0x0000ff00) != 0x0000ff00) |
code[1] |= 0x00000040; /* .idx */ |
emitGPR(0x00, insn->def(0)); |
if (insn->op == OP_PINTERP) { |
emitGPR(0x14, insn->src(1)); |
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) |
emitGPR(0x27, insn->src(2)); |
} else { |
if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) |
emitGPR(0x27, insn->src(1)); |
emitGPR(0x14); |
} |
if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) |
emitGPR(0x27); |
} |
/******************************************************************************* |
* surface |
******************************************************************************/ |
void |
CodeEmitterGM107::emitPIXLD() |
{ |
emitInsn (0xefe80000); |
emitPRED (0x2d); |
emitField(0x1f, 3, insn->subOp); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/******************************************************************************* |
* texture |
******************************************************************************/ |
void |
CodeEmitterGM107::emitTEXs(int pos) |
{ |
int src1 = insn->predSrc == 1 ? 2 : 1; |
if (insn->srcExists(src1)) |
emitGPR(pos, insn->src(src1)); |
else |
emitGPR(pos); |
} |
void |
CodeEmitterGM107::emitTEX() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
int lodm = 0; |
if (!insn->tex.levelZero) { |
switch (insn->op) { |
case OP_TEX: lodm = 0; break; |
case OP_TXB: lodm = 2; break; |
case OP_TXL: lodm = 3; break; |
default: |
assert(!"invalid tex op"); |
break; |
} |
} else { |
lodm = 1; |
} |
if (insn->tex.rIndirectSrc >= 0) { |
emitInsn (0xdeb80000); |
emitField(0x35, 2, lodm); |
emitField(0x24, 1, insn->tex.useOffsets == 1); |
} else { |
emitInsn (0xc0380000); |
emitField(0x37, 2, lodm); |
emitField(0x36, 1, insn->tex.useOffsets == 1); |
emitField(0x24, 13, insn->tex.r); |
} |
emitField(0x32, 1, insn->tex.target.isShadow()); |
emitField(0x31, 1, insn->tex.liveOnly); |
emitField(0x23, 1, insn->tex.derivAll); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : |
insn->tex.target.getDim() - 1); |
emitField(0x1c, 1, insn->tex.target.isArray()); |
emitTEXs (0x14); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitTLD() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
if (insn->tex.rIndirectSrc >= 0) { |
emitInsn (0xdd380000); |
} else { |
emitInsn (0xdc380000); |
emitField(0x24, 13, insn->tex.r); |
} |
emitField(0x37, 1, insn->tex.levelZero == 0); |
emitField(0x32, 1, insn->tex.target.isMS()); |
emitField(0x31, 1, insn->tex.liveOnly); |
emitField(0x23, 1, insn->tex.useOffsets == 1); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : |
insn->tex.target.getDim() - 1); |
emitField(0x1c, 1, insn->tex.target.isArray()); |
emitTEXs (0x14); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitTLD4() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
if (insn->tex.rIndirectSrc >= 0) { |
emitInsn (0xdef80000); |
emitField(0x26, 2, insn->tex.gatherComp); |
emitField(0x25, 2, insn->tex.useOffsets == 4); |
emitField(0x24, 2, insn->tex.useOffsets == 1); |
} else { |
emitInsn (0xc8380000); |
emitField(0x38, 2, insn->tex.gatherComp); |
emitField(0x37, 2, insn->tex.useOffsets == 4); |
emitField(0x36, 2, insn->tex.useOffsets == 1); |
emitField(0x24, 13, insn->tex.r); |
} |
emitField(0x32, 1, insn->tex.target.isShadow()); |
emitField(0x31, 1, insn->tex.liveOnly); |
emitField(0x23, 1, insn->tex.derivAll); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : |
insn->tex.target.getDim() - 1); |
emitField(0x1c, 1, insn->tex.target.isArray()); |
emitTEXs (0x14); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitTXD() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
if (insn->tex.rIndirectSrc >= 0) { |
emitInsn (0xde780000); |
} else { |
emitInsn (0xde380000); |
emitField(0x24, 13, insn->tex.r); |
} |
emitField(0x31, 1, insn->tex.liveOnly); |
emitField(0x23, 1, insn->tex.useOffsets == 1); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : |
insn->tex.target.getDim() - 1); |
emitField(0x1c, 1, insn->tex.target.isArray()); |
emitTEXs (0x14); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitTMML() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
if (insn->tex.rIndirectSrc >= 0) { |
emitInsn (0xdf600000); |
} else { |
emitInsn (0xdf580000); |
emitField(0x24, 13, insn->tex.r); |
} |
emitField(0x31, 1, insn->tex.liveOnly); |
emitField(0x23, 1, insn->tex.derivAll); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x1d, 2, insn->tex.target.isCube() ? 3 : |
insn->tex.target.getDim() - 1); |
emitField(0x1c, 1, insn->tex.target.isArray()); |
emitTEXs (0x14); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitTXQ() |
{ |
const TexInstruction *insn = this->insn->asTex(); |
int type = 0; |
switch (insn->tex.query) { |
case TXQ_DIMS : type = 0x01; break; |
case TXQ_TYPE : type = 0x02; break; |
case TXQ_SAMPLE_POSITION: type = 0x05; break; |
case TXQ_FILTER : type = 0x10; break; |
case TXQ_LOD : type = 0x12; break; |
case TXQ_WRAP : type = 0x14; break; |
case TXQ_BORDER_COLOUR : type = 0x16; break; |
default: |
assert(!"invalid txq query"); |
break; |
} |
emitInsn (0xdf4a0000); |
emitField(0x24, 13, insn->tex.r); |
emitField(0x1f, 4, insn->tex.mask); |
emitField(0x16, 6, type); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
void |
CodeEmitterGM107::emitDEPBAR() |
{ |
emitInsn (0xf0f00000); |
emitField(0x1d, 1, 1); /* le */ |
emitField(0x1a, 3, 5); |
emitField(0x14, 6, insn->subOp); |
emitField(0x00, 6, insn->subOp); |
} |
/******************************************************************************* |
* misc |
******************************************************************************/ |
void |
CodeEmitterGM107::emitNOP() |
{ |
emitInsn(0x50b00000); |
} |
void |
CodeEmitterGM107::emitKIL() |
{ |
emitInsn (0xe3300000); |
emitCond5(0x00, CC_TR); |
} |
void |
CodeEmitterGM107::emitOUT() |
{ |
const int cut = insn->op == OP_RESTART || insn->subOp; |
const int emit = insn->op == OP_EMIT; |
switch (insn->src(1).getFile()) { |
case FILE_GPR: |
emitInsn(0xfbe00000); |
emitGPR (0x14, insn->src(1)); |
break; |
case FILE_IMMEDIATE: |
emitInsn(0xf6e00000); |
emitIMMD(0x14, 19, insn->src(1)); |
break; |
case FILE_MEMORY_CONST: |
emitInsn(0xebe00000); |
emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); |
break; |
default: |
assert(!"bad src1 file"); |
break; |
} |
emitField(0x27, 2, (cut << 1) | emit); |
emitGPR (0x08, insn->src(0)); |
emitGPR (0x00, insn->def(0)); |
} |
/******************************************************************************* |
* assembler front-end |
******************************************************************************/ |
bool |
CodeEmitterGM107::emitInstruction(Instruction *i) |
{ |
const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8; |
bool ret = true; |
insn = i; |
if (insn->encSize != 8) { |
ERROR("skipping undecodable instruction: "); insn->print(); |
return false; |
} else |
if (codeSize + size > codeSizeLimit) { |
ERROR("code emitter output buffer too small\n"); |
return false; |
} |
if (writeIssueDelays) { |
int n = ((codeSize & 0x1f) / 8) - 1; |
if (n < 0) { |
data = code; |
data[0] = 0x00000000; |
data[1] = 0x00000000; |
code += 2; |
codeSize += 8; |
n++; |
} |
emitField(data, n * 21, 21, insn->sched); |
} |
switch (insn->op) { |
case OP_EXIT: |
emitEXIT(); |
break; |
case OP_BRA: |
emitBRA(); |
break; |
case OP_CALL: |
emitCAL(); |
break; |
case OP_PRECONT: |
emitPCNT(); |
break; |
case OP_CONT: |
emitCONT(); |
break; |
case OP_PREBREAK: |
emitPBK(); |
break; |
case OP_BREAK: |
emitBRK(); |
break; |
case OP_PRERET: |
emitPRET(); |
break; |
case OP_RET: |
emitRET(); |
break; |
case OP_JOINAT: |
emitSSY(); |
break; |
case OP_JOIN: |
emitSYNC(); |
break; |
case OP_QUADON: |
emitSAM(); |
break; |
case OP_QUADPOP: |
emitRAM(); |
break; |
case OP_MOV: |
if (insn->def(0).getFile() == FILE_GPR && |
insn->src(0).getFile() != FILE_PREDICATE) |
emitMOV(); |
else |
assert(!"R2P/P2R"); |
break; |
case OP_RDSV: |
emitS2R(); |
break; |
case OP_ABS: |
case OP_NEG: |
case OP_SAT: |
case OP_FLOOR: |
case OP_CEIL: |
case OP_TRUNC: |
case OP_CVT: |
if (isFloatType(insn->dType)) { |
if (isFloatType(insn->sType)) |
emitF2F(); |
else |
emitI2F(); |
} else { |
if (isFloatType(insn->sType)) |
emitF2I(); |
else |
emitI2I(); |
} |
break; |
case OP_SHFL: |
emitSHFL(); |
break; |
case OP_ADD: |
case OP_SUB: |
if (isFloatType(insn->dType)) { |
if (insn->dType == TYPE_F64) |
emitDADD(); |
else |
emitFADD(); |
} else { |
emitIADD(); |
} |
break; |
case OP_MUL: |
if (isFloatType(insn->dType)) { |
if (insn->dType == TYPE_F64) |
emitDMUL(); |
else |
emitFMUL(); |
} else { |
emitIMUL(); |
} |
break; |
case OP_MAD: |
case OP_FMA: |
if (isFloatType(insn->dType)) { |
if (insn->dType == TYPE_F64) |
emitDFMA(); |
else |
emitFFMA(); |
} else { |
emitIMAD(); |
} |
break; |
case OP_MIN: |
case OP_MAX: |
if (isFloatType(insn->dType)) { |
if (insn->dType == TYPE_F64) |
emitDMNMX(); |
else |
emitFMNMX(); |
} else { |
emitIMNMX(); |
} |
break; |
case OP_SHL: |
emitSHL(); |
break; |
case OP_SHR: |
emitSHR(); |
break; |
case OP_POPCNT: |
emitPOPC(); |
break; |
case OP_INSBF: |
emitBFI(); |
break; |
case OP_EXTBF: |
emitBFE(); |
break; |
case OP_BFIND: |
emitFLO(); |
break; |
case OP_SLCT: |
if (isFloatType(insn->dType)) |
emitFCMP(); |
else |
emitICMP(); |
break; |
case OP_SET: |
case OP_SET_AND: |
case OP_SET_OR: |
case OP_SET_XOR: |
if (insn->def(0).getFile() != FILE_PREDICATE) { |
if (isFloatType(insn->sType)) |
if (insn->sType == TYPE_F64) |
emitDSET(); |
else |
emitFSET(); |
else |
emitISET(); |
} else { |
if (isFloatType(insn->sType)) |
if (insn->sType == TYPE_F64) |
emitDSETP(); |
else |
emitFSETP(); |
else |
emitISETP(); |
} |
break; |
case OP_PRESIN: |
case OP_PREEX2: |
emitRRO(); |
break; |
case OP_COS: |
case OP_SIN: |
case OP_EX2: |
case OP_LG2: |
case OP_RCP: |
case OP_RSQ: |
emitMUFU(); |
break; |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
emitLOP(); |
break; |
case OP_NOT: |
emitNOT(); |
break; |
case OP_LOAD: |
switch (insn->src(0).getFile()) { |
case FILE_MEMORY_CONST : emitLDC(); break; |
case FILE_MEMORY_LOCAL : emitLDL(); break; |
case FILE_MEMORY_SHARED: emitLDS(); break; |
case FILE_MEMORY_GLOBAL: emitLD(); break; |
default: |
assert(!"invalid load"); |
emitNOP(); |
break; |
} |
break; |
case OP_STORE: |
switch (insn->src(0).getFile()) { |
case FILE_MEMORY_LOCAL : emitSTL(); break; |
case FILE_MEMORY_SHARED: emitSTS(); break; |
case FILE_MEMORY_GLOBAL: emitST(); break; |
default: |
assert(!"invalid load"); |
emitNOP(); |
break; |
} |
break; |
case OP_VFETCH: |
emitALD(); |
break; |
case OP_EXPORT: |
emitAST(); |
break; |
case OP_PFETCH: |
emitISBERD(); |
break; |
case OP_LINTERP: |
case OP_PINTERP: |
emitIPA(); |
break; |
case OP_PIXLD: |
emitPIXLD(); |
break; |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
emitTEX(); |
break; |
case OP_TXF: |
emitTLD(); |
break; |
case OP_TXG: |
emitTLD4(); |
break; |
case OP_TXD: |
emitTXD(); |
break; |
case OP_TXQ: |
emitTXQ(); |
break; |
case OP_TXLQ: |
emitTMML(); |
break; |
case OP_TEXBAR: |
emitDEPBAR(); |
break; |
case OP_QUADOP: |
emitFSWZADD(); |
break; |
case OP_NOP: |
emitNOP(); |
break; |
case OP_DISCARD: |
emitKIL(); |
break; |
case OP_EMIT: |
case OP_RESTART: |
emitOUT(); |
break; |
default: |
assert(!"invalid opcode"); |
emitNOP(); |
ret = false; |
break; |
} |
if (insn->join) { |
/*XXX*/ |
} |
code += 2; |
codeSize += 8; |
return ret; |
} |
/* Every instruction is reported as 8 bytes, regardless of `i`. */
uint32_t
CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
{
   const uint32_t encodingBytes = 8;

   return encodingBytes;
}
/******************************************************************************* |
* sched data calculator |
******************************************************************************/ |
class SchedDataCalculatorGM107 : public Pass |
{ |
public: |
SchedDataCalculatorGM107(const Target *targ) : targ(targ) {} |
private: |
const Target *targ; |
bool visit(BasicBlock *bb); |
}; |
bool |
SchedDataCalculatorGM107::visit(BasicBlock *bb) |
{ |
for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) { |
/*XXX*/ |
insn->sched = 0x7e0; |
} |
return true; |
} |
/******************************************************************************* |
* main |
******************************************************************************/ |
void |
CodeEmitterGM107::prepareEmission(Function *func) |
{ |
SchedDataCalculatorGM107 sched(targ); |
CodeEmitter::prepareEmission(func); |
sched.run(func, true, true); |
} |
/* Number of 24-byte groups needed to hold `size` bytes, rounded up.
 * NOTE(review): 24 presumably corresponds to three 8-byte instructions per
 * scheduling word -- confirm against the caller. */
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t groupBytes = 24;

   return (size + (groupBytes - 1)) / groupBytes;
}
void |
CodeEmitterGM107::prepareEmission(Program *prog) |
{ |
for (ArrayList::Iterator fi = prog->allFuncs.iterator(); |
!fi.end(); fi.next()) { |
Function *func = reinterpret_cast<Function *>(fi.get()); |
func->binPos = prog->binSize; |
prepareEmission(func); |
// adjust sizes & positions for schedulding info: |
if (prog->getTarget()->hasSWSched) { |
uint32_t adjPos = func->binPos; |
BasicBlock *bb = NULL; |
for (int i = 0; i < func->bbCount; ++i) { |
bb = func->bbArray[i]; |
int32_t adjSize = bb->binSize; |
if (adjPos % 32) { |
adjSize -= 32 - adjPos % 32; |
if (adjSize < 0) |
adjSize = 0; |
} |
adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8; |
bb->binPos = adjPos; |
bb->binSize = adjSize; |
adjPos += adjSize; |
} |
if (bb) |
func->binSize = adjPos - func->binPos; |
} |
prog->binSize += func->binSize; |
} |
} |
// Construct an emitter for `target`. No code buffer is attached yet: the
// buffer pointer, both size counters and the relocation info start out empty.
CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
   : CodeEmitter(target),
     targGM107(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   relocInfo = NULL;

   codeSizeLimit = 0;
   codeSize = 0;
}
// Allocate a GM107 code emitter bound to this target and configured for the
// given program type. The emitter is created with `new` and returned as a raw
// pointer.
CodeEmitter *
TargetGM107::createCodeEmitterGM107(Program::Type type)
{
   CodeEmitterGM107 *const emitter = new CodeEmitterGM107(this);

   emitter->setProgramType(type);

   return emitter;
}
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp |
---|
0,0 → 1,2058 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target_nv50.h" |
namespace nv50_ir { |
#define NV50_OP_ENC_LONG 0 |
#define NV50_OP_ENC_SHORT 1 |
#define NV50_OP_ENC_IMM 2 |
#define NV50_OP_ENC_LONG_ALT 3 |
// Binary code emitter for NV50. The emit*() helpers below assemble one |
// instruction into the code[] words (code[] is not declared in this class; |
// presumably inherited from CodeEmitter -- confirm). |
class CodeEmitterNV50 : public CodeEmitter |
{ |
public: |
CodeEmitterNV50(const TargetNV50 *); |
virtual bool emitInstruction(Instruction *); |
virtual uint32_t getMinEncodingSize(const Instruction *) const; |
// Records the program type; several encodings below depend on it. |
inline void setProgramType(Program::Type pType) { progType = pType; } |
virtual void prepareEmission(Function *); |
private: |
// Program type set through setProgramType(). |
Program::Type progType; |
const TargetNV50 *targNV50; |
private: |
// Operand field helpers. `pos` is a bit position in the encoding: |
// pos / 32 selects the code[] word and pos % 32 the shift. |
inline void defId(const ValueDef&, const int pos); |
inline void srcId(const ValueRef&, const int pos); |
inline void srcId(const ValueRef *, const int pos); |
inline void srcAddr16(const ValueRef&, bool adj, const int pos); |
inline void srcAddr8(const ValueRef&, const int pos); |
// Flags register read/write and condition code fields. |
void emitFlagsRd(const Instruction *); |
void emitFlagsWr(const Instruction *); |
void emitCondCode(CondCode cc, DataType ty, int pos); |
// Address register, immediate, destination and source fields. |
inline void setARegBits(unsigned int); |
void setAReg16(const Instruction *, int s); |
void setImmediate(const Instruction *, int s); |
void setDst(const Value *); |
void setDst(const Instruction *, int d); |
void setSrcFileBits(const Instruction *, int enc); |
void setSrc(const Instruction *, unsigned int s, int slot); |
// Common operand layouts shared by several opcodes. |
void emitForm_MAD(const Instruction *); |
void emitForm_ADD(const Instruction *); |
void emitForm_MUL(const Instruction *); |
void emitForm_IMM(const Instruction *); |
// Size, rounding-mode and negate modifier fields. |
void emitLoadStoreSizeLG(DataType ty, int pos); |
void emitLoadStoreSizeCS(DataType ty); |
void roundMode_MAD(const Instruction *); |
void roundMode_CVT(RoundMode); |
void emitMNeg12(const Instruction *); |
// Per-opcode emitters. |
void emitLOAD(const Instruction *); |
void emitSTORE(const Instruction *); |
void emitMOV(const Instruction *); |
void emitRDSV(const Instruction *); |
void emitNOP(); |
void emitINTERP(const Instruction *); |
void emitPFETCH(const Instruction *); |
void emitOUT(const Instruction *); |
void emitUADD(const Instruction *); |
void emitAADD(const Instruction *); |
void emitFADD(const Instruction *); |
void emitIMUL(const Instruction *); |
void emitFMUL(const Instruction *); |
void emitFMAD(const Instruction *); |
void emitIMAD(const Instruction *); |
void emitISAD(const Instruction *); |
void emitMINMAX(const Instruction *); |
void emitPreOp(const Instruction *); |
void emitSFnOp(const Instruction *, uint8_t subOp); |
void emitShift(const Instruction *); |
void emitARL(const Instruction *, unsigned int shl); |
void emitLogicOp(const Instruction *); |
void emitNOT(const Instruction *); |
void emitCVT(const Instruction *); |
void emitSET(const Instruction *); |
void emitTEX(const TexInstruction *); |
void emitTXQ(const TexInstruction *); |
void emitTEXPREP(const TexInstruction *); |
void emitQUADOP(const Instruction *, uint8_t lane, uint8_t quOp); |
void emitFlow(const Instruction *, uint8_t flowOp); |
void emitPRERETEmu(const FlowInstruction *); |
void emitBAR(const Instruction *); |
void emitATOM(const Instruction *); |
}; |
// Shorthand for the register data of the value that reference `a` resolves |
// to through rep(). SDATA is used on sources and DDATA on definitions; both |
// expand to the same expression. |
#define SDATA(a) ((a).rep()->reg.data) |
#define DDATA(a) ((a).rep()->reg.data) |
void CodeEmitterNV50::srcId(const ValueRef& src, const int pos) |
{ |
assert(src.get()); |
code[pos / 32] |= SDATA(src).id << (pos % 32); |
} |
void CodeEmitterNV50::srcId(const ValueRef *src, const int pos) |
{ |
assert(src->get()); |
code[pos / 32] |= SDATA(*src).id << (pos % 32); |
} |
// OR the 16-bit address offset of `src` into the encoding at bit `pos`. |
// When `adj` is set, the byte offset is first divided by the size of the |
// value (asserted to be at most 4), so it is encoded in units of that size. |
void CodeEmitterNV50::srcAddr16(const ValueRef& src, bool adj, const int pos) |
{ |
assert(src.get()); |
int32_t offset = SDATA(src).offset; |
assert(!adj || src.get()->reg.size <= 4); |
if (adj) |
offset /= src.get()->reg.size; |
// The offset must fit a signed 16-bit value and the field must not cross |
// the end of the 32-bit word. |
assert(offset <= 0x7fff && offset >= (int32_t)-0x8000 && (pos % 32) <= 16); |
// Mask negative offsets so the sign bits do not spill into other fields; |
// size-adjusted offsets get a correspondingly narrower mask. |
if (offset < 0) |
offset &= adj ? (0xffff >> (src.get()->reg.size >> 1)) : 0xffff; |
code[pos / 32] |= offset << (pos % 32); |
} |
void CodeEmitterNV50::srcAddr8(const ValueRef& src, const int pos) |
{ |
assert(src.get()); |
uint32_t offset = SDATA(src).offset; |
assert((offset <= 0x1fc || offset == 0x3fc) && !(offset & 0x3)); |
code[pos / 32] |= (offset >> 2) << (pos % 32); |
} |
void CodeEmitterNV50::defId(const ValueDef& def, const int pos) |
{ |
assert(def.get() && def.getFile() != FILE_SHADER_OUTPUT); |
code[pos / 32] |= DDATA(def).id << (pos % 32); |
} |
void |
CodeEmitterNV50::roundMode_MAD(const Instruction *insn) |
{ |
switch (insn->rnd) { |
case ROUND_M: code[1] |= 1 << 22; break; |
case ROUND_P: code[1] |= 2 << 22; break; |
case ROUND_Z: code[1] |= 3 << 22; break; |
default: |
assert(insn->rnd == ROUND_N); |
break; |
} |
} |
void |
CodeEmitterNV50::emitMNeg12(const Instruction *i) |
{ |
code[1] |= i->src(0).mod.neg() << 26; |
code[1] |= i->src(1).mod.neg() << 27; |
} |
// OR the encoding of condition code `cc` into the instruction at bit `pos`. |
// For a non-float `ty` other than TYPE_NONE, bit 3 of the encoding is |
// cleared before it is written. |
void CodeEmitterNV50::emitCondCode(CondCode cc, DataType ty, int pos) |
{ |
uint8_t enc; |
// Encodings use up to 5 bits (max 0x1f), so a field starting at bits |
// 28..31 would cross the word boundary. |
assert(pos >= 32 || pos <= 27); |
switch (cc) { |
case CC_LT: enc = 0x1; break; |
case CC_LTU: enc = 0x9; break; |
case CC_EQ: enc = 0x2; break; |
case CC_EQU: enc = 0xa; break; |
case CC_LE: enc = 0x3; break; |
case CC_LEU: enc = 0xb; break; |
case CC_GT: enc = 0x4; break; |
case CC_GTU: enc = 0xc; break; |
case CC_NE: enc = 0x5; break; |
case CC_NEU: enc = 0xd; break; |
case CC_GE: enc = 0x6; break; |
case CC_GEU: enc = 0xe; break; |
case CC_TR: enc = 0xf; break; |
case CC_FL: enc = 0x0; break; |
case CC_O: enc = 0x10; break; |
case CC_C: enc = 0x11; break; |
case CC_A: enc = 0x12; break; |
case CC_S: enc = 0x13; break; |
case CC_NS: enc = 0x1c; break; |
case CC_NA: enc = 0x1d; break; |
case CC_NC: enc = 0x1e; break; |
case CC_NO: enc = 0x1f; break; |
default: |
// Unknown code: encode 0 (same value as CC_FL) after asserting. |
enc = 0; |
assert(!"invalid condition code"); |
break; |
} |
if (ty != TYPE_NONE && !isFloatType(ty)) |
enc &= ~0x8; // unordered only exists for float types |
code[pos / 32] |= enc << (pos % 32); |
} |
// Encode the flags register and condition code the instruction reads. |
// The flags source is flagsSrc when that is set (>= 0), otherwise predSrc. |
void |
CodeEmitterNV50::emitFlagsRd(const Instruction *i) |
{ |
int s = (i->flagsSrc >= 0) ? i->flagsSrc : i->predSrc; |
// Bits 7..13 of code[1] are written below and must still be clear. |
assert(!(code[1] & 0x00003f80)); |
if (s >= 0) { |
assert(i->getSrc(s)->reg.file == FILE_FLAGS); |
// Condition code at bit 7 and flags register id at bit 12 of code[1]. |
emitCondCode(i->cc, TYPE_NONE, 32 + 7); |
srcId(i->src(s), 32 + 12); |
} else { |
// No flags source: write 0xf at bit 7, the value emitCondCode() uses |
// for CC_TR. |
code[1] |= 0x0780; |
} |
} |
// Encode the flags register written by the instruction, if it has one. |
void |
CodeEmitterNV50::emitFlagsWr(const Instruction *i) |
{ |
// Bits 4..6 of code[1] are written below and must still be clear. |
assert(!(code[1] & 0x70)); |
int flagsDef = i->flagsDef; |
// find flags definition and check that it is the last def |
if (flagsDef < 0) { |
// Fall back to scanning the definitions; the last FILE_FLAGS def wins. |
for (int d = 0; i->defExists(d); ++d) |
if (i->def(d).getFile() == FILE_FLAGS) |
flagsDef = d; |
// The "&& 0" keeps this warning disabled for now. |
if (flagsDef >= 0 && 0) // TODO: enforce use of flagsDef at some point |
WARN("Instruction::flagsDef was not set properly\n"); |
} |
if (flagsDef == 0 && i->defExists(1)) |
WARN("flags def should not be the primary definition\n"); |
// Flags register id at bit 4, together with bit 6 (0x40). |
if (flagsDef >= 0) |
code[1] |= (DDATA(i->def(flagsDef)).id << 4) | 0x40; |
} |
void |
CodeEmitterNV50::setARegBits(unsigned int u) |
{ |
code[0] |= (u & 3) << 26; |
code[1] |= (u & 4); |
} |
void |
CodeEmitterNV50::setAReg16(const Instruction *i, int s) |
{ |
if (i->srcExists(s)) { |
s = i->src(s).indirect[0]; |
if (s >= 0) |
setARegBits(SDATA(i->src(s)).id + 1); |
} |
} |
void |
CodeEmitterNV50::setImmediate(const Instruction *i, int s) |
{ |
const ImmediateValue *imm = i->src(s).get()->asImm(); |
assert(imm); |
uint32_t u = imm->reg.data.u32; |
if (i->src(s).mod & Modifier(NV50_IR_MOD_NOT)) |
u = ~u; |
code[1] |= 3; |
code[0] |= (u & 0x3f) << 16; |
code[1] |= (u >> 6) << 2; |
} |
// Encode destination value `dst`, using the storage of the value it is |
// joined to (dst->join). Address registers are not accepted here. |
void |
CodeEmitterNV50::setDst(const Value *dst) |
{ |
const Storage *reg = &dst->join->reg; |
assert(reg->file != FILE_ADDRESS); |
if (reg->data.id < 0 || reg->file == FILE_FLAGS) { |
// No register assigned, or a flags value: write register field 127 |
// plus bit 0 of code[0], and bit 3 of code[1]. |
code[0] |= (127 << 2) | 1; |
code[1] |= 8; |
} else { |
int id; |
if (reg->file == FILE_SHADER_OUTPUT) { |
// Shader outputs set bit 3 of code[1] and are addressed by their |
// offset divided by 4. |
code[1] |= 8; |
id = reg->data.offset / 4; |
} else { |
id = reg->data.id; |
} |
code[0] |= id << 2; |
} |
} |
void |
CodeEmitterNV50::setDst(const Instruction *i, int d) |
{ |
if (i->defExists(d)) { |
setDst(i->getDef(d)); |
} else |
if (!d) { |
code[0] |= 0x01fc; // bit bucket |
code[1] |= 0x0008; |
} |
} |
// Encode which register file each source operand comes from. |
// A `mode` value is built with 2 bits per source (source s at bit s * 2): |
// 3 * 2 bits: |
// 0: r |
// 1: a/s |
// 2: c |
// 3: i |
// `enc` is one of the NV50_OP_ENC_* values and selects where some of the |
// bits go. For compute programs the type of source 0 is encoded as well |
// when that source is in the a/s file. |
void |
CodeEmitterNV50::setSrcFileBits(const Instruction *i, int enc) |
{ |
uint8_t mode = 0; |
for (unsigned int s = 0; s < Target::operationSrcNr[i->op]; ++s) { |
switch (i->src(s).getFile()) { |
case FILE_GPR: |
break; |
case FILE_MEMORY_SHARED: |
case FILE_SHADER_INPUT: |
mode |= 1 << (s * 2); |
break; |
case FILE_MEMORY_CONST: |
mode |= 2 << (s * 2); |
break; |
case FILE_IMMEDIATE: |
mode |= 3 << (s * 2); |
break; |
default: |
ERROR("invalid file on source %i: %u\n", s, i->src(s).getFile()); |
assert(0); |
break; |
} |
} |
// Each case label is annotated with the file of sources 0, 1, 2 in order. |
switch (mode) { |
case 0x00: // rrr |
break; |
case 0x01: // arr/grr |
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
code[0] |= 0x01800000; |
if (enc == NV50_OP_ENC_LONG || enc == NV50_OP_ENC_LONG_ALT) |
code[1] |= 0x00200000; |
} else { |
if (enc == NV50_OP_ENC_SHORT) |
code[0] |= 0x01000000; |
else |
code[1] |= 0x00200000; |
} |
break; |
case 0x03: // irr |
// Returns directly, so the compute-only part below is skipped. |
assert(i->op == OP_MOV); |
return; |
case 0x0c: // rir |
break; |
case 0x0d: // gir |
assert(progType == Program::TYPE_GEOMETRY || |
progType == Program::TYPE_COMPUTE); |
code[0] |= 0x01000000; |
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
// Indirect register id + 1 goes to bit 26 of code[0]. |
int reg = i->src(0).getIndirect(0)->rep()->reg.data.id; |
assert(reg < 3); |
code[0] |= (reg + 1) << 26; |
} |
break; |
case 0x08: // rcr |
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; |
// File index of the constant source at bit 22 of code[1]. |
code[1] |= (i->getSrc(1)->reg.fileIndex << 22); |
break; |
case 0x09: // acr/gcr |
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) { |
code[0] |= 0x01800000; |
} else { |
code[0] |= (enc == NV50_OP_ENC_LONG_ALT) ? 0x01000000 : 0x00800000; |
code[1] |= 0x00200000; |
} |
code[1] |= (i->getSrc(1)->reg.fileIndex << 22); |
break; |
case 0x20: // rrc |
code[0] |= 0x01000000; |
code[1] |= (i->getSrc(2)->reg.fileIndex << 22); |
break; |
case 0x21: // arc |
code[0] |= 0x01000000; |
code[1] |= 0x00200000 | (i->getSrc(2)->reg.fileIndex << 22); |
assert(progType != Program::TYPE_GEOMETRY); |
break; |
default: |
ERROR("not encodable: %x\n", mode); |
assert(0); |
break; |
} |
// Everything below applies to compute programs only. |
if (progType != Program::TYPE_COMPUTE) |
return; |
if ((mode & 3) == 1) { |
// Source 0 is in the a/s file: encode its type in a 2-bit field whose |
// position depends on whether source 1 is an immediate. |
const int pos = i->src(1).getFile() == FILE_IMMEDIATE ? 13 : 14; |
switch (i->getSrc(0)->reg.type) { |
case TYPE_U8: |
break; |
case TYPE_U16: |
code[0] |= 1 << pos; |
break; |
case TYPE_S16: |
code[0] |= 2 << pos; |
break; |
default: |
code[0] |= 3 << pos; |
assert(i->getSrc(0)->reg.size == 4); |
break; |
} |
} |
} |
void |
CodeEmitterNV50::setSrc(const Instruction *i, unsigned int s, int slot) |
{ |
if (Target::operationSrcNr[i->op] <= s) |
return; |
const Storage *reg = &i->src(s).rep()->reg; |
unsigned int id = (reg->file == FILE_GPR) ? |
reg->data.id : |
reg->data.offset >> (reg->size >> 1); // no > 4 byte sources here |
switch (slot) { |
case 0: code[0] |= id << 9; break; |
case 1: code[0] |= id << 16; break; |
case 2: code[1] |= id << 14; break; |
default: |
assert(0); |
break; |
} |
} |
// the default form: |
// - long instruction |
// - 1 to 3 sources in slots 0, 1, 2 (rrr, arr, rcr, acr, rrc, arc, gcr, grr) |
// - address & flags |
// The caller is expected to have written the opcode bits to code[] already; |
// this only ORs operand fields into them. |
void |
CodeEmitterNV50::emitForm_MAD(const Instruction *i) |
{ |
assert(i->encSize == 8); |
// Bit 0 of code[0] is set for every 8-byte encoding emitted here. |
code[0] |= 1; |
emitFlagsRd(i); |
emitFlagsWr(i); |
setDst(i, 0); |
setSrcFileBits(i, NV50_OP_ENC_LONG); |
setSrc(i, 0, 0); |
setSrc(i, 1, 1); |
setSrc(i, 2, 2); |
// Only one address register can be encoded: use the one of source 0 if it |
// is indirect (source 1 must then be direct), else that of source 1. |
if (i->getIndirect(0, 0)) { |
assert(!i->getIndirect(1, 0)); |
setAReg16(i, 0); |
} else { |
setAReg16(i, 1); |
} |
} |
// like default form, but 2nd source in slot 2, and no 3rd source |
// Source file bits are encoded with NV50_OP_ENC_LONG_ALT. |
void |
CodeEmitterNV50::emitForm_ADD(const Instruction *i) |
{ |
assert(i->encSize == 8); |
code[0] |= 1; |
emitFlagsRd(i); |
emitFlagsWr(i); |
setDst(i, 0); |
setSrcFileBits(i, NV50_OP_ENC_LONG_ALT); |
setSrc(i, 0, 0); |
setSrc(i, 1, 2); |
// Address register of source 0 if it is indirect (source 1 must then be |
// direct), otherwise that of source 1. |
if (i->getIndirect(0, 0)) { |
assert(!i->getIndirect(1, 0)); |
setAReg16(i, 0); |
} else { |
setAReg16(i, 1); |
} |
} |
// default short form (rr, ar, rc, gr) |
// 4-byte encoding: bit 0 of code[0] must be clear, a destination is |
// required and the instruction must not be predicated. No flags or address |
// register fields are written. |
void |
CodeEmitterNV50::emitForm_MUL(const Instruction *i) |
{ |
assert(i->encSize == 4 && !(code[0] & 1)); |
assert(i->defExists(0)); |
assert(!i->getPredicate()); |
setDst(i, 0); |
setSrcFileBits(i, NV50_OP_ENC_SHORT); |
setSrc(i, 0, 0); |
setSrc(i, 1, 1); |
} |
// usual immediate form |
// - 1 to 3 sources where last is immediate (rir, gir) |
// - no address or predicate possible |
void |
CodeEmitterNV50::emitForm_IMM(const Instruction *i) |
{ |
assert(i->encSize == 8); |
code[0] |= 1; |
assert(i->defExists(0) && i->srcExists(0)); |
setDst(i, 0); |
setSrcFileBits(i, NV50_OP_ENC_IMM); |
if (Target::operationSrcNr[i->op] > 1) { |
// Multi-source opcode: source 1 is the immediate; a third source, if |
// the opcode has one, goes to slot 1. |
setSrc(i, 0, 0); |
setImmediate(i, 1); |
setSrc(i, 2, 1); |
} else { |
// Single-source opcode: source 0 itself is the immediate. |
setImmediate(i, 0); |
} |
} |
// OR the size/type code for data type `ty` into the encoding at bit `pos`. |
// Used by emitLOAD()/emitSTORE() for local and global memory accesses. |
void |
CodeEmitterNV50::emitLoadStoreSizeLG(DataType ty, int pos) |
{ |
uint8_t enc; |
switch (ty) { |
// All 32-bit types share one code, as do all 64-bit types. |
case TYPE_F32: // fall through |
case TYPE_S32: // fall through |
case TYPE_U32: enc = 0x6; break; |
case TYPE_B128: enc = 0x5; break; |
case TYPE_F64: // fall through |
case TYPE_S64: // fall through |
case TYPE_U64: enc = 0x4; break; |
case TYPE_S16: enc = 0x3; break; |
case TYPE_U16: enc = 0x2; break; |
case TYPE_S8: enc = 0x1; break; |
case TYPE_U8: enc = 0x0; break; |
default: |
// Unsupported type: encode 0 (same value as TYPE_U8) after asserting. |
enc = 0; |
assert(!"invalid load/store type"); |
break; |
} |
code[pos / 32] |= enc << (pos % 32); |
} |
void |
CodeEmitterNV50::emitLoadStoreSizeCS(DataType ty) |
{ |
switch (ty) { |
case TYPE_U8: break; |
case TYPE_U16: code[1] |= 0x4000; break; |
case TYPE_S16: code[1] |= 0x8000; break; |
case TYPE_F32: |
case TYPE_S32: |
case TYPE_U32: code[1] |= 0xc000; break; |
default: |
assert(0); |
break; |
} |
} |
// Emit a load from the symbol in source 0 into definition 0. |
// The opcode words are chosen by the file of the source; destination, |
// flags and address fields are filled in afterwards. |
void |
CodeEmitterNV50::emitLOAD(const Instruction *i) |
{ |
DataFile sf = i->src(0).getFile(); |
int32_t offset = i->getSrc(0)->reg.data.offset; |
switch (sf) { |
case FILE_SHADER_INPUT: |
if (progType == Program::TYPE_GEOMETRY && i->src(0).isIndirect(0)) |
code[0] = 0x11800001; |
else |
// use 'mov' where we can |
code[0] = i->src(0).isIndirect(0) ? 0x00000001 : 0x10000001; |
// The lane mask goes to bit 14 of code[1]. |
code[1] = 0x00200000 | (i->lanes << 14); |
if (typeSizeof(i->dType) == 4) |
code[1] |= 0x04000000; |
break; |
case FILE_MEMORY_SHARED: |
// Two encodings, selected by chipset; they differ in the offset range |
// that is asserted. |
if (targ->getChipset() >= 0x84) { |
assert(offset <= (int32_t)(0x3fff * typeSizeof(i->sType))); |
code[0] = 0x10000001; |
code[1] = 0x40000000; |
if (typeSizeof(i->dType) == 4) |
code[1] |= 0x04000000; |
emitLoadStoreSizeCS(i->sType); |
} else { |
assert(offset <= (int32_t)(0x1f * typeSizeof(i->sType))); |
code[0] = 0x10000001; |
code[1] = 0x00200000 | (i->lanes << 14); |
emitLoadStoreSizeCS(i->sType); |
} |
break; |
case FILE_MEMORY_CONST: |
code[0] = 0x10000001; |
// File index of the constant source at bit 22 of code[1]. |
code[1] = 0x20000000 | (i->getSrc(0)->reg.fileIndex << 22); |
if (typeSizeof(i->dType) == 4) |
code[1] |= 0x04000000; |
emitLoadStoreSizeCS(i->sType); |
break; |
case FILE_MEMORY_LOCAL: |
code[0] = 0xd0000001; |
code[1] = 0x40000000; |
break; |
case FILE_MEMORY_GLOBAL: |
// File index of the global source at bit 16 of code[0]. |
code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); |
code[1] = 0x80000000; |
break; |
default: |
assert(!"invalid load source file"); |
break; |
} |
// Local and global loads carry their size code at bit 21 of code[1]. |
if (sf == FILE_MEMORY_LOCAL || |
sf == FILE_MEMORY_GLOBAL) |
emitLoadStoreSizeLG(i->sType, 21 + 32); |
setDst(i, 0); |
emitFlagsRd(i); |
emitFlagsWr(i); |
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { |
// Global loads take the id of the indirect register at bit 9. |
srcId(*i->src(0).getIndirect(0), 9); |
} else { |
// Other files encode an address register plus a 16-bit offset, which |
// is divided by the access size except for local memory. |
setAReg16(i, 0); |
srcAddr16(i->src(0), i->src(0).getFile() != FILE_MEMORY_LOCAL, 9); |
} |
} |
// Emit a store of source 1 to the symbol in source 0. |
// The opcode words and the position of the value register depend on the |
// file of the destination symbol. |
void |
CodeEmitterNV50::emitSTORE(const Instruction *i) |
{ |
DataFile f = i->getSrc(0)->reg.file; |
int32_t offset = i->getSrc(0)->reg.data.offset; |
switch (f) { |
case FILE_SHADER_OUTPUT: |
// Output offset in 4-byte units at bit 9; value register at bit 14 of |
// code[1]. |
code[0] = 0x00000001 | ((offset >> 2) << 9); |
code[1] = 0x80c00000; |
srcId(i->src(1), 32 + 14); |
break; |
case FILE_MEMORY_GLOBAL: |
code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16); |
code[1] = 0xa0000000; |
emitLoadStoreSizeLG(i->dType, 21 + 32); |
srcId(i->src(1), 2); |
break; |
case FILE_MEMORY_LOCAL: |
code[0] = 0xd0000001; |
code[1] = 0x60000000; |
emitLoadStoreSizeLG(i->dType, 21 + 32); |
srcId(i->src(1), 2); |
break; |
case FILE_MEMORY_SHARED: |
code[0] = 0x00000001; |
code[1] = 0xe0000000; |
// The offset is encoded in units of the access size. |
switch (typeSizeof(i->dType)) { |
case 1: |
code[0] |= offset << 9; |
code[1] |= 0x00400000; |
break; |
case 2: |
code[0] |= (offset >> 1) << 9; |
break; |
case 4: |
code[0] |= (offset >> 2) << 9; |
code[1] |= 0x04200000; |
break; |
default: |
assert(0); |
break; |
} |
srcId(i->src(1), 32 + 14); |
break; |
default: |
assert(!"invalid store destination file"); |
break; |
} |
// Global stores take the id of the indirect register at bit 9; all other |
// files use the address register fields. |
if (f == FILE_MEMORY_GLOBAL) |
srcId(*i->src(0).getIndirect(0), 9); |
else |
setAReg16(i, 0); |
// Local stores additionally encode the unscaled 16-bit offset at bit 9. |
if (f == FILE_MEMORY_LOCAL) |
srcAddr16(i->src(0), false, 9); |
emitFlagsRd(i); |
} |
// Emit a move from source 0 to definition 0. |
// At least one side must be a GPR (asserted). Dedicated encodings exist for |
// a flags source, an address-register source, a flags destination and an |
// immediate source; everything else is a plain register move in short or |
// long form. |
void |
CodeEmitterNV50::emitMOV(const Instruction *i) |
{ |
DataFile sf = i->getSrc(0)->reg.file; |
DataFile df = i->getDef(0)->reg.file; |
assert(sf == FILE_GPR || df == FILE_GPR); |
if (sf == FILE_FLAGS) { |
// Source is a flags register: its id goes to bit 12 of code[0]. |
code[0] = 0x00000001; |
code[1] = 0x20000000; |
defId(i->def(0), 2); |
srcId(i->src(0), 12); |
emitFlagsRd(i); |
} else |
if (sf == FILE_ADDRESS) { |
// Source is an address register: encoded as id + 1 via setARegBits(). |
code[0] = 0x00000001; |
code[1] = 0x40000000; |
defId(i->def(0), 2); |
setARegBits(SDATA(i->src(0)).id + 1); |
emitFlagsRd(i); |
} else |
if (df == FILE_FLAGS) { |
// Destination is a flags register: its id goes to bit 4 of code[0]. |
code[0] = 0x00000001; |
code[1] = 0xa0000000; |
defId(i->def(0), 4); |
srcId(i->src(0), 9); |
emitFlagsRd(i); |
} else |
if (sf == FILE_IMMEDIATE) { |
code[0] = 0x10008001; |
code[1] = 0x00000003; |
emitForm_IMM(i); |
} else { |
if (i->encSize == 4) { |
// Short form: code[1] is not written. |
code[0] = 0x10008000; |
} else { |
// Long form: bit 26 of code[1] is set unless the type is 2 bytes |
// wide; the lane mask goes to bit 14. |
code[0] = 0x10000001; |
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; |
code[1] |= (i->lanes << 14); |
emitFlagsRd(i); |
} |
defId(i->def(0), 2); |
srcId(i->src(0), 9); |
} |
// Moves to a shader output need the long form and bit 3 of code[1]. |
if (df == FILE_SHADER_OUTPUT) { |
assert(i->encSize == 8); |
code[1] |= 0x8; |
} |
} |
// Map the system value referenced by `ref` to the number emitRDSV() puts in |
// the instruction. System values without a mapping trigger an assertion and |
// yield 0 (the same number as SV_PHYSID). |
static inline uint8_t getSRegEncoding(const ValueRef &ref) |
{ |
switch (SDATA(ref).sv.sv) { |
case SV_PHYSID: return 0; |
case SV_CLOCK: return 1; |
case SV_VERTEX_STRIDE: return 3; |
// case SV_PM_COUNTER: return 4 + SDATA(ref).sv.index; |
case SV_SAMPLE_INDEX: return 8; |
default: |
assert(!"no sreg for system value"); |
return 0; |
} |
} |
void |
CodeEmitterNV50::emitRDSV(const Instruction *i) |
{ |
code[0] = 0x00000001; |
code[1] = 0x60000000 | (getSRegEncoding(i->src(0)) << 14); |
defId(i->def(0), 2); |
emitFlagsRd(i); |
} |
// Write the fixed two-word NOP encoding; both code[] words are overwritten. |
void |
CodeEmitterNV50::emitNOP() |
{ |
code[0] = 0xf0000001; |
code[1] = 0xe0000000; |
} |
// Emit a quad operation. `lane` goes to bit 16 of code[0]; the 8-bit `quOp` |
// is split: its two low bits go to bit 20 of code[0], its six high bits to |
// bits 22..27 of code[1]. |
void |
CodeEmitterNV50::emitQUADOP(const Instruction *i, uint8_t lane, uint8_t quOp) |
{ |
code[0] = 0xc0000000 | (lane << 16); |
code[1] = 0x80000000; |
code[0] |= (quOp & 0x03) << 20; |
code[1] |= (quOp & 0xfc) << 20; |
emitForm_ADD(i); |
// With a single source, source 0 is also encoded in the slot the second |
// source would occupy (bit 14 of code[1]). |
if (!i->srcExists(1)) |
srcId(i->src(0), 32 + 14); |
} |
/* NOTE: This returns the base address of a vertex inside the primitive. |
* src0 is an immediate, the index (not offset) of the vertex |
* inside the primitive. XXX: signed or unsigned ? |
* src1 (may be NULL) should use whatever units the hardware requires |
* (on nv50 this is bytes, so, relative index * 4; signed 16 bit value). |
*/ |
// Three encodings are used, depending on whether the destination is an |
// address register and whether src1 is present. In all of them the vertex |
// index (at most 127) goes to bit 9 of code[0]. |
void |
CodeEmitterNV50::emitPFETCH(const Instruction *i) |
{ |
const uint32_t prim = i->src(0).get()->reg.data.u32; |
assert(prim <= 127); |
if (i->def(0).getFile() == FILE_ADDRESS) { |
// shl $aX a[] 0 |
// The destination address register is encoded as id + 1 at bit 2. |
code[0] = 0x00000001 | ((DDATA(i->def(0)).id + 1) << 2); |
code[1] = 0xc0200000; |
code[0] |= prim << 9; |
assert(!i->srcExists(1)); |
} else |
if (i->srcExists(1)) { |
// ld b32 $rX a[$aX+base] |
code[0] = 0x00000001; |
code[1] = 0x04200000 | (0xf << 14); |
defId(i->def(0), 2); |
code[0] |= prim << 9; |
// src1 supplies the address register, encoded as id + 1. |
setARegBits(SDATA(i->src(1)).id + 1); |
} else { |
// mov b32 $rX a[] |
code[0] = 0x10000001; |
code[1] = 0x04200000 | (0xf << 14); |
defId(i->def(0), 2); |
code[0] |= prim << 9; |
} |
emitFlagsRd(i); |
} |
// Emit an interpolation instruction (also handles OP_PINTERP). |
// The short form is built in code[0] first; for an 8-byte encoding the mode |
// bits are then moved into code[1]. |
void |
CodeEmitterNV50::emitINTERP(const Instruction *i) |
{ |
code[0] = 0x80000000; |
defId(i->def(0), 2); |
srcAddr8(i->src(0), 16); |
if (i->getInterpMode() == NV50_IR_INTERP_FLAT) { |
// Flat interpolation: bit 8. |
code[0] |= 1 << 8; |
} else { |
if (i->op == OP_PINTERP) { |
// OP_PINTERP: bit 25, with source 1 encoded at bit 9. |
code[0] |= 1 << 25; |
srcId(i->src(1), 9); |
} |
// Centroid sampling: bit 24. |
if (i->getSampleMode() == NV50_IR_INTERP_CENTROID) |
code[0] |= 1 << 24; |
} |
if (i->encSize == 8) { |
// Long form: bits 24..25 of code[0] move to bits 16..17 of code[1] and |
// bit 8 moves to bit 18; they are then cleared in code[0] and bit 0 of |
// code[0] is set. |
code[1] = |
(code[0] & (3 << 24)) >> (24 - 16) | |
(code[0] & (1 << 8)) << (18 - 8); |
code[0] &= ~0x03000100; |
code[0] |= 1; |
emitFlagsRd(i); |
} |
} |
// Emit OP_MIN / OP_MAX. TYPE_F64 has its own opcode words; all other types |
// share a base opcode with type, abs and neg modifier bits ORed in. |
void |
CodeEmitterNV50::emitMINMAX(const Instruction *i) |
{ |
if (i->dType == TYPE_F64) { |
code[0] = 0xe0000000; |
code[1] = (i->op == OP_MIN) ? 0xa0000000 : 0xc0000000; |
} else { |
code[0] = 0x30000000; |
code[1] = 0x80000000; |
// OP_MIN is distinguished from OP_MAX by bit 29 of code[1]. |
if (i->op == OP_MIN) |
code[1] |= 0x20000000; |
switch (i->dType) { |
case TYPE_F32: code[0] |= 0x80000000; break; |
case TYPE_S32: code[1] |= 0x8c000000; break; |
case TYPE_U32: code[1] |= 0x84000000; break; |
case TYPE_S16: code[1] |= 0x80000000; break; |
case TYPE_U16: break; |
default: |
assert(0); |
break; |
} |
// Source modifiers: abs at bits 20 / 19, neg at bits 26 / 27 for |
// sources 0 / 1. |
code[1] |= i->src(0).mod.abs() << 20; |
code[1] |= i->src(0).mod.neg() << 26; |
code[1] |= i->src(1).mod.abs() << 19; |
code[1] |= i->src(1).mod.neg() << 27; |
} |
emitForm_MAD(i); |
} |
// Emit a float multiply-add in immediate, short or long form. |
// The negations of the two multiplicands are combined into a single bit; |
// the addend has its own negate bit. |
void |
CodeEmitterNV50::emitFMAD(const Instruction *i) |
{ |
const int neg_mul = i->src(0).mod.neg() ^ i->src(1).mod.neg(); |
const int neg_add = i->src(2).mod.neg(); |
code[0] = 0xe0000000; |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
// Immediate form: modifiers live in code[0] (bits 15, 22, 8). |
code[1] = 0; |
emitForm_IMM(i); |
code[0] |= neg_mul << 15; |
code[0] |= neg_add << 22; |
if (i->saturate) |
code[0] |= 1 << 8; |
} else |
if (i->encSize == 4) { |
// Short form: same modifier bits as the immediate form. |
emitForm_MUL(i); |
code[0] |= neg_mul << 15; |
code[0] |= neg_add << 22; |
if (i->saturate) |
code[0] |= 1 << 8; |
} else { |
// Long form: modifiers live in code[1] (bits 26, 27, 29). |
code[1] = neg_mul << 26; |
code[1] |= neg_add << 27; |
if (i->saturate) |
code[1] |= 1 << 29; |
emitForm_MAD(i); |
} |
} |
// Emit a float add; OP_SUB is handled by toggling the negation of source 1. |
// Abs modifiers are not supported (asserted). |
void |
CodeEmitterNV50::emitFADD(const Instruction *i) |
{ |
const int neg0 = i->src(0).mod.neg(); |
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); |
code[0] = 0xb0000000; |
assert(!(i->src(0).mod | i->src(1).mod).abs()); |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
// Immediate form: modifiers live in code[0] (bits 15, 22, 8). |
code[1] = 0; |
emitForm_IMM(i); |
code[0] |= neg0 << 15; |
code[0] |= neg1 << 22; |
if (i->saturate) |
code[0] |= 1 << 8; |
} else |
if (i->encSize == 8) { |
// Long form: modifiers live in code[1] (bits 26, 27, 29). |
code[1] = 0; |
emitForm_ADD(i); |
code[1] |= neg0 << 26; |
code[1] |= neg1 << 27; |
if (i->saturate) |
code[1] |= 1 << 29; |
} else { |
// Short form: same modifier bits as the immediate form. |
emitForm_MUL(i); |
code[0] |= neg0 << 15; |
code[0] |= neg1 << 22; |
if (i->saturate) |
code[0] |= 1 << 8; |
} |
} |
// Emit an integer add; OP_SUB is handled by toggling the negation of |
// source 1. At most one of the two sources may be negated (asserted). |
void |
CodeEmitterNV50::emitUADD(const Instruction *i) |
{ |
const int neg0 = i->src(0).mod.neg(); |
const int neg1 = i->src(1).mod.neg() ^ ((i->op == OP_SUB) ? 1 : 0); |
code[0] = 0x20008000; |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
code[1] = 0; |
emitForm_IMM(i); |
} else |
if (i->encSize == 8) { |
// Long form: code[0] is rewritten without bit 15, and bit 26 of |
// code[1] is set unless the type is 2 bytes wide. |
code[0] = 0x20000000; |
code[1] = (typeSizeof(i->dType) == 2) ? 0 : 0x04000000; |
emitForm_ADD(i); |
} else { |
emitForm_MUL(i); |
} |
assert(!(neg0 && neg1)); |
// Negate bits: source 0 at bit 28, source 1 at bit 22 of code[0]. |
code[0] |= neg0 << 28; |
code[0] |= neg1 << 22; |
if (i->flagsSrc >= 0) { |
// addc == sub | subr |
// Both negate bits are set together, so neither may be in use and the |
// instruction must not be predicated; the flags register id goes to |
// bit 12 of code[1]. |
assert(!(code[0] & 0x10400000) && !i->getPredicate()); |
code[0] |= 0x10400000; |
srcId(i->src(i->flagsSrc), 32 + 12); |
} |
} |
// Emit an add whose destination is encoded as an address register |
// (id + 1 at bit 2 of code[0]). The 16-bit value at bit 9 comes from source |
// 0 when the opcode is OP_MOV, otherwise from source 1; in the latter case |
// source 0, if present, is encoded via setARegBits(). |
void |
CodeEmitterNV50::emitAADD(const Instruction *i) |
{ |
const int s = (i->op == OP_MOV) ? 0 : 1; |
code[0] = 0xd0000001 | (i->getSrc(s)->reg.data.u16 << 9); |
code[1] = 0x20000000; |
code[0] |= (DDATA(i->def(0)).id + 1) << 2; |
emitFlagsRd(i); |
if (s && i->srcExists(0)) |
setARegBits(SDATA(i->src(0)).id + 1); |
} |
void |
CodeEmitterNV50::emitIMUL(const Instruction *i) |
{ |
code[0] = 0x40000000; |
if (i->encSize == 8) { |
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000; |
emitForm_MAD(i); |
} else { |
if (i->sType == TYPE_S16) |
code[0] |= 0x8100; |
emitForm_MUL(i); |
} |
} |
// Emit a float multiply in immediate, long or short form. |
// The negations of both sources are combined into a single negate bit. |
void |
CodeEmitterNV50::emitFMUL(const Instruction *i) |
{ |
const int neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
code[0] = 0xc0000000; |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
// Immediate form: negate at bit 15, saturate at bit 8 of code[0]. |
code[1] = 0; |
emitForm_IMM(i); |
if (neg) |
code[0] |= 0x8000; |
if (i->saturate) |
code[0] |= 1 << 8; |
} else |
if (i->encSize == 8) { |
// Long form: ROUND_Z sets 0xc000 in code[1]; negate at bit 27 and |
// saturate at bit 20 of code[1]. |
code[1] = i->rnd == ROUND_Z ? 0x0000c000 : 0; |
if (neg) |
code[1] |= 0x08000000; |
if (i->saturate) |
code[1] |= 1 << 20; |
emitForm_MAD(i); |
} else { |
// Short form: same modifier bits as the immediate form. |
emitForm_MUL(i); |
if (neg) |
code[0] |= 0x8000; |
if (i->saturate) |
code[0] |= 1 << 8; |
} |
} |
// Emit an integer multiply-add (long form only), optionally with a carry-in |
// taken from a flags register. |
void |
CodeEmitterNV50::emitIMAD(const Instruction *i) |
{ |
code[0] = 0x60000000; |
// Signed types select one of two values depending on saturation; unsigned |
// types use 0. |
if (isSignedType(i->sType)) |
code[1] = i->saturate ? 0x40000000 : 0x20000000; |
else |
code[1] = 0x00000000; |
// neg1 is the combined negation of the product, neg2 that of the addend; |
// both must not be set together. |
int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg(); |
int neg2 = i->src(2).mod.neg(); |
assert(!(neg1 & neg2)); |
code[1] |= neg1 << 27; |
code[1] |= neg2 << 26; |
emitForm_MAD(i); |
if (i->flagsSrc >= 0) { |
// add with carry from $cX |
// Both negate bits are set together, so neither may be in use and the |
// instruction must not be predicated; the flags register id goes to |
// bit 12 of code[1]. |
assert(!(code[1] & 0x0c000000) && !i->getPredicate()); |
code[1] |= 0xc << 24; |
srcId(i->src(i->flagsSrc), 32 + 12); |
} |
} |
// Emit the OP "ISAD" instruction in long (8-byte) or short form. The source |
// type (U32/S32/U16/S16) selects the type bits: in code[1] for the long |
// form, in code[0] for the short form. |
void |
CodeEmitterNV50::emitISAD(const Instruction *i) |
{ |
if (i->encSize == 8) { |
code[0] = 0x50000000; |
switch (i->sType) { |
case TYPE_U32: code[1] = 0x04000000; break; |
case TYPE_S32: code[1] = 0x0c000000; break; |
case TYPE_U16: code[1] = 0x00000000; break; |
case TYPE_S16: code[1] = 0x08000000; break; |
default: |
assert(0); |
break; |
} |
emitForm_MAD(i); |
} else { |
// Short form: bit 15 is set for the 32-bit types and bit 8 for the |
// signed types. |
switch (i->sType) { |
case TYPE_U32: code[0] = 0x50008000; break; |
case TYPE_S32: code[0] = 0x50008100; break; |
case TYPE_U16: code[0] = 0x50000000; break; |
case TYPE_S16: code[0] = 0x50000100; break; |
default: |
assert(0); |
break; |
} |
emitForm_MUL(i); |
} |
} |
// Emit a comparison. The condition comes from the instruction's compare |
// view (asCmp()->setCond) and is encoded at bit 14 of code[1]. |
void |
CodeEmitterNV50::emitSET(const Instruction *i) |
{ |
code[0] = 0x30000000; |
code[1] = 0x60000000; |
emitCondCode(i->asCmp()->setCond, i->sType, 32 + 14); |
// Source type bits. |
switch (i->sType) { |
case TYPE_F32: code[0] |= 0x80000000; break; |
case TYPE_S32: code[1] |= 0x0c000000; break; |
case TYPE_U32: code[1] |= 0x04000000; break; |
case TYPE_S16: code[1] |= 0x08000000; break; |
case TYPE_U16: break; |
default: |
assert(0); |
break; |
} |
// Source modifiers. NOTE(review): the neg bits (26, 27) are the same bits |
// the integer type cases above use -- presumably neg is only applied to |
// float compares; confirm against the callers. |
if (i->src(0).mod.neg()) code[1] |= 0x04000000; |
if (i->src(1).mod.neg()) code[1] |= 0x08000000; |
if (i->src(0).mod.abs()) code[1] |= 0x00100000; |
if (i->src(1).mod.abs()) code[1] |= 0x00080000; |
emitForm_MAD(i); |
} |
void |
CodeEmitterNV50::roundMode_CVT(RoundMode rnd) |
{ |
switch (rnd) { |
case ROUND_NI: code[1] |= 0x08000000; break; |
case ROUND_M: code[1] |= 0x00020000; break; |
case ROUND_MI: code[1] |= 0x08020000; break; |
case ROUND_P: code[1] |= 0x00040000; break; |
case ROUND_PI: code[1] |= 0x08040000; break; |
case ROUND_Z: code[1] |= 0x00060000; break; |
case ROUND_ZI: code[1] |= 0x08060000; break; |
default: |
assert(rnd == ROUND_N); |
break; |
} |
} |
// Emit a type conversion. The same encoder also serves OP_CEIL, OP_FLOOR |
// and OP_TRUNC (through the rounding mode) and OP_ABS, OP_SAT and OP_NEG |
// (through modifier bits). code[1] is picked from a table indexed by |
// destination and source type; unsupported pairs trigger an assertion. |
void |
CodeEmitterNV50::emitCVT(const Instruction *i) |
{ |
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); |
RoundMode rnd; |
DataType dType; |
// Float-to-float conversions use the *I rounding variants for |
// CEIL/FLOOR/TRUNC; any other op keeps the instruction's own mode. |
switch (i->op) { |
case OP_CEIL: rnd = f2f ? ROUND_PI : ROUND_P; break; |
case OP_FLOOR: rnd = f2f ? ROUND_MI : ROUND_M; break; |
case OP_TRUNC: rnd = f2f ? ROUND_ZI : ROUND_Z; break; |
default: |
rnd = i->rnd; |
break; |
} |
// A negation with U32 destination is encoded with an S32 destination. |
if (i->op == OP_NEG && i->dType == TYPE_U32) |
dType = TYPE_S32; |
else |
dType = i->dType; |
code[0] = 0xa0000000; |
switch (dType) { |
case TYPE_F64: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0xc4404000; break; |
case TYPE_S64: code[1] = 0x44414000; break; |
case TYPE_U64: code[1] = 0x44404000; break; |
case TYPE_F32: code[1] = 0xc4400000; break; |
case TYPE_S32: code[1] = 0x44410000; break; |
case TYPE_U32: code[1] = 0x44400000; break; |
default: |
assert(0); |
break; |
} |
break; |
case TYPE_S64: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0x8c404000; break; |
case TYPE_F32: code[1] = 0x8c400000; break; |
default: |
assert(0); |
break; |
} |
break; |
case TYPE_U64: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0x84404000; break; |
case TYPE_F32: code[1] = 0x84400000; break; |
default: |
assert(0); |
break; |
} |
break; |
case TYPE_F32: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0xc0404000; break; |
case TYPE_S64: code[1] = 0x40414000; break; |
case TYPE_U64: code[1] = 0x40404000; break; |
case TYPE_F32: code[1] = 0xc4004000; break; |
case TYPE_S32: code[1] = 0x44014000; break; |
case TYPE_U32: code[1] = 0x44004000; break; |
case TYPE_F16: code[1] = 0xc4000000; break; |
case TYPE_U16: code[1] = 0x44000000; break; |
default: |
assert(0); |
break; |
} |
break; |
case TYPE_S32: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0x88404000; break; |
case TYPE_F32: code[1] = 0x8c004000; break; |
case TYPE_S32: code[1] = 0x0c014000; break; |
case TYPE_U32: code[1] = 0x0c004000; break; |
case TYPE_F16: code[1] = 0x8c000000; break; |
case TYPE_S16: code[1] = 0x0c010000; break; |
case TYPE_U16: code[1] = 0x0c000000; break; |
case TYPE_S8: code[1] = 0x0c018000; break; |
case TYPE_U8: code[1] = 0x0c008000; break; |
default: |
assert(0); |
break; |
} |
break; |
case TYPE_U32: |
switch (i->sType) { |
case TYPE_F64: code[1] = 0x80404000; break; |
case TYPE_F32: code[1] = 0x84004000; break; |
case TYPE_S32: code[1] = 0x04014000; break; |
case TYPE_U32: code[1] = 0x04004000; break; |
case TYPE_F16: code[1] = 0x84000000; break; |
case TYPE_S16: code[1] = 0x04010000; break; |
case TYPE_U16: code[1] = 0x04000000; break; |
case TYPE_S8: code[1] = 0x04018000; break; |
case TYPE_U8: code[1] = 0x04008000; break; |
default: |
assert(0); |
break; |
} |
break; |
// 8- and 16-bit destinations are not supported. |
case TYPE_S16: |
case TYPE_U16: |
case TYPE_S8: |
case TYPE_U8: |
default: |
assert(0); |
break; |
} |
// A 1-byte source type read from a 4-byte value additionally sets bit 14. |
if (typeSizeof(i->sType) == 1 && i->getSrc(0)->reg.size == 4) |
code[1] |= 0x00004000; |
roundMode_CVT(rnd); |
switch (i->op) { |
case OP_ABS: code[1] |= 1 << 20; break; |
case OP_SAT: code[1] |= 1 << 19; break; |
case OP_NEG: code[1] |= 1 << 29; break; |
default: |
break; |
} |
// XOR: a negate modifier on the source toggles bit 29, so it cancels the |
// bit set for OP_NEG above. |
code[1] ^= i->src(0).mod.neg() << 29; |
code[1] |= i->src(0).mod.abs() << 20; |
if (i->saturate) |
code[1] |= 1 << 19; |
assert(i->op != OP_ABS || !i->src(0).mod.neg()); |
emitForm_MAD(i); |
} |
void |
CodeEmitterNV50::emitPreOp(const Instruction *i) |
{ |
code[0] = 0xb0000000; |
code[1] = (i->op == OP_PREEX2) ? 0xc0004000 : 0xc0000000; |
code[1] |= i->src(0).mod.abs() << 20; |
code[1] |= i->src(0).mod.neg() << 26; |
emitForm_MAD(i); |
} |
void |
CodeEmitterNV50::emitSFnOp(const Instruction *i, uint8_t subOp) |
{ |
code[0] = 0x90000000; |
if (i->encSize == 4) { |
assert(i->op == OP_RCP); |
code[0] |= i->src(0).mod.abs() << 15; |
code[0] |= i->src(0).mod.neg() << 22; |
emitForm_MUL(i); |
} else { |
code[1] = subOp << 29; |
code[1] |= i->src(0).mod.abs() << 20; |
code[1] |= i->src(0).mod.neg() << 26; |
emitForm_MAD(i); |
} |
} |
void |
CodeEmitterNV50::emitNOT(const Instruction *i) |
{ |
code[0] = 0xd0000000; |
code[1] = 0x0002c000; |
switch (i->sType) { |
case TYPE_U32: |
case TYPE_S32: |
code[1] |= 0x04000000; |
break; |
default: |
break; |
} |
emitForm_MAD(i); |
setSrc(i, 0, 1); |
} |
void |
CodeEmitterNV50::emitLogicOp(const Instruction *i) |
{ |
code[0] = 0xd0000000; |
code[1] = 0; |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
switch (i->op) { |
case OP_OR: code[0] |= 0x0100; break; |
case OP_XOR: code[0] |= 0x8000; break; |
default: |
assert(i->op == OP_AND); |
break; |
} |
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) |
code[0] |= 1 << 22; |
emitForm_IMM(i); |
} else { |
switch (i->op) { |
case OP_AND: code[1] = 0x04000000; break; |
case OP_OR: code[1] = 0x04004000; break; |
case OP_XOR: code[1] = 0x04008000; break; |
default: |
assert(0); |
break; |
} |
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) |
code[1] |= 1 << 16; |
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) |
code[1] |= 1 << 17; |
emitForm_MAD(i); |
} |
} |
void |
CodeEmitterNV50::emitARL(const Instruction *i, unsigned int shl) |
{ |
code[0] = 0x00000001 | (shl << 16); |
code[1] = 0xc0000000; |
code[0] |= (DDATA(i->def(0)).id + 1) << 2; |
setSrcFileBits(i, NV50_OP_ENC_IMM); |
setSrc(i, 0, 0); |
emitFlagsRd(i); |
} |
void |
CodeEmitterNV50::emitShift(const Instruction *i) |
{ |
if (i->def(0).getFile() == FILE_ADDRESS) { |
assert(i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE); |
emitARL(i, i->getSrc(1)->reg.data.u32 & 0x3f); |
} else { |
code[0] = 0x30000001; |
code[1] = (i->op == OP_SHR) ? 0xe4000000 : 0xc4000000; |
if (i->op == OP_SHR && isSignedType(i->sType)) |
code[1] |= 1 << 27; |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
code[1] |= 1 << 20; |
code[0] |= (i->getSrc(1)->reg.data.u32 & 0x7f) << 16; |
defId(i->def(0), 2); |
srcId(i->src(0), 9); |
emitFlagsRd(i); |
} else { |
emitForm_MAD(i); |
} |
} |
} |
void |
CodeEmitterNV50::emitOUT(const Instruction *i) |
{ |
code[0] = (i->op == OP_EMIT) ? 0xf0000201 : 0xf0000401; |
code[1] = 0xc0000000; |
emitFlagsRd(i); |
} |
void |
CodeEmitterNV50::emitTEX(const TexInstruction *i) |
{ |
code[0] = 0xf0000001; |
code[1] = 0x00000000; |
switch (i->op) { |
case OP_TXB: |
code[1] = 0x20000000; |
break; |
case OP_TXL: |
code[1] = 0x40000000; |
break; |
case OP_TXF: |
code[0] |= 0x01000000; |
break; |
case OP_TXG: |
code[0] |= 0x01000000; |
code[1] = 0x80000000; |
break; |
case OP_TXLQ: |
code[1] = 0x60020000; |
break; |
default: |
assert(i->op == OP_TEX); |
break; |
} |
code[0] |= i->tex.r << 9; |
code[0] |= i->tex.s << 17; |
int argc = i->tex.target.getArgCount(); |
if (i->op == OP_TXB || i->op == OP_TXL || i->op == OP_TXF) |
argc += 1; |
if (i->tex.target.isShadow()) |
argc += 1; |
assert(argc <= 4); |
code[0] |= (argc - 1) << 22; |
if (i->tex.target.isCube()) { |
code[0] |= 0x08000000; |
} else |
if (i->tex.useOffsets) { |
code[1] |= (i->tex.offset[0] & 0xf) << 24; |
code[1] |= (i->tex.offset[1] & 0xf) << 20; |
code[1] |= (i->tex.offset[2] & 0xf) << 16; |
} |
code[0] |= (i->tex.mask & 0x3) << 25; |
code[1] |= (i->tex.mask & 0xc) << 12; |
if (i->tex.liveOnly) |
code[1] |= 4; |
defId(i->def(0), 2); |
emitFlagsRd(i); |
} |
void |
CodeEmitterNV50::emitTXQ(const TexInstruction *i) |
{ |
assert(i->tex.query == TXQ_DIMS); |
code[0] = 0xf0000001; |
code[1] = 0x60000000; |
code[0] |= i->tex.r << 9; |
code[0] |= i->tex.s << 17; |
code[0] |= (i->tex.mask & 0x3) << 25; |
code[1] |= (i->tex.mask & 0xc) << 12; |
defId(i->def(0), 2); |
emitFlagsRd(i); |
} |
void |
CodeEmitterNV50::emitTEXPREP(const TexInstruction *i) |
{ |
code[0] = 0xf8000001 | (3 << 22) | (i->tex.s << 17) | (i->tex.r << 9); |
code[1] = 0x60010000; |
code[0] |= (i->tex.mask & 0x3) << 25; |
code[1] |= (i->tex.mask & 0xc) << 12; |
defId(i->def(0), 2); |
emitFlagsRd(i); |
} |
void |
CodeEmitterNV50::emitPRERETEmu(const FlowInstruction *i) |
{ |
uint32_t pos = i->target.bb->binPos + 8; // +8 to skip an op */ |
code[0] = 0x10000003; // bra |
code[1] = 0x00000780; // always |
switch (i->subOp) { |
case NV50_IR_SUBOP_EMU_PRERET + 0: // bra to the call |
break; |
case NV50_IR_SUBOP_EMU_PRERET + 1: // bra to skip the call |
pos += 8; |
break; |
default: |
assert(i->subOp == (NV50_IR_SUBOP_EMU_PRERET + 2)); |
code[0] = 0x20000003; // call |
code[1] = 0x00000000; // no predicate |
break; |
} |
addReloc(RelocEntry::TYPE_CODE, 0, pos, 0x07fff800, 9); |
addReloc(RelocEntry::TYPE_CODE, 1, pos, 0x000fc000, -4); |
} |
void |
CodeEmitterNV50::emitFlow(const Instruction *i, uint8_t flowOp) |
{ |
const FlowInstruction *f = i->asFlow(); |
bool hasPred = false; |
bool hasTarg = false; |
code[0] = 0x00000003 | (flowOp << 28); |
code[1] = 0x00000000; |
switch (i->op) { |
case OP_BRA: |
hasPred = true; |
hasTarg = true; |
break; |
case OP_BREAK: |
case OP_BRKPT: |
case OP_DISCARD: |
case OP_RET: |
hasPred = true; |
break; |
case OP_CALL: |
case OP_PREBREAK: |
case OP_JOINAT: |
hasTarg = true; |
break; |
case OP_PRERET: |
hasTarg = true; |
if (i->subOp >= NV50_IR_SUBOP_EMU_PRERET) { |
emitPRERETEmu(f); |
return; |
} |
break; |
default: |
break; |
} |
if (hasPred) |
emitFlagsRd(i); |
if (hasTarg && f) { |
uint32_t pos; |
if (f->op == OP_CALL) { |
if (f->builtin) { |
pos = targNV50->getBuiltinOffset(f->target.builtin); |
} else { |
pos = f->target.fn->binPos; |
} |
} else { |
pos = f->target.bb->binPos; |
} |
code[0] |= ((pos >> 2) & 0xffff) << 11; |
code[1] |= ((pos >> 18) & 0x003f) << 14; |
RelocEntry::Type relocTy; |
relocTy = f->builtin ? RelocEntry::TYPE_BUILTIN : RelocEntry::TYPE_CODE; |
addReloc(relocTy, 0, pos, 0x07fff800, 9); |
addReloc(relocTy, 1, pos, 0x000fc000, -4); |
} |
} |
void |
CodeEmitterNV50::emitBAR(const Instruction *i) |
{ |
ImmediateValue *barId = i->getSrc(0)->asImm(); |
assert(barId); |
code[0] = 0x82000003 | (barId->reg.data.u32 << 21); |
code[1] = 0x00004000; |
if (i->subOp == NV50_IR_SUBOP_BAR_SYNC) |
code[0] |= 1 << 26; |
} |
void |
CodeEmitterNV50::emitATOM(const Instruction *i) |
{ |
uint8_t subOp; |
switch (i->subOp) { |
case NV50_IR_SUBOP_ATOM_ADD: subOp = 0x0; break; |
case NV50_IR_SUBOP_ATOM_MIN: subOp = 0x7; break; |
case NV50_IR_SUBOP_ATOM_MAX: subOp = 0x6; break; |
case NV50_IR_SUBOP_ATOM_INC: subOp = 0x4; break; |
case NV50_IR_SUBOP_ATOM_DEC: subOp = 0x5; break; |
case NV50_IR_SUBOP_ATOM_AND: subOp = 0xa; break; |
case NV50_IR_SUBOP_ATOM_OR: subOp = 0xb; break; |
case NV50_IR_SUBOP_ATOM_XOR: subOp = 0xc; break; |
case NV50_IR_SUBOP_ATOM_CAS: subOp = 0x2; break; |
case NV50_IR_SUBOP_ATOM_EXCH: subOp = 0x1; break; |
default: |
assert(!"invalid subop"); |
return; |
} |
code[0] = 0xd0000001; |
code[1] = 0xe0c00000 | (subOp << 2); |
if (isSignedType(i->dType)) |
code[1] |= 1 << 21; |
// args |
emitFlagsRd(i); |
setDst(i, 0); |
setSrc(i, 1, 1); |
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) |
setSrc(i, 2, 2); |
// g[] pointer |
code[0] |= i->getSrc(0)->reg.fileIndex << 23; |
srcId(i->getIndirect(0, 0), 9); |
} |
bool |
CodeEmitterNV50::emitInstruction(Instruction *insn) |
{ |
if (!insn->encSize) { |
ERROR("skipping unencodable instruction: "); insn->print(); |
return false; |
} else |
if (codeSize + insn->encSize > codeSizeLimit) { |
ERROR("code emitter output buffer too small\n"); |
return false; |
} |
if (insn->bb->getProgram()->dbgFlags & NV50_IR_DEBUG_BASIC) { |
INFO("EMIT: "); insn->print(); |
} |
switch (insn->op) { |
case OP_MOV: |
emitMOV(insn); |
break; |
case OP_EXIT: |
case OP_NOP: |
case OP_JOIN: |
emitNOP(); |
break; |
case OP_VFETCH: |
case OP_LOAD: |
emitLOAD(insn); |
break; |
case OP_EXPORT: |
case OP_STORE: |
emitSTORE(insn); |
break; |
case OP_PFETCH: |
emitPFETCH(insn); |
break; |
case OP_RDSV: |
emitRDSV(insn); |
break; |
case OP_LINTERP: |
case OP_PINTERP: |
emitINTERP(insn); |
break; |
case OP_ADD: |
case OP_SUB: |
if (isFloatType(insn->dType)) |
emitFADD(insn); |
else if (insn->getDef(0)->reg.file == FILE_ADDRESS) |
emitAADD(insn); |
else |
emitUADD(insn); |
break; |
case OP_MUL: |
if (isFloatType(insn->dType)) |
emitFMUL(insn); |
else |
emitIMUL(insn); |
break; |
case OP_MAD: |
case OP_FMA: |
if (isFloatType(insn->dType)) |
emitFMAD(insn); |
else |
emitIMAD(insn); |
break; |
case OP_SAD: |
emitISAD(insn); |
break; |
case OP_NOT: |
emitNOT(insn); |
break; |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
emitLogicOp(insn); |
break; |
case OP_SHL: |
case OP_SHR: |
emitShift(insn); |
break; |
case OP_SET: |
emitSET(insn); |
break; |
case OP_MIN: |
case OP_MAX: |
emitMINMAX(insn); |
break; |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_ABS: |
case OP_NEG: |
case OP_SAT: |
emitCVT(insn); |
break; |
case OP_CVT: |
if (insn->def(0).getFile() == FILE_ADDRESS) |
emitARL(insn, 0); |
else |
if (insn->def(0).getFile() == FILE_FLAGS || |
insn->src(0).getFile() == FILE_FLAGS || |
insn->src(0).getFile() == FILE_ADDRESS) |
emitMOV(insn); |
else |
emitCVT(insn); |
break; |
case OP_RCP: |
emitSFnOp(insn, 0); |
break; |
case OP_RSQ: |
emitSFnOp(insn, 2); |
break; |
case OP_LG2: |
emitSFnOp(insn, 3); |
break; |
case OP_SIN: |
emitSFnOp(insn, 4); |
break; |
case OP_COS: |
emitSFnOp(insn, 5); |
break; |
case OP_EX2: |
emitSFnOp(insn, 6); |
break; |
case OP_PRESIN: |
case OP_PREEX2: |
emitPreOp(insn); |
break; |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXF: |
case OP_TXG: |
case OP_TXLQ: |
emitTEX(insn->asTex()); |
break; |
case OP_TXQ: |
emitTXQ(insn->asTex()); |
break; |
case OP_TEXPREP: |
emitTEXPREP(insn->asTex()); |
break; |
case OP_EMIT: |
case OP_RESTART: |
emitOUT(insn); |
break; |
case OP_DISCARD: |
emitFlow(insn, 0x0); |
break; |
case OP_BRA: |
emitFlow(insn, 0x1); |
break; |
case OP_CALL: |
emitFlow(insn, 0x2); |
break; |
case OP_RET: |
emitFlow(insn, 0x3); |
break; |
case OP_PREBREAK: |
emitFlow(insn, 0x4); |
break; |
case OP_BREAK: |
emitFlow(insn, 0x5); |
break; |
case OP_QUADON: |
emitFlow(insn, 0x6); |
break; |
case OP_QUADPOP: |
emitFlow(insn, 0x7); |
break; |
case OP_JOINAT: |
emitFlow(insn, 0xa); |
break; |
case OP_PRERET: |
emitFlow(insn, 0xd); |
break; |
case OP_QUADOP: |
emitQUADOP(insn, insn->lanes, insn->subOp); |
break; |
case OP_DFDX: |
emitQUADOP(insn, 4, insn->src(0).mod.neg() ? 0x66 : 0x99); |
break; |
case OP_DFDY: |
emitQUADOP(insn, 5, insn->src(0).mod.neg() ? 0x5a : 0xa5); |
break; |
case OP_ATOM: |
emitATOM(insn); |
break; |
case OP_BAR: |
emitBAR(insn); |
break; |
case OP_PHI: |
case OP_UNION: |
case OP_CONSTRAINT: |
ERROR("operation should have been eliminated\n"); |
return false; |
case OP_EXP: |
case OP_LOG: |
case OP_SQRT: |
case OP_POW: |
case OP_SELP: |
case OP_SLCT: |
case OP_TXD: |
case OP_PRECONT: |
case OP_CONT: |
case OP_POPCNT: |
case OP_INSBF: |
case OP_EXTBF: |
ERROR("operation should have been lowered\n"); |
return false; |
default: |
ERROR("unknown op: %u\n", insn->op); |
return false; |
} |
if (insn->join || insn->op == OP_JOIN) |
code[1] |= 0x2; |
else |
if (insn->exit || insn->op == OP_EXIT) |
code[1] |= 0x1; |
assert((insn->encSize == 8) == (code[0] & 1)); |
code += insn->encSize / 4; |
codeSize += insn->encSize; |
return true; |
} |
uint32_t |
CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const |
{ |
const Target::OpInfo &info = targ->getOpInfo(i); |
if (info.minEncSize > 4) |
return 8; |
// check constraints on dst and src operands |
for (int d = 0; i->defExists(d); ++d) { |
if (i->def(d).rep()->reg.data.id > 63 || |
i->def(d).rep()->reg.file != FILE_GPR) |
return 8; |
} |
for (int s = 0; i->srcExists(s); ++s) { |
DataFile sf = i->src(s).getFile(); |
if (sf != FILE_GPR) |
if (sf != FILE_SHADER_INPUT || progType != Program::TYPE_FRAGMENT) |
return 8; |
if (i->src(s).rep()->reg.data.id > 63) |
return 8; |
} |
// check modifiers & rounding |
if (i->join || i->lanes != 0xf || i->exit) |
return 8; |
if (i->op == OP_MUL && i->rnd != ROUND_N) |
return 8; |
if (i->asTex()) |
return 8; // TODO: short tex encoding |
// check constraints on short MAD |
if (info.srcNr >= 2 && i->srcExists(2)) { |
if (!i->defExists(0) || !isFloatType(i->dType) || |
i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id) |
return 8; |
} |
return info.minEncSize; |
} |
// Change the encoding size of an instruction after BBs have been scheduled. |
static void |
makeInstructionLong(Instruction *insn) |
{ |
if (insn->encSize == 8) |
return; |
Function *fn = insn->bb->getFunction(); |
int n = 0; |
int adj = 4; |
for (Instruction *i = insn->next; i && i->encSize == 4; ++n, i = i->next); |
if (n & 1) { |
adj = 8; |
insn->next->encSize = 8; |
} else |
if (insn->prev && insn->prev->encSize == 4) { |
adj = 8; |
insn->prev->encSize = 8; |
} |
insn->encSize = 8; |
for (int i = fn->bbCount - 1; i >= 0 && fn->bbArray[i] != insn->bb; --i) { |
fn->bbArray[i]->binPos += 4; |
} |
fn->binSize += adj; |
insn->bb->binSize += adj; |
} |
static bool |
trySetExitModifier(Instruction *insn) |
{ |
if (insn->op == OP_DISCARD || |
insn->op == OP_QUADON || |
insn->op == OP_QUADPOP) |
return false; |
for (int s = 0; insn->srcExists(s); ++s) |
if (insn->src(s).getFile() == FILE_IMMEDIATE) |
return false; |
if (insn->asFlow()) { |
if (insn->op == OP_CALL) // side effects ! |
return false; |
if (insn->getPredicate()) // cannot do conditional exit (or can we ?) |
return false; |
insn->op = OP_EXIT; |
} |
insn->exit = 1; |
makeInstructionLong(insn); |
return true; |
} |
static void |
replaceExitWithModifier(Function *func) |
{ |
BasicBlock *epilogue = BasicBlock::get(func->cfgExit); |
if (!epilogue->getExit() || |
epilogue->getExit()->op != OP_EXIT) // only main will use OP_EXIT |
return; |
if (epilogue->getEntry()->op != OP_EXIT) { |
Instruction *insn = epilogue->getExit()->prev; |
if (!insn || !trySetExitModifier(insn)) |
return; |
insn->exit = 1; |
} else { |
for (Graph::EdgeIterator ei = func->cfgExit->incident(); |
!ei.end(); ei.next()) { |
BasicBlock *bb = BasicBlock::get(ei.getNode()); |
Instruction *i = bb->getExit(); |
if (!i || !trySetExitModifier(i)) |
return; |
} |
} |
epilogue->binSize -= 8; |
func->binSize -= 8; |
delete_Instruction(func->getProgram(), epilogue->getExit()); |
} |
void |
CodeEmitterNV50::prepareEmission(Function *func) |
{ |
CodeEmitter::prepareEmission(func); |
replaceExitWithModifier(func); |
} |
// Construct an emitter bound to @target, with no output buffer and no
// relocation info attached yet.
CodeEmitterNV50::CodeEmitterNV50(const TargetNV50 *target) :
   CodeEmitter(target),
   targNV50(target)
{
   targ = target; // specialized

   code = NULL;
   codeSize = 0;
   codeSizeLimit = 0;

   relocInfo = NULL;
}
// Create a new NV50 code emitter for this target, configured for the given
// program type. The object is allocated with new and returned to the caller.
CodeEmitter *
TargetNV50::getCodeEmitter(Program::Type type)
{
   CodeEmitterNV50 *const emitter = new CodeEmitterNV50(this);

   emitter->setProgramType(type);
   return emitter;
}
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp |
---|
0,0 → 1,3097 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target_nvc0.h" |
namespace nv50_ir { |
// Argh, all these assertions ... |
class CodeEmitterNVC0 : public CodeEmitter |
{ |
public: |
CodeEmitterNVC0(const TargetNVC0 *); |
virtual bool emitInstruction(Instruction *); |
virtual uint32_t getMinEncodingSize(const Instruction *) const; |
virtual void prepareEmission(Function *); |
inline void setProgramType(Program::Type pType) { progType = pType; } |
private: |
const TargetNVC0 *targNVC0; |
Program::Type progType; |
const bool writeIssueDelays; |
private: |
void emitForm_A(const Instruction *, uint64_t); |
void emitForm_B(const Instruction *, uint64_t); |
void emitForm_S(const Instruction *, uint32_t, bool pred); |
void emitPredicate(const Instruction *); |
void setAddress16(const ValueRef&); |
void setAddress24(const ValueRef&); |
void setAddressByFile(const ValueRef&); |
void setImmediate(const Instruction *, const int s); // needs op already set |
void setImmediateS8(const ValueRef&); |
void setSUConst16(const Instruction *, const int s); |
void setSUPred(const Instruction *, const int s); |
void emitCondCode(CondCode cc, int pos); |
void emitInterpMode(const Instruction *); |
void emitLoadStoreType(DataType ty); |
void emitSUGType(DataType); |
void emitCachingMode(CacheMode c); |
void emitShortSrc2(const ValueRef&); |
inline uint8_t getSRegEncoding(const ValueRef&); |
void roundMode_A(const Instruction *); |
void roundMode_C(const Instruction *); |
void roundMode_CS(const Instruction *); |
void emitNegAbs12(const Instruction *); |
void emitNOP(const Instruction *); |
void emitLOAD(const Instruction *); |
void emitSTORE(const Instruction *); |
void emitMOV(const Instruction *); |
void emitATOM(const Instruction *); |
void emitMEMBAR(const Instruction *); |
void emitCCTL(const Instruction *); |
void emitINTERP(const Instruction *); |
void emitPFETCH(const Instruction *); |
void emitVFETCH(const Instruction *); |
void emitEXPORT(const Instruction *); |
void emitOUT(const Instruction *); |
void emitUADD(const Instruction *); |
void emitFADD(const Instruction *); |
void emitDADD(const Instruction *); |
void emitUMUL(const Instruction *); |
void emitFMUL(const Instruction *); |
void emitDMUL(const Instruction *); |
void emitIMAD(const Instruction *); |
void emitISAD(const Instruction *); |
void emitFMAD(const Instruction *); |
void emitDMAD(const Instruction *); |
void emitMADSP(const Instruction *); |
void emitNOT(Instruction *); |
void emitLogicOp(const Instruction *, uint8_t subOp); |
void emitPOPC(const Instruction *); |
void emitINSBF(const Instruction *); |
void emitEXTBF(const Instruction *); |
void emitBFIND(const Instruction *); |
void emitPERMT(const Instruction *); |
void emitShift(const Instruction *); |
void emitSFnOp(const Instruction *, uint8_t subOp); |
void emitCVT(Instruction *); |
void emitMINMAX(const Instruction *); |
void emitPreOp(const Instruction *); |
void emitSET(const CmpInstruction *); |
void emitSLCT(const CmpInstruction *); |
void emitSELP(const Instruction *); |
void emitTEXBAR(const Instruction *); |
void emitTEX(const TexInstruction *); |
void emitTEXCSAA(const TexInstruction *); |
void emitTXQ(const TexInstruction *); |
void emitQUADOP(const Instruction *, uint8_t qOp, uint8_t laneMask); |
void emitFlow(const Instruction *); |
void emitBAR(const Instruction *); |
void emitSUCLAMPMode(uint16_t); |
void emitSUCalc(Instruction *); |
void emitSULDGB(const TexInstruction *); |
void emitSUSTGx(const TexInstruction *); |
void emitVSHL(const Instruction *); |
void emitVectorSubOp(const Instruction *); |
void emitPIXLD(const Instruction *); |
inline void defId(const ValueDef&, const int pos); |
inline void defId(const Instruction *, int d, const int pos); |
inline void srcId(const ValueRef&, const int pos); |
inline void srcId(const ValueRef *, const int pos); |
inline void srcId(const Instruction *, int s, const int pos); |
inline void srcAddr32(const ValueRef&, int pos, int shr); |
inline bool isLIMM(const ValueRef&, DataType ty); |
}; |
// for better visibility |
// HEX64(h, l) token-pastes its two hex digit strings (h first, then l) into |
// a single unsigned long long literal, e.g. HEX64(20000000, 00000002) |
// becomes 0x2000000000000002ULL. |
#define HEX64(h, l) 0x##h##l##ULL |
// SDATA / DDATA both expand to the reg.data of the value returned by rep() |
// on a reference; the two names only distinguish source from definition use. |
#define SDATA(a) ((a).rep()->reg.data) |
#define DDATA(a) ((a).rep()->reg.data) |
void CodeEmitterNVC0::srcId(const ValueRef& src, const int pos) |
{ |
code[pos / 32] |= (src.get() ? SDATA(src).id : 63) << (pos % 32); |
} |
void CodeEmitterNVC0::srcId(const ValueRef *src, const int pos) |
{ |
code[pos / 32] |= (src ? SDATA(*src).id : 63) << (pos % 32); |
} |
void CodeEmitterNVC0::srcId(const Instruction *insn, int s, int pos) |
{ |
int r = insn->srcExists(s) ? SDATA(insn->src(s)).id : 63; |
code[pos / 32] |= r << (pos % 32); |
} |
void |
CodeEmitterNVC0::srcAddr32(const ValueRef& src, int pos, int shr) |
{ |
const uint32_t offset = SDATA(src).offset >> shr; |
code[pos / 32] |= offset << (pos % 32); |
if (pos && (pos < 32)) |
code[1] |= offset >> (32 - pos); |
} |
void CodeEmitterNVC0::defId(const ValueDef& def, const int pos) |
{ |
code[pos / 32] |= (def.get() ? DDATA(def).id : 63) << (pos % 32); |
} |
void CodeEmitterNVC0::defId(const Instruction *insn, int d, int pos) |
{ |
int r = insn->defExists(d) ? DDATA(insn->def(d)).id : 63; |
code[pos / 32] |= r << (pos % 32); |
} |
bool CodeEmitterNVC0::isLIMM(const ValueRef& ref, DataType ty) |
{ |
const ImmediateValue *imm = ref.get()->asImm(); |
return imm && (imm->reg.data.u32 & ((ty == TYPE_F32) ? 0xfff : 0xfff00000)); |
} |
void |
CodeEmitterNVC0::roundMode_A(const Instruction *insn) |
{ |
switch (insn->rnd) { |
case ROUND_M: code[1] |= 1 << 23; break; |
case ROUND_P: code[1] |= 2 << 23; break; |
case ROUND_Z: code[1] |= 3 << 23; break; |
default: |
assert(insn->rnd == ROUND_N); |
break; |
} |
} |
void |
CodeEmitterNVC0::emitNegAbs12(const Instruction *i) |
{ |
if (i->src(1).mod.abs()) code[0] |= 1 << 6; |
if (i->src(0).mod.abs()) code[0] |= 1 << 7; |
if (i->src(1).mod.neg()) code[0] |= 1 << 8; |
if (i->src(0).mod.neg()) code[0] |= 1 << 9; |
} |
// Map the IR condition code @cc to its hardware value and OR it into the
// instruction at bit position @pos. An unknown code asserts and encodes 0.
void CodeEmitterNVC0::emitCondCode(CondCode cc, int pos)
{
   uint8_t enc;

   // cases listed in order of the encoded value
   switch (cc) {
   // 0x0 - 0xf
   case CC_FL:  enc = 0x0; break;
   case CC_LT:  enc = 0x1; break;
   case CC_EQ:  enc = 0x2; break;
   case CC_LE:  enc = 0x3; break;
   case CC_GT:  enc = 0x4; break;
   case CC_NE:  enc = 0x5; break;
   case CC_GE:  enc = 0x6; break;
   case CC_LTU: enc = 0x9; break;
   case CC_EQU: enc = 0xa; break;
   case CC_LEU: enc = 0xb; break;
   case CC_GTU: enc = 0xc; break;
   case CC_NEU: enc = 0xd; break;
   case CC_GEU: enc = 0xe; break;
   case CC_TR:  enc = 0xf; break;
   // 0x10 - 0x17
   case CC_NO:  enc = 0x10; break;
   case CC_NC:  enc = 0x11; break;
   case CC_NS:  enc = 0x12; break;
   case CC_NA:  enc = 0x13; break;
   case CC_A:   enc = 0x14; break;
   case CC_S:   enc = 0x15; break;
   case CC_C:   enc = 0x16; break;
   case CC_O:   enc = 0x17; break;
   default:
      enc = 0;
      assert(!"invalid condition code");
      break;
   }

   code[pos / 32] |= enc << (pos % 32);
}
void |
CodeEmitterNVC0::emitPredicate(const Instruction *i) |
{ |
if (i->predSrc >= 0) { |
assert(i->getPredicate()->reg.file == FILE_PREDICATE); |
srcId(i->src(i->predSrc), 10); |
if (i->cc == CC_NOT_P) |
code[0] |= 0x2000; // negate |
} else { |
code[0] |= 0x1c00; |
} |
} |
void |
CodeEmitterNVC0::setAddressByFile(const ValueRef& src) |
{ |
switch (src.getFile()) { |
case FILE_MEMORY_GLOBAL: |
srcAddr32(src, 26, 0); |
break; |
case FILE_MEMORY_LOCAL: |
case FILE_MEMORY_SHARED: |
setAddress24(src); |
break; |
default: |
assert(src.getFile() == FILE_MEMORY_CONST); |
setAddress16(src); |
break; |
} |
} |
void |
CodeEmitterNVC0::setAddress16(const ValueRef& src) |
{ |
Symbol *sym = src.get()->asSym(); |
assert(sym); |
code[0] |= (sym->reg.data.offset & 0x003f) << 26; |
code[1] |= (sym->reg.data.offset & 0xffc0) >> 6; |
} |
void |
CodeEmitterNVC0::setAddress24(const ValueRef& src) |
{ |
Symbol *sym = src.get()->asSym(); |
assert(sym); |
code[0] |= (sym->reg.data.offset & 0x00003f) << 26; |
code[1] |= (sym->reg.data.offset & 0xffffc0) >> 6; |
} |
void |
CodeEmitterNVC0::setImmediate(const Instruction *i, const int s) |
{ |
const ImmediateValue *imm = i->src(s).get()->asImm(); |
uint32_t u32; |
assert(imm); |
u32 = imm->reg.data.u32; |
if ((code[0] & 0xf) == 0x2) { |
// LIMM |
code[0] |= (u32 & 0x3f) << 26; |
code[1] |= u32 >> 6; |
} else |
if ((code[0] & 0xf) == 0x3 || (code[0] & 0xf) == 4) { |
// integer immediate |
assert((u32 & 0xfff00000) == 0 || (u32 & 0xfff00000) == 0xfff00000); |
assert(!(code[1] & 0xc000)); |
u32 &= 0xfffff; |
code[0] |= (u32 & 0x3f) << 26; |
code[1] |= 0xc000 | (u32 >> 6); |
} else { |
// float immediate |
assert(!(u32 & 0x00000fff)); |
assert(!(code[1] & 0xc000)); |
code[0] |= ((u32 >> 12) & 0x3f) << 26; |
code[1] |= 0xc000 | (u32 >> 18); |
} |
} |
void CodeEmitterNVC0::setImmediateS8(const ValueRef &ref) |
{ |
const ImmediateValue *imm = ref.get()->asImm(); |
int8_t s8 = static_cast<int8_t>(imm->reg.data.s32); |
assert(s8 == imm->reg.data.s32); |
code[0] |= (s8 & 0x3f) << 26; |
code[0] |= (s8 >> 6) << 8; |
} |
void |
CodeEmitterNVC0::emitForm_A(const Instruction *i, uint64_t opc) |
{ |
code[0] = opc; |
code[1] = opc >> 32; |
emitPredicate(i); |
defId(i->def(0), 14); |
int s1 = 26; |
if (i->srcExists(2) && i->getSrc(2)->reg.file == FILE_MEMORY_CONST) |
s1 = 49; |
for (int s = 0; s < 3 && i->srcExists(s); ++s) { |
switch (i->getSrc(s)->reg.file) { |
case FILE_MEMORY_CONST: |
assert(!(code[1] & 0xc000)); |
code[1] |= (s == 2) ? 0x8000 : 0x4000; |
code[1] |= i->getSrc(s)->reg.fileIndex << 10; |
setAddress16(i->src(s)); |
break; |
case FILE_IMMEDIATE: |
assert(s == 1 || |
i->op == OP_MOV || i->op == OP_PRESIN || i->op == OP_PREEX2); |
assert(!(code[1] & 0xc000)); |
setImmediate(i, s); |
break; |
case FILE_GPR: |
if ((s == 2) && ((code[0] & 0x7) == 2)) // LIMM: 3rd src == dst |
break; |
srcId(i->src(s), s ? ((s == 2) ? 49 : s1) : 20); |
break; |
default: |
// ignore here, can be predicate or flags, but must not be address |
break; |
} |
} |
} |
void |
CodeEmitterNVC0::emitForm_B(const Instruction *i, uint64_t opc) |
{ |
code[0] = opc; |
code[1] = opc >> 32; |
emitPredicate(i); |
defId(i->def(0), 14); |
switch (i->src(0).getFile()) { |
case FILE_MEMORY_CONST: |
assert(!(code[1] & 0xc000)); |
code[1] |= 0x4000 | (i->src(0).get()->reg.fileIndex << 10); |
setAddress16(i->src(0)); |
break; |
case FILE_IMMEDIATE: |
assert(!(code[1] & 0xc000)); |
setImmediate(i, 0); |
break; |
case FILE_GPR: |
srcId(i->src(0), 26); |
break; |
default: |
// ignore here, can be predicate or flags, but must not be address |
break; |
} |
} |
void |
CodeEmitterNVC0::emitForm_S(const Instruction *i, uint32_t opc, bool pred) |
{ |
code[0] = opc; |
int ss2a = 0; |
if (opc == 0x0d || opc == 0x0e) |
ss2a = 2; |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
assert(pred || (i->predSrc < 0)); |
if (pred) |
emitPredicate(i); |
for (int s = 1; s < 3 && i->srcExists(s); ++s) { |
if (i->src(s).get()->reg.file == FILE_MEMORY_CONST) { |
assert(!(code[0] & (0x300 >> ss2a))); |
switch (i->src(s).get()->reg.fileIndex) { |
case 0: code[0] |= 0x100 >> ss2a; break; |
case 1: code[0] |= 0x200 >> ss2a; break; |
case 16: code[0] |= 0x300 >> ss2a; break; |
default: |
ERROR("invalid c[] space for short form\n"); |
break; |
} |
if (s == 1) |
code[0] |= i->getSrc(s)->reg.data.offset << 24; |
else |
code[0] |= i->getSrc(s)->reg.data.offset << 6; |
} else |
if (i->src(s).getFile() == FILE_IMMEDIATE) { |
assert(s == 1); |
setImmediateS8(i->src(s)); |
} else |
if (i->src(s).getFile() == FILE_GPR) { |
srcId(i->src(s), (s == 1) ? 26 : 8); |
} |
} |
} |
void |
CodeEmitterNVC0::emitShortSrc2(const ValueRef &src) |
{ |
if (src.getFile() == FILE_MEMORY_CONST) { |
switch (src.get()->reg.fileIndex) { |
case 0: code[0] |= 0x100; break; |
case 1: code[0] |= 0x200; break; |
case 16: code[0] |= 0x300; break; |
default: |
assert(!"unsupported file index for short op"); |
break; |
} |
srcAddr32(src, 20, 2); |
} else { |
srcId(src, 20); |
assert(src.getFile() == FILE_GPR); |
} |
} |
void |
CodeEmitterNVC0::emitNOP(const Instruction *i) |
{ |
code[0] = 0x000001e4; |
code[1] = 0x40000000; |
emitPredicate(i); |
} |
void |
CodeEmitterNVC0::emitFMAD(const Instruction *i) |
{ |
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); |
if (i->encSize == 8) { |
if (isLIMM(i->src(1), TYPE_F32)) { |
emitForm_A(i, HEX64(20000000, 00000002)); |
} else { |
emitForm_A(i, HEX64(30000000, 00000000)); |
if (i->src(2).mod.neg()) |
code[0] |= 1 << 8; |
} |
roundMode_A(i); |
if (neg1) |
code[0] |= 1 << 9; |
if (i->saturate) |
code[0] |= 1 << 5; |
if (i->ftz) |
code[0] |= 1 << 6; |
} else { |
assert(!i->saturate && !i->src(2).mod.neg()); |
emitForm_S(i, (i->src(2).getFile() == FILE_MEMORY_CONST) ? 0x2e : 0x0e, |
false); |
if (neg1) |
code[0] |= 1 << 4; |
} |
} |
void |
CodeEmitterNVC0::emitDMAD(const Instruction *i) |
{ |
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg(); |
emitForm_A(i, HEX64(20000000, 00000001)); |
if (i->src(2).mod.neg()) |
code[0] |= 1 << 8; |
roundMode_A(i); |
if (neg1) |
code[0] |= 1 << 9; |
assert(!i->saturate); |
assert(!i->ftz); |
} |
void |
CodeEmitterNVC0::emitFMUL(const Instruction *i) |
{ |
bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
assert(i->postFactor >= -3 && i->postFactor <= 3); |
if (i->encSize == 8) { |
if (isLIMM(i->src(1), TYPE_F32)) { |
assert(i->postFactor == 0); // constant folded, hopefully |
emitForm_A(i, HEX64(30000000, 00000002)); |
} else { |
emitForm_A(i, HEX64(58000000, 00000000)); |
roundMode_A(i); |
code[1] |= ((i->postFactor > 0) ? |
(7 - i->postFactor) : (0 - i->postFactor)) << 17; |
} |
if (neg) |
code[1] ^= 1 << 25; // aliases with LIMM sign bit |
if (i->saturate) |
code[0] |= 1 << 5; |
if (i->dnz) |
code[0] |= 1 << 7; |
else |
if (i->ftz) |
code[0] |= 1 << 6; |
} else { |
assert(!neg && !i->saturate && !i->ftz && !i->postFactor); |
emitForm_S(i, 0xa8, true); |
} |
} |
void |
CodeEmitterNVC0::emitDMUL(const Instruction *i) |
{ |
bool neg = (i->src(0).mod ^ i->src(1).mod).neg(); |
emitForm_A(i, HEX64(50000000, 00000001)); |
roundMode_A(i); |
if (neg) |
code[0] |= 1 << 9; |
assert(!i->saturate); |
assert(!i->ftz); |
assert(!i->dnz); |
assert(!i->postFactor); |
} |
void |
CodeEmitterNVC0::emitUMUL(const Instruction *i) |
{ |
if (i->encSize == 8) { |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
emitForm_A(i, HEX64(10000000, 00000002)); |
} else { |
emitForm_A(i, HEX64(50000000, 00000003)); |
} |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) |
code[0] |= 1 << 6; |
if (i->sType == TYPE_S32) |
code[0] |= 1 << 5; |
if (i->dType == TYPE_S32) |
code[0] |= 1 << 7; |
} else { |
emitForm_S(i, i->src(1).getFile() == FILE_IMMEDIATE ? 0xaa : 0x2a, true); |
if (i->sType == TYPE_S32) |
code[0] |= 1 << 6; |
} |
} |
void |
CodeEmitterNVC0::emitFADD(const Instruction *i) |
{ |
if (i->encSize == 8) { |
if (isLIMM(i->src(1), TYPE_F32)) { |
assert(!i->saturate); |
emitForm_A(i, HEX64(28000000, 00000002)); |
code[0] |= i->src(0).mod.abs() << 7; |
code[0] |= i->src(0).mod.neg() << 9; |
if (i->src(1).mod.abs()) |
code[1] &= 0xfdffffff; |
if ((i->op == OP_SUB) != static_cast<bool>(i->src(1).mod.neg())) |
code[1] ^= 0x02000000; |
} else { |
emitForm_A(i, HEX64(50000000, 00000000)); |
roundMode_A(i); |
if (i->saturate) |
code[1] |= 1 << 17; |
emitNegAbs12(i); |
if (i->op == OP_SUB) code[0] ^= 1 << 8; |
} |
if (i->ftz) |
code[0] |= 1 << 5; |
} else { |
assert(!i->saturate && i->op != OP_SUB && |
!i->src(0).mod.abs() && |
!i->src(1).mod.neg() && !i->src(1).mod.abs()); |
emitForm_S(i, 0x49, true); |
if (i->src(0).mod.neg()) |
code[0] |= 1 << 7; |
} |
} |
void |
CodeEmitterNVC0::emitDADD(const Instruction *i) |
{ |
assert(i->encSize == 8); |
emitForm_A(i, HEX64(48000000, 00000001)); |
roundMode_A(i); |
assert(!i->saturate); |
assert(!i->ftz); |
emitNegAbs12(i); |
if (i->op == OP_SUB) |
code[0] ^= 1 << 8; |
} |
void |
CodeEmitterNVC0::emitUADD(const Instruction *i) |
{ |
uint32_t addOp = 0; |
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs()); |
assert(!i->src(0).mod.neg() || !i->src(1).mod.neg()); |
if (i->src(0).mod.neg()) |
addOp |= 0x200; |
if (i->src(1).mod.neg()) |
addOp |= 0x100; |
if (i->op == OP_SUB) { |
addOp ^= 0x100; |
assert(addOp != 0x300); // would be add-plus-one |
} |
if (i->encSize == 8) { |
if (isLIMM(i->src(1), TYPE_U32)) { |
emitForm_A(i, HEX64(08000000, 00000002)); |
if (i->defExists(1)) |
code[1] |= 1 << 26; // write carry |
} else { |
emitForm_A(i, HEX64(48000000, 00000003)); |
if (i->defExists(1)) |
code[1] |= 1 << 16; // write carry |
} |
code[0] |= addOp; |
if (i->saturate) |
code[0] |= 1 << 5; |
if (i->flagsSrc >= 0) // add carry |
code[0] |= 1 << 6; |
} else { |
assert(!(addOp & 0x100)); |
emitForm_S(i, (addOp >> 3) | |
((i->src(1).getFile() == FILE_IMMEDIATE) ? 0xac : 0x2c), true); |
} |
} |
// TODO: shl-add |
void |
CodeEmitterNVC0::emitIMAD(const Instruction *i) |
{ |
assert(i->encSize == 8); |
emitForm_A(i, HEX64(20000000, 00000003)); |
if (isSignedType(i->dType)) |
code[0] |= 1 << 7; |
if (isSignedType(i->sType)) |
code[0] |= 1 << 5; |
code[1] |= i->saturate << 24; |
if (i->flagsDef >= 0) code[1] |= 1 << 16; |
if (i->flagsSrc >= 0) code[1] |= 1 << 23; |
if (i->src(2).mod.neg()) code[0] |= 0x10; |
if (i->src(1).mod.neg() ^ |
i->src(0).mod.neg()) code[0] |= 0x20; |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) |
code[0] |= 1 << 6; |
} |
void |
CodeEmitterNVC0::emitMADSP(const Instruction *i) |
{ |
assert(targ->getChipset() >= NVISA_GK104_CHIPSET); |
emitForm_A(i, HEX64(00000000, 00000003)); |
if (i->subOp == NV50_IR_SUBOP_MADSP_SD) { |
code[1] |= 0x01800000; |
} else { |
code[0] |= (i->subOp & 0x00f) << 7; |
code[0] |= (i->subOp & 0x0f0) << 1; |
code[0] |= (i->subOp & 0x100) >> 3; |
code[0] |= (i->subOp & 0x200) >> 2; |
code[1] |= (i->subOp & 0xc00) << 13; |
} |
if (i->flagsDef >= 0) |
code[1] |= 1 << 16; |
} |
void |
CodeEmitterNVC0::emitISAD(const Instruction *i) |
{ |
assert(i->dType == TYPE_S32 || i->dType == TYPE_U32); |
assert(i->encSize == 8); |
emitForm_A(i, HEX64(38000000, 00000003)); |
if (i->dType == TYPE_S32) |
code[0] |= 1 << 5; |
} |
void |
CodeEmitterNVC0::emitNOT(Instruction *i) |
{ |
assert(i->encSize == 8); |
i->setSrc(1, i->src(0)); |
emitForm_A(i, HEX64(68000000, 000001c3)); |
} |
void |
CodeEmitterNVC0::emitLogicOp(const Instruction *i, uint8_t subOp) |
{ |
if (i->def(0).getFile() == FILE_PREDICATE) { |
code[0] = 0x00000004 | (subOp << 30); |
code[1] = 0x0c000000; |
emitPredicate(i); |
defId(i->def(0), 17); |
srcId(i->src(0), 20); |
if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 23; |
srcId(i->src(1), 26); |
if (i->src(1).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 29; |
if (i->defExists(1)) { |
defId(i->def(1), 14); |
} else { |
code[0] |= 7 << 14; |
} |
// (a OP b) OP c |
if (i->predSrc != 2 && i->srcExists(2)) { |
code[1] |= subOp << 21; |
srcId(i->src(2), 17); |
if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 20; |
} else { |
code[1] |= 0x000e0000; |
} |
} else |
if (i->encSize == 8) { |
if (isLIMM(i->src(1), TYPE_U32)) { |
emitForm_A(i, HEX64(38000000, 00000002)); |
if (i->flagsDef >= 0) |
code[1] |= 1 << 26; |
} else { |
emitForm_A(i, HEX64(68000000, 00000003)); |
if (i->flagsDef >= 0) |
code[1] |= 1 << 16; |
} |
code[0] |= subOp << 6; |
if (i->flagsSrc >= 0) // carry |
code[0] |= 1 << 5; |
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; |
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; |
} else { |
emitForm_S(i, (subOp << 5) | |
((i->src(1).getFile() == FILE_IMMEDIATE) ? 0x1d : 0x8d), true); |
} |
} |
void |
CodeEmitterNVC0::emitPOPC(const Instruction *i) |
{ |
emitForm_A(i, HEX64(54000000, 00000004)); |
if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 9; |
if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) code[0] |= 1 << 8; |
} |
void |
CodeEmitterNVC0::emitINSBF(const Instruction *i) |
{ |
emitForm_A(i, HEX64(28000000, 00000003)); |
} |
void |
CodeEmitterNVC0::emitEXTBF(const Instruction *i) |
{ |
emitForm_A(i, HEX64(70000000, 00000003)); |
if (i->dType == TYPE_S32) |
code[0] |= 1 << 5; |
if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) |
code[0] |= 1 << 8; |
} |
void |
CodeEmitterNVC0::emitBFIND(const Instruction *i) |
{ |
emitForm_B(i, HEX64(78000000, 00000003)); |
if (i->dType == TYPE_S32) |
code[0] |= 1 << 5; |
if (i->src(0).mod == Modifier(NV50_IR_MOD_NOT)) |
code[0] |= 1 << 8; |
if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT) |
code[0] |= 1 << 6; |
} |
void |
CodeEmitterNVC0::emitPERMT(const Instruction *i) |
{ |
emitForm_A(i, HEX64(24000000, 00000004)); |
code[0] |= i->subOp << 5; |
} |
void |
CodeEmitterNVC0::emitShift(const Instruction *i) |
{ |
if (i->op == OP_SHR) { |
emitForm_A(i, HEX64(58000000, 00000003) |
| (isSignedType(i->dType) ? 0x20 : 0x00)); |
} else { |
emitForm_A(i, HEX64(60000000, 00000003)); |
} |
if (i->subOp == NV50_IR_SUBOP_SHIFT_WRAP) |
code[0] |= 1 << 9; |
} |
void |
CodeEmitterNVC0::emitPreOp(const Instruction *i) |
{ |
if (i->encSize == 8) { |
emitForm_B(i, HEX64(60000000, 00000000)); |
if (i->op == OP_PREEX2) |
code[0] |= 0x20; |
if (i->src(0).mod.abs()) code[0] |= 1 << 6; |
if (i->src(0).mod.neg()) code[0] |= 1 << 8; |
} else { |
emitForm_S(i, i->op == OP_PREEX2 ? 0x74000008 : 0x70000008, true); |
} |
} |
void |
CodeEmitterNVC0::emitSFnOp(const Instruction *i, uint8_t subOp) |
{ |
if (i->encSize == 8) { |
code[0] = 0x00000000 | (subOp << 26); |
code[1] = 0xc8000000; |
emitPredicate(i); |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
assert(i->src(0).getFile() == FILE_GPR); |
if (i->saturate) code[0] |= 1 << 5; |
if (i->src(0).mod.abs()) code[0] |= 1 << 7; |
if (i->src(0).mod.neg()) code[0] |= 1 << 9; |
} else { |
emitForm_S(i, 0x80000008 | (subOp << 26), true); |
assert(!i->src(0).mod.neg()); |
if (i->src(0).mod.abs()) code[0] |= 1 << 30; |
} |
} |
void |
CodeEmitterNVC0::emitMINMAX(const Instruction *i) |
{ |
uint64_t op; |
assert(i->encSize == 8); |
op = (i->op == OP_MIN) ? 0x080e000000000000ULL : 0x081e000000000000ULL; |
if (i->ftz) |
op |= 1 << 5; |
else |
if (!isFloatType(i->dType)) |
op |= isSignedType(i->dType) ? 0x23 : 0x03; |
if (i->dType == TYPE_F64) |
op |= 0x01; |
emitForm_A(i, op); |
emitNegAbs12(i); |
} |
void |
CodeEmitterNVC0::roundMode_C(const Instruction *i) |
{ |
switch (i->rnd) { |
case ROUND_M: code[1] |= 1 << 17; break; |
case ROUND_P: code[1] |= 2 << 17; break; |
case ROUND_Z: code[1] |= 3 << 17; break; |
case ROUND_NI: code[0] |= 1 << 7; break; |
case ROUND_MI: code[0] |= 1 << 7; code[1] |= 1 << 17; break; |
case ROUND_PI: code[0] |= 1 << 7; code[1] |= 2 << 17; break; |
case ROUND_ZI: code[0] |= 1 << 7; code[1] |= 3 << 17; break; |
case ROUND_N: break; |
default: |
assert(!"invalid round mode"); |
break; |
} |
} |
void |
CodeEmitterNVC0::roundMode_CS(const Instruction *i) |
{ |
switch (i->rnd) { |
case ROUND_M: |
case ROUND_MI: code[0] |= 1 << 16; break; |
case ROUND_P: |
case ROUND_PI: code[0] |= 2 << 16; break; |
case ROUND_Z: |
case ROUND_ZI: code[0] |= 3 << 16; break; |
default: |
break; |
} |
} |
// Emit a conversion instruction. Besides plain conversions this also handles |
// the ops tested below: CEIL/FLOOR/TRUNC (expressed as rounding modes), SAT, |
// ABS and NEG (expressed as modifiers). |
// The instruction is not const because i->rnd is overwritten for |
// CEIL/FLOOR/TRUNC. |
void |
CodeEmitterNVC0::emitCVT(Instruction *i) |
{ |
const bool f2f = isFloatType(i->dType) && isFloatType(i->sType); |
DataType dType; |
// Float-to-float rounding uses the "...I" rounding modes, all other |
// combinations use the plain ones. |
switch (i->op) { |
case OP_CEIL: i->rnd = f2f ? ROUND_PI : ROUND_P; break; |
case OP_FLOOR: i->rnd = f2f ? ROUND_MI : ROUND_M; break; |
case OP_TRUNC: i->rnd = f2f ? ROUND_ZI : ROUND_Z; break; |
default: |
break; |
} |
// A modifier may come either from the opcode itself or from the |
// instruction/source modifier. |
const bool sat = (i->op == OP_SAT) || i->saturate; |
const bool abs = (i->op == OP_ABS) || i->src(0).mod.abs(); |
const bool neg = (i->op == OP_NEG) || i->src(0).mod.neg(); |
// NEG with a U32 destination is encoded with an S32 destination type. |
if (i->op == OP_NEG && i->dType == TYPE_U32) |
dType = TYPE_S32; |
else |
dType = i->dType; |
if (i->encSize == 8) { |
// Long encoding: form B plus rounding mode, operand sizes (log2 of the |
// byte size at bits 20 and 23), modifiers and type-class bits. |
emitForm_B(i, HEX64(10000000, 00000004)); |
roundMode_C(i); |
// cvt u16 f32 sets high bits to 0, so we don't have to use Value::Size() |
code[0] |= util_logbase2(typeSizeof(dType)) << 20; |
code[0] |= util_logbase2(typeSizeof(i->sType)) << 23; |
if (sat) |
code[0] |= 0x20; |
if (abs) |
code[0] |= 1 << 6; |
// The negate bit is suppressed when the op itself is ABS. |
if (neg && i->op != OP_ABS) |
code[0] |= 1 << 8; |
if (i->ftz) |
code[1] |= 1 << 23; |
if (isSignedIntType(dType)) |
code[0] |= 0x080; |
if (isSignedIntType(i->sType)) |
code[0] |= 0x200; |
// code[1] bits 26/27 select the conversion class: |
// float <- float: none, float <- int: 0x08000000, |
// int <- float: 0x04000000, int <- int: 0x0c000000. |
if (isFloatType(dType)) { |
if (!isFloatType(i->sType)) |
code[1] |= 0x08000000; |
} else { |
if (isFloatType(i->sType)) |
code[1] |= 0x04000000; |
else |
code[1] |= 0x0c000000; |
} |
} else { |
// Short encoding: code[0] is assigned a base opcode, then modifier and |
// rounding bits are OR-ed in. |
// NOTE(review): no predicate/def/src emission is visible in this branch |
// - confirm whether the short form is expected to encode operands. |
if (i->op == OP_CEIL || i->op == OP_FLOOR || i->op == OP_TRUNC) { |
code[0] = 0x298; |
} else |
if (isFloatType(dType)) { |
if (isFloatType(i->sType)) |
code[0] = 0x098; |
else |
code[0] = 0x088 | (isSignedType(i->sType) ? (1 << 8) : 0); |
} else { |
// int <- int is not available in the short form. |
assert(isFloatType(i->sType)); |
code[0] = 0x288 | (isSignedType(i->sType) ? (1 << 8) : 0); |
} |
if (neg) code[0] |= 1 << 16; |
if (sat) code[0] |= 1 << 18; |
if (abs) code[0] |= 1 << 19; |
roundMode_CS(i); |
} |
} |
void |
CodeEmitterNVC0::emitSET(const CmpInstruction *i) |
{ |
uint32_t hi; |
uint32_t lo = 0; |
if (i->sType == TYPE_F64) |
lo = 0x1; |
else |
if (!isFloatType(i->sType)) |
lo = 0x3; |
if (isFloatType(i->dType) || isSignedIntType(i->sType)) |
lo |= 0x20; |
switch (i->op) { |
case OP_SET_AND: hi = 0x10000000; break; |
case OP_SET_OR: hi = 0x10200000; break; |
case OP_SET_XOR: hi = 0x10400000; break; |
default: |
hi = 0x100e0000; |
break; |
} |
emitForm_A(i, (static_cast<uint64_t>(hi) << 32) | lo); |
if (i->op != OP_SET) |
srcId(i->src(2), 32 + 17); |
if (i->def(0).getFile() == FILE_PREDICATE) { |
if (i->sType == TYPE_F32) |
code[1] += 0x10000000; |
else |
code[1] += 0x08000000; |
code[0] &= ~0xfc000; |
defId(i->def(0), 17); |
if (i->defExists(1)) |
defId(i->def(1), 14); |
else |
code[0] |= 0x1c000; |
} |
if (i->ftz) |
code[1] |= 1 << 27; |
emitCondCode(i->setCond, 32 + 23); |
emitNegAbs12(i); |
} |
void |
CodeEmitterNVC0::emitSLCT(const CmpInstruction *i) |
{ |
uint64_t op; |
switch (i->dType) { |
case TYPE_S32: |
op = HEX64(30000000, 00000023); |
break; |
case TYPE_U32: |
op = HEX64(30000000, 00000003); |
break; |
case TYPE_F32: |
op = HEX64(38000000, 00000000); |
break; |
default: |
assert(!"invalid type for SLCT"); |
op = 0; |
break; |
} |
emitForm_A(i, op); |
CondCode cc = i->setCond; |
if (i->src(2).mod.neg()) |
cc = reverseCondCode(cc); |
emitCondCode(cc, 32 + 23); |
if (i->ftz) |
code[0] |= 1 << 5; |
} |
void CodeEmitterNVC0::emitSELP(const Instruction *i) |
{ |
emitForm_A(i, HEX64(20000000, 00000004)); |
if (i->cc == CC_NOT_P || i->src(2).mod & Modifier(NV50_IR_MOD_NOT)) |
code[1] |= 1 << 20; |
} |
void CodeEmitterNVC0::emitTEXBAR(const Instruction *i) |
{ |
code[0] = 0x00000006 | (i->subOp << 26); |
code[1] = 0xf0000000; |
emitPredicate(i); |
emitCondCode(i->flagsSrc >= 0 ? i->cc : CC_ALWAYS, 5); |
} |
void CodeEmitterNVC0::emitTEXCSAA(const TexInstruction *i) |
{ |
code[0] = 0x00000086; |
code[1] = 0xd0000000; |
code[1] |= i->tex.r; |
code[1] |= i->tex.s << 8; |
if (i->tex.liveOnly) |
code[0] |= 1 << 9; |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
} |
static inline bool |
isNextIndependentTex(const TexInstruction *i) |
{ |
if (!i->next || !isTextureOp(i->next->op)) |
return false; |
if (i->getDef(0)->interfers(i->next->getSrc(0))) |
return false; |
return !i->next->srcExists(1) || !i->getDef(0)->interfers(i->next->getSrc(1)); |
} |
// Emit a texture instruction (TEX, TXB, TXL, TXF, TXG, TXLQ, TXD). |
// code[0] holds the mode bits, predicate, def(0)/src(0) and the second |
// source register; code[1] holds the opcode, texture/sampler indices, write |
// mask and target description. |
void |
CodeEmitterNVC0::emitTEX(const TexInstruction *i) |
{ |
code[0] = 0x00000006; |
// "t" mode is chosen when the next instruction is an independent texture |
// op (see isNextIndependentTex), "p" mode otherwise. |
if (isNextIndependentTex(i)) |
code[0] |= 0x080; // t mode |
else |
code[0] |= 0x100; // p mode |
if (i->tex.liveOnly) |
code[0] |= 1 << 9; |
// NOTE(review): in the default case code[1] is never assigned, yet it is |
// OR-ed into below; with asserts compiled out it keeps its previous value. |
switch (i->op) { |
case OP_TEX: code[1] = 0x80000000; break; |
case OP_TXB: code[1] = 0x84000000; break; |
case OP_TXL: code[1] = 0x86000000; break; |
case OP_TXF: code[1] = 0x90000000; break; |
case OP_TXG: code[1] = 0xa0000000; break; |
case OP_TXLQ: code[1] = 0xb0000000; break; |
case OP_TXD: code[1] = 0xe0000000; break; |
default: |
assert(!"invalid texture op"); |
break; |
} |
// Bit 25 has inverted meaning for TXF: it is set when levelZero is NOT |
// requested, while for all other ops it is set when levelZero is requested. |
if (i->op == OP_TXF) { |
if (!i->tex.levelZero) |
code[1] |= 0x02000000; |
} else |
if (i->tex.levelZero) { |
code[1] |= 0x02000000; |
} |
if (i->op != OP_TXD && i->tex.derivAll) |
code[1] |= 1 << 13; |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
emitPredicate(i); |
if (i->op == OP_TXG) code[0] |= i->tex.gatherComp << 5; |
code[1] |= i->tex.mask << 14; |
code[1] |= i->tex.r; |
code[1] |= i->tex.s << 8; |
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) |
code[1] |= 1 << 18; // in 1st source (with array index) |
// texture target: |
// (dimension - 1) at bit 20, cube adds 2 to that field, array is bit 19 |
// and shadow is bit 24 |
code[1] |= (i->tex.target.getDim() - 1) << 20; |
if (i->tex.target.isCube()) |
code[1] += 2 << 20; |
if (i->tex.target.isArray()) |
code[1] |= 1 << 19; |
if (i->tex.target.isShadow()) |
code[1] |= 1 << 24; |
// The second source moves to slot 2 when slot 1 is taken by the predicate. |
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) |
// An immediate second source clears one opcode bit again (bit 26 for TXL, |
// bit 25 for TXF). |
if (i->srcExists(src1) && i->src(src1).getFile() == FILE_IMMEDIATE) { |
// lzero |
if (i->op == OP_TXL) |
code[1] &= ~(1 << 26); |
else |
if (i->op == OP_TXF) |
code[1] &= ~(1 << 25); |
} |
// Note that bit 23 is set both for multisample targets and for |
// useOffsets == 4. |
if (i->tex.target == TEX_TARGET_2D_MS || |
i->tex.target == TEX_TARGET_2D_MS_ARRAY) |
code[1] |= 1 << 23; |
if (i->tex.useOffsets == 1) |
code[1] |= 1 << 22; |
if (i->tex.useOffsets == 4) |
code[1] |= 1 << 23; |
srcId(i, src1, 26); |
} |
void |
CodeEmitterNVC0::emitTXQ(const TexInstruction *i) |
{ |
code[0] = 0x00000086; |
code[1] = 0xc0000000; |
switch (i->tex.query) { |
case TXQ_DIMS: code[1] |= 0 << 22; break; |
case TXQ_TYPE: code[1] |= 1 << 22; break; |
case TXQ_SAMPLE_POSITION: code[1] |= 2 << 22; break; |
case TXQ_FILTER: code[1] |= 3 << 22; break; |
case TXQ_LOD: code[1] |= 4 << 22; break; |
case TXQ_BORDER_COLOUR: code[1] |= 5 << 22; break; |
default: |
assert(!"invalid texture query"); |
break; |
} |
code[1] |= i->tex.mask << 14; |
code[1] |= i->tex.r; |
code[1] |= i->tex.s << 8; |
if (i->tex.sIndirectSrc >= 0 || i->tex.rIndirectSrc >= 0) |
code[1] |= 1 << 18; |
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
srcId(i, src1, 26); |
emitPredicate(i); |
} |
void |
CodeEmitterNVC0::emitQUADOP(const Instruction *i, uint8_t qOp, uint8_t laneMask) |
{ |
code[0] = 0x00000000 | (laneMask << 6); |
code[1] = 0x48000000 | qOp; |
defId(i->def(0), 14); |
srcId(i->src(0), 20); |
srcId(i->srcExists(1) ? i->src(1) : i->src(0), 26); |
if (i->op == OP_QUADOP && progType != Program::TYPE_FRAGMENT) |
code[0] |= 1 << 9; // dall |
emitPredicate(i); |
} |
// Emit a control-flow instruction. The switch picks the opcode (code[1]) |
// and a mask describing which extra fields the op takes; predicate and |
// branch/call target are then filled in accordingly. |
void |
CodeEmitterNVC0::emitFlow(const Instruction *i) |
{ |
// NOTE(review): f is dereferenced in the BRA and CALL cases before the |
// "if (!f)" check further down, so those ops rely on asFlow() succeeding. |
const FlowInstruction *f = i->asFlow(); |
unsigned mask; // bit 0: predicate, bit 1: target |
code[0] = 0x00000007; |
switch (i->op) { |
case OP_BRA: |
code[1] = f->absolute ? 0x00000000 : 0x40000000; |
// 0x4000 in code[0] marks a target read from constant memory |
if (i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST) |
code[0] |= 0x4000; |
mask = 3; |
break; |
case OP_CALL: |
code[1] = f->absolute ? 0x10000000 : 0x50000000; |
if (f->indirect) |
code[0] |= 0x4000; // indirect calls always use c[] source |
mask = 2; |
break; |
case OP_EXIT: code[1] = 0x80000000; mask = 1; break; |
case OP_RET: code[1] = 0x90000000; mask = 1; break; |
case OP_DISCARD: code[1] = 0x98000000; mask = 1; break; |
case OP_BREAK: code[1] = 0xa8000000; mask = 1; break; |
case OP_CONT: code[1] = 0xb0000000; mask = 1; break; |
case OP_JOINAT: code[1] = 0x60000000; mask = 2; break; |
case OP_PREBREAK: code[1] = 0x68000000; mask = 2; break; |
case OP_PRECONT: code[1] = 0x70000000; mask = 2; break; |
case OP_PRERET: code[1] = 0x78000000; mask = 2; break; |
case OP_QUADON: code[1] = 0xc0000000; mask = 0; break; |
case OP_QUADPOP: code[1] = 0xc8000000; mask = 0; break; |
case OP_BRKPT: code[1] = 0xd0000000; mask = 0; break; |
default: |
assert(!"invalid flow operation"); |
return; |
} |
// Predicated ops: emit the predicate; without a flags source the field at |
// bits 5..8 of code[0] is filled with 0xf. |
if (mask & 1) { |
emitPredicate(i); |
if (i->flagsSrc < 0) |
code[0] |= 0x1e0; |
} |
// Everything below needs the flow-specific fields. |
if (!f) |
return; |
if (f->allWarp) |
code[0] |= 1 << 15; |
if (f->limit) |
code[0] |= 1 << 16; |
// Indirect targets: either a constant-buffer address (16-bit address plus |
// the buffer index at bit 10 of code[1], and for BRA the indirect register |
// of that source) or directly a register at position 20. |
if (f->indirect) { |
if (code[0] & 0x4000) { |
assert(i->srcExists(0) && i->src(0).getFile() == FILE_MEMORY_CONST); |
setAddress16(i->src(0)); |
code[1] |= i->getSrc(0)->reg.fileIndex << 10; |
if (f->op == OP_BRA) |
srcId(f->src(0).getIndirect(0), 20); |
} else { |
srcId(f, 0, 20); |
} |
} |
if (f->op == OP_CALL) { |
if (f->indirect) { |
// nothing |
} else |
if (f->builtin) { |
// Builtin calls are absolute; the address is not known here, so two |
// relocation entries are registered, one per code word. |
assert(f->absolute); |
uint32_t pcAbs = targNVC0->getBuiltinOffset(f->target.builtin); |
addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfc000000, 26); |
addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x03ffffff, -6); |
} else { |
// Relative call: the offset is taken from codeSize + 8 and split into |
// its low 6 bits (code[0] bits 26..31) and the next 18 bits (code[1]). |
assert(!f->absolute); |
int32_t pcRel = f->target.fn->binPos - (codeSize + 8); |
code[0] |= (pcRel & 0x3f) << 26; |
code[1] |= (pcRel >> 6) & 0x3ffff; |
} |
} else |
if (mask & 2) { |
// Relative branch to a basic block, encoded like the relative call above. |
int32_t pcRel = f->target.bb->binPos - (codeSize + 8); |
// currently we don't want absolute branches |
assert(!f->absolute); |
code[0] |= (pcRel & 0x3f) << 26; |
code[1] |= (pcRel >> 6) & 0x3ffff; |
} |
} |
// Emit a barrier instruction. The sub-operation selects the low opcode |
// bits; src(0) is the barrier id, src(1) the thread count, an optional |
// src(2) a predicate input, and up to two definitions (one GPR, one |
// predicate) receive results. |
void |
CodeEmitterNVC0::emitBAR(const Instruction *i) |
{ |
Value *rDef = NULL, *pDef = NULL; |
// Anything that is not ARRIVE or one of the reductions must be SYNC, |
// which shares the 0x04 encoding with RED_POPC here. |
switch (i->subOp) { |
case NV50_IR_SUBOP_BAR_ARRIVE: code[0] = 0x84; break; |
case NV50_IR_SUBOP_BAR_RED_AND: code[0] = 0x24; break; |
case NV50_IR_SUBOP_BAR_RED_OR: code[0] = 0x44; break; |
case NV50_IR_SUBOP_BAR_RED_POPC: code[0] = 0x04; break; |
default: |
code[0] = 0x04; |
assert(i->subOp == NV50_IR_SUBOP_BAR_SYNC); |
break; |
} |
code[1] = 0x50000000; |
// Start with both destination fields filled with all ones; they are |
// cleared and rewritten at the end if a matching definition exists. |
code[0] |= 63 << 14; |
code[1] |= 7 << 21; |
emitPredicate(i); |
// barrier id |
if (i->src(0).getFile() == FILE_GPR) { |
srcId(i->src(0), 20); |
} else { |
ImmediateValue *imm = i->getSrc(0)->asImm(); |
assert(imm); |
code[0] |= imm->reg.data.u32 << 20; |
} |
// thread count |
if (i->src(1).getFile() == FILE_GPR) { |
srcId(i->src(1), 26); |
} else { |
// the immediate straddles both code words: low 6 bits in code[0], |
// the rest in code[1] |
ImmediateValue *imm = i->getSrc(1)->asImm(); |
assert(imm); |
code[0] |= imm->reg.data.u32 << 26; |
code[1] |= imm->reg.data.u32 >> 6; |
} |
// Optional predicate input (only if src(2) is not the instruction's own |
// predicate); otherwise the field is filled with all ones. |
if (i->srcExists(2) && (i->predSrc != 2)) { |
srcId(i->src(2), 32 + 17); |
if (i->src(2).mod == Modifier(NV50_IR_MOD_NOT)) |
code[1] |= 1 << 20; |
} else { |
code[1] |= 7 << 17; |
} |
// Sort the (up to two) definitions into a GPR and a predicate result by |
// their register file. |
if (i->defExists(0)) { |
if (i->def(0).getFile() == FILE_GPR) |
rDef = i->getDef(0); |
else |
pDef = i->getDef(0); |
if (i->defExists(1)) { |
if (i->def(1).getFile() == FILE_GPR) |
rDef = i->getDef(1); |
else |
pDef = i->getDef(1); |
} |
} |
if (rDef) { |
code[0] &= ~(63 << 14); |
defId(rDef, 14); |
} |
if (pDef) { |
code[1] &= ~(7 << 21); |
defId(pDef, 32 + 21); |
} |
} |
void |
CodeEmitterNVC0::emitPFETCH(const Instruction *i) |
{ |
uint32_t prim = i->src(0).get()->reg.data.u32; |
code[0] = 0x00000006 | ((prim & 0x3f) << 26); |
code[1] = 0x00000000 | (prim >> 6); |
emitPredicate(i); |
const int src1 = (i->predSrc == 1) ? 2 : 1; // if predSrc == 1, !srcExists(2) |
defId(i->def(0), 14); |
srcId(i, src1, 20); |
} |
void |
CodeEmitterNVC0::emitVFETCH(const Instruction *i) |
{ |
code[0] = 0x00000006; |
code[1] = 0x06000000 | i->src(0).get()->reg.data.offset; |
if (i->perPatch) |
code[0] |= 0x100; |
if (i->getSrc(0)->reg.file == FILE_SHADER_OUTPUT) |
code[0] |= 0x200; // yes, TCPs can read from *outputs* of other threads |
emitPredicate(i); |
code[0] |= ((i->getDef(0)->reg.size / 4) - 1) << 5; |
defId(i->def(0), 14); |
srcId(i->src(0).getIndirect(0), 20); |
srcId(i->src(0).getIndirect(1), 26); // vertex address |
} |
void |
CodeEmitterNVC0::emitEXPORT(const Instruction *i) |
{ |
unsigned int size = typeSizeof(i->dType); |
code[0] = 0x00000006 | ((size / 4 - 1) << 5); |
code[1] = 0x0a000000 | i->src(0).get()->reg.data.offset; |
assert(!(code[1] & ((size == 12) ? 15 : (size - 1)))); |
if (i->perPatch) |
code[0] |= 0x100; |
emitPredicate(i); |
assert(i->src(1).getFile() == FILE_GPR); |
srcId(i->src(0).getIndirect(0), 20); |
srcId(i->src(0).getIndirect(1), 32 + 17); // vertex base address |
srcId(i->src(1), 26); |
} |
void |
CodeEmitterNVC0::emitOUT(const Instruction *i) |
{ |
code[0] = 0x00000006; |
code[1] = 0x1c000000; |
emitPredicate(i); |
defId(i->def(0), 14); // new secret address |
srcId(i->src(0), 20); // old secret address, should be 0 initially |
assert(i->src(0).getFile() == FILE_GPR); |
if (i->op == OP_EMIT) |
code[0] |= 1 << 5; |
if (i->op == OP_RESTART || i->subOp == NV50_IR_SUBOP_EMIT_RESTART) |
code[0] |= 1 << 6; |
// vertex stream |
if (i->src(1).getFile() == FILE_IMMEDIATE) { |
unsigned int stream = SDATA(i->src(1)).u32; |
assert(stream < 4); |
if (stream) { |
code[1] |= 0xc000; |
code[0] |= stream << 26; |
} else { |
srcId(NULL, 26); |
} |
} else { |
srcId(i->src(1), 26); |
} |
} |
void |
CodeEmitterNVC0::emitInterpMode(const Instruction *i) |
{ |
if (i->encSize == 8) { |
code[0] |= i->ipa << 6; // TODO: INTERP_SAMPLEID |
} else { |
if (i->getInterpMode() == NV50_IR_INTERP_SC) |
code[0] |= 0x80; |
assert(i->op == OP_PINTERP && i->getSampleMode() == 0); |
} |
} |
void |
CodeEmitterNVC0::emitINTERP(const Instruction *i) |
{ |
const uint32_t base = i->getSrc(0)->reg.data.offset; |
if (i->encSize == 8) { |
code[0] = 0x00000000; |
code[1] = 0xc0000000 | (base & 0xffff); |
if (i->saturate) |
code[0] |= 1 << 5; |
if (i->op == OP_PINTERP) |
srcId(i->src(1), 26); |
else |
code[0] |= 0x3f << 26; |
srcId(i->src(0).getIndirect(0), 20); |
} else { |
assert(i->op == OP_PINTERP); |
code[0] = 0x00000009 | ((base & 0xc) << 6) | ((base >> 4) << 26); |
srcId(i->src(1), 20); |
} |
emitInterpMode(i); |
emitPredicate(i); |
defId(i->def(0), 14); |
if (i->getSampleMode() == NV50_IR_INTERP_OFFSET) |
srcId(i->src(i->op == OP_PINTERP ? 2 : 1), 32 + 17); |
else |
code[1] |= 0x3f << 17; |
} |
void |
CodeEmitterNVC0::emitLoadStoreType(DataType ty) |
{ |
uint8_t val; |
switch (ty) { |
case TYPE_U8: |
val = 0x00; |
break; |
case TYPE_S8: |
val = 0x20; |
break; |
case TYPE_F16: |
case TYPE_U16: |
val = 0x40; |
break; |
case TYPE_S16: |
val = 0x60; |
break; |
case TYPE_F32: |
case TYPE_U32: |
case TYPE_S32: |
val = 0x80; |
break; |
case TYPE_F64: |
case TYPE_U64: |
case TYPE_S64: |
val = 0xa0; |
break; |
case TYPE_B128: |
val = 0xc0; |
break; |
default: |
val = 0x80; |
assert(!"invalid type"); |
break; |
} |
code[0] |= val; |
} |
void |
CodeEmitterNVC0::emitCachingMode(CacheMode c) |
{ |
uint32_t val; |
switch (c) { |
case CACHE_CA: |
// case CACHE_WB: |
val = 0x000; |
break; |
case CACHE_CG: |
val = 0x100; |
break; |
case CACHE_CS: |
val = 0x200; |
break; |
case CACHE_CV: |
// case CACHE_WT: |
val = 0x300; |
break; |
default: |
val = 0; |
assert(!"invalid caching mode"); |
break; |
} |
code[0] |= val; |
} |
static inline bool |
uses64bitAddress(const Instruction *ldst) |
{ |
return ldst->src(0).getFile() == FILE_MEMORY_GLOBAL && |
ldst->src(0).isIndirect(0) && |
ldst->getIndirect(0, 0)->reg.size == 8; |
} |
void |
CodeEmitterNVC0::emitSTORE(const Instruction *i) |
{ |
uint32_t opc; |
switch (i->src(0).getFile()) { |
case FILE_MEMORY_GLOBAL: opc = 0x90000000; break; |
case FILE_MEMORY_LOCAL: opc = 0xc8000000; break; |
case FILE_MEMORY_SHARED: opc = 0xc9000000; break; |
default: |
assert(!"invalid memory file"); |
opc = 0; |
break; |
} |
code[0] = 0x00000005; |
code[1] = opc; |
setAddressByFile(i->src(0)); |
srcId(i->src(1), 14); |
srcId(i->src(0).getIndirect(0), 20); |
if (uses64bitAddress(i)) |
code[1] |= 1 << 26; |
emitPredicate(i); |
emitLoadStoreType(i->dType); |
emitCachingMode(i->cache); |
} |
void |
CodeEmitterNVC0::emitLOAD(const Instruction *i) |
{ |
uint32_t opc; |
code[0] = 0x00000005; |
switch (i->src(0).getFile()) { |
case FILE_MEMORY_GLOBAL: opc = 0x80000000; break; |
case FILE_MEMORY_LOCAL: opc = 0xc0000000; break; |
case FILE_MEMORY_SHARED: opc = 0xc1000000; break; |
case FILE_MEMORY_CONST: |
if (!i->src(0).isIndirect(0) && typeSizeof(i->dType) == 4) { |
emitMOV(i); // not sure if this is any better |
return; |
} |
opc = 0x14000000 | (i->src(0).get()->reg.fileIndex << 10); |
code[0] = 0x00000006 | (i->subOp << 8); |
break; |
default: |
assert(!"invalid memory file"); |
opc = 0; |
break; |
} |
code[1] = opc; |
defId(i->def(0), 14); |
setAddressByFile(i->src(0)); |
srcId(i->src(0).getIndirect(0), 20); |
if (uses64bitAddress(i)) |
code[1] |= 1 << 26; |
emitPredicate(i); |
emitLoadStoreType(i->dType); |
emitCachingMode(i->cache); |
} |
uint8_t |
CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref) |
{ |
switch (SDATA(ref).sv.sv) { |
case SV_LANEID: return 0x00; |
case SV_PHYSID: return 0x03; |
case SV_VERTEX_COUNT: return 0x10; |
case SV_INVOCATION_ID: return 0x11; |
case SV_YDIR: return 0x12; |
case SV_TID: return 0x21 + SDATA(ref).sv.index; |
case SV_CTAID: return 0x25 + SDATA(ref).sv.index; |
case SV_NTID: return 0x29 + SDATA(ref).sv.index; |
case SV_GRIDID: return 0x2c; |
case SV_NCTAID: return 0x2d + SDATA(ref).sv.index; |
case SV_LBASE: return 0x34; |
case SV_SBASE: return 0x30; |
case SV_CLOCK: return 0x50 + SDATA(ref).sv.index; |
default: |
assert(!"no sreg for system value"); |
return 0; |
} |
} |
// Emit a move. Four families are distinguished, in this order: |
// 1. predicate destination (from a GPR, an immediate or another source), |
// 2. system-value source (long or short encoding), |
// 3. generic long encoding through form B, |
// 4. generic short encoding (immediate or register-like source). |
void |
CodeEmitterNVC0::emitMOV(const Instruction *i) |
{ |
if (i->def(0).getFile() == FILE_PREDICATE) { |
if (i->src(0).getFile() == FILE_GPR) { |
// GPR -> predicate uses a dedicated fixed encoding |
code[0] = 0xfc01c003; |
code[1] = 0x1a8e0000; |
srcId(i->src(0), 20); |
} else { |
code[0] = 0x0001c004; |
code[1] = 0x0c0e0000; |
if (i->src(0).getFile() == FILE_IMMEDIATE) { |
// An immediate is expressed through the source field filled with all |
// ones (7 << 20); bit 23 is added when the immediate is zero. |
code[0] |= 7 << 20; |
if (!i->getSrc(0)->reg.data.u32) |
code[0] |= 1 << 23; |
} else { |
srcId(i->src(0), 20); |
} |
} |
defId(i->def(0), 17); |
emitPredicate(i); |
} else |
if (i->src(0).getFile() == FILE_SYSTEM_VALUE) { |
// The special-register number goes to bit 26 (long) or bit 20 (short). |
uint8_t sr = getSRegEncoding(i->src(0)); |
if (i->encSize == 8) { |
code[0] = 0x00000004 | (sr << 26); |
code[1] = 0x2c000000; |
} else { |
code[0] = 0x40000008 | (sr << 20); |
} |
defId(i->def(0), 14); |
emitPredicate(i); |
} else |
if (i->encSize == 8) { |
// Long encoding: the opcode depends on the source file; the lane mask |
// is merged into the opcode at bit 5 before form B is emitted. |
uint64_t opc; |
if (i->src(0).getFile() == FILE_IMMEDIATE) |
opc = HEX64(18000000, 000001e2); |
else |
if (i->src(0).getFile() == FILE_PREDICATE) |
opc = HEX64(080e0000, 1c000004); |
else |
opc = HEX64(28000000, 00000004); |
opc |= i->lanes << 5; |
emitForm_B(i, opc); |
} else { |
uint32_t imm; |
if (i->src(0).getFile() == FILE_IMMEDIATE) { |
imm = SDATA(i->src(0)).u32; |
// Short immediates come in two shapes: a value whose low 20 bits are |
// zero is stored as is in the upper bits, anything else is shifted up |
// by 20 bits. |
if (imm & 0xfff00000) { |
assert(!(imm & 0x000fffff)); |
code[0] = 0x00000318 | imm; |
} else { |
// NOTE(review): in this branch imm < 0x100000, so the second operand |
// of the || below is always true and the assertion cannot fail - |
// confirm the intended range check. |
assert(imm < 0x800 || ((int32_t)imm >= -0x800)); |
code[0] = 0x00000118 | (imm << 20); |
} |
} else { |
code[0] = 0x0028; |
emitShortSrc2(i->src(0)); |
} |
defId(i->def(0), 14); |
emitPredicate(i); |
} |
} |
// Emit an atomic operation. The opcode words depend on the data type |
// (U64, U32, S32, F32) and the sub-operation; when no destination exists |
// and the op is neither EXCH nor CAS a different opcode family is used. |
// NOTE(review): for a dType outside the four handled ones, and for the |
// U64 default case, code[0]/code[1] are not assigned before being OR-ed |
// into below. |
void |
CodeEmitterNVC0::emitATOM(const Instruction *i) |
{ |
const bool hasDst = i->defExists(0); |
const bool casOrExch = |
i->subOp == NV50_IR_SUBOP_ATOM_EXCH || |
i->subOp == NV50_IR_SUBOP_ATOM_CAS; |
if (i->dType == TYPE_U64) { |
// 64-bit: only ADD, EXCH and CAS are available |
switch (i->subOp) { |
case NV50_IR_SUBOP_ATOM_ADD: |
code[0] = 0x205; |
if (hasDst) |
code[1] = 0x507e0000; |
else |
code[1] = 0x10000000; |
break; |
case NV50_IR_SUBOP_ATOM_EXCH: |
code[0] = 0x305; |
code[1] = 0x507e0000; |
break; |
case NV50_IR_SUBOP_ATOM_CAS: |
code[0] = 0x325; |
code[1] = 0x50000000; |
break; |
default: |
assert(!"invalid u64 red op"); |
break; |
} |
} else |
if (i->dType == TYPE_U32) { |
switch (i->subOp) { |
case NV50_IR_SUBOP_ATOM_EXCH: |
code[0] = 0x105; |
code[1] = 0x507e0000; |
break; |
case NV50_IR_SUBOP_ATOM_CAS: |
code[0] = 0x125; |
code[1] = 0x50000000; |
break; |
default: |
// all remaining sub-operations store their number at bit 5 |
code[0] = 0x5 | (i->subOp << 5); |
if (hasDst) |
code[1] = 0x507e0000; |
else |
code[1] = 0x10000000; |
break; |
} |
} else |
if (i->dType == TYPE_S32) { |
// signed: only sub-operations 0..2 are accepted |
assert(i->subOp <= 2); |
code[0] = 0x205 | (i->subOp << 5); |
if (hasDst) |
code[1] = 0x587e0000; |
else |
code[1] = 0x18000000; |
} else |
if (i->dType == TYPE_F32) { |
// float: ADD only |
assert(i->subOp == NV50_IR_SUBOP_ATOM_ADD); |
code[0] = 0x205; |
if (hasDst) |
code[1] = 0x687e0000; |
else |
code[1] = 0x28000000; |
} |
emitPredicate(i); |
srcId(i->src(1), 14); |
// Destination at position 32 + 11; EXCH/CAS without a destination fill |
// that field with 63 instead. |
if (hasDst) |
defId(i->def(0), 32 + 11); |
else |
if (casOrExch) |
code[1] |= 63 << 11; |
// With a destination field in use the address offset is limited to a |
// signed 20-bit value scattered over both words; otherwise a 32-bit |
// address is encoded via srcAddr32. |
if (hasDst || casOrExch) { |
const int32_t offset = SDATA(i->src(0)).offset; |
assert(offset < 0x80000 && offset >= -0x80000); |
code[0] |= offset << 26; |
code[1] |= (offset & 0x1ffc0) >> 6; |
code[1] |= (offset & 0xe0000) << 6; |
} else { |
srcAddr32(i->src(0), 26, 0); |
} |
// Address register at position 20 (bit 26 of code[1] flags an 8-byte |
// register); without one the field is filled with 63. |
if (i->getIndirect(0, 0)) { |
srcId(i->getIndirect(0, 0), 20); |
if (i->getIndirect(0, 0)->reg.size == 8) |
code[1] |= 1 << 26; |
} else { |
code[0] |= 63 << 20; |
} |
// CAS takes an additional source |
if (i->subOp == NV50_IR_SUBOP_ATOM_CAS) |
srcId(i->src(2), 32 + 17); |
} |
void |
CodeEmitterNVC0::emitMEMBAR(const Instruction *i) |
{ |
switch (NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp)) { |
case NV50_IR_SUBOP_MEMBAR_CTA: code[0] = 0x05; break; |
case NV50_IR_SUBOP_MEMBAR_GL: code[0] = 0x25; break; |
default: |
code[0] = 0x45; |
assert(NV50_IR_SUBOP_MEMBAR_SCOPE(i->subOp) == NV50_IR_SUBOP_MEMBAR_SYS); |
break; |
} |
code[1] = 0xe0000000; |
emitPredicate(i); |
} |
void |
CodeEmitterNVC0::emitCCTL(const Instruction *i) |
{ |
code[0] = 0x00000005 | (i->subOp << 5); |
if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) { |
code[1] = 0x98000000; |
srcAddr32(i->src(0), 28, 2); |
} else { |
code[1] = 0xd0000000; |
setAddress24(i->src(0)); |
} |
if (uses64bitAddress(i)) |
code[1] |= 1 << 26; |
srcId(i->src(0).getIndirect(0), 20); |
emitPredicate(i); |
defId(i, 0, 14); |
} |
void |
CodeEmitterNVC0::emitSUCLAMPMode(uint16_t subOp) |
{ |
uint8_t m; |
switch (subOp & ~NV50_IR_SUBOP_SUCLAMP_2D) { |
case NV50_IR_SUBOP_SUCLAMP_SD(0, 1): m = 0; break; |
case NV50_IR_SUBOP_SUCLAMP_SD(1, 1): m = 1; break; |
case NV50_IR_SUBOP_SUCLAMP_SD(2, 1): m = 2; break; |
case NV50_IR_SUBOP_SUCLAMP_SD(3, 1): m = 3; break; |
case NV50_IR_SUBOP_SUCLAMP_SD(4, 1): m = 4; break; |
case NV50_IR_SUBOP_SUCLAMP_PL(0, 1): m = 5; break; |
case NV50_IR_SUBOP_SUCLAMP_PL(1, 1): m = 6; break; |
case NV50_IR_SUBOP_SUCLAMP_PL(2, 1): m = 7; break; |
case NV50_IR_SUBOP_SUCLAMP_PL(3, 1): m = 8; break; |
case NV50_IR_SUBOP_SUCLAMP_PL(4, 1): m = 9; break; |
case NV50_IR_SUBOP_SUCLAMP_BL(0, 1): m = 10; break; |
case NV50_IR_SUBOP_SUCLAMP_BL(1, 1): m = 11; break; |
case NV50_IR_SUBOP_SUCLAMP_BL(2, 1): m = 12; break; |
case NV50_IR_SUBOP_SUCLAMP_BL(3, 1): m = 13; break; |
case NV50_IR_SUBOP_SUCLAMP_BL(4, 1): m = 14; break; |
default: |
return; |
} |
code[0] |= m << 5; |
if (subOp & NV50_IR_SUBOP_SUCLAMP_2D) |
code[1] |= 1 << 16; |
} |
void |
CodeEmitterNVC0::emitSUCalc(Instruction *i) |
{ |
ImmediateValue *imm = NULL; |
uint64_t opc; |
if (i->srcExists(2)) { |
imm = i->getSrc(2)->asImm(); |
if (imm) |
i->setSrc(2, NULL); // special case, make emitForm_A not assert |
} |
switch (i->op) { |
case OP_SUCLAMP: opc = HEX64(58000000, 00000004); break; |
case OP_SUBFM: opc = HEX64(5c000000, 00000004); break; |
case OP_SUEAU: opc = HEX64(60000000, 00000004); break; |
default: |
assert(0); |
return; |
} |
emitForm_A(i, opc); |
if (i->op == OP_SUCLAMP) { |
if (i->dType == TYPE_S32) |
code[0] |= 1 << 9; |
emitSUCLAMPMode(i->subOp); |
} |
if (i->op == OP_SUBFM && i->subOp == NV50_IR_SUBOP_SUBFM_3D) |
code[1] |= 1 << 16; |
if (i->op != OP_SUEAU) { |
if (i->def(0).getFile() == FILE_PREDICATE) { // p, # |
code[0] |= 63 << 14; |
code[1] |= i->getDef(0)->reg.data.id << 23; |
} else |
if (i->defExists(1)) { // r, p |
assert(i->def(1).getFile() == FILE_PREDICATE); |
code[1] |= i->getDef(1)->reg.data.id << 23; |
} else { // r, # |
code[1] |= 7 << 23; |
} |
} |
if (imm) { |
assert(i->op == OP_SUCLAMP); |
i->setSrc(2, imm); |
code[1] |= (imm->reg.data.u32 & 0x3f) << 17; // sint6 |
} |
} |
void |
CodeEmitterNVC0::emitSUGType(DataType ty) |
{ |
switch (ty) { |
case TYPE_S32: code[1] |= 1 << 13; break; |
case TYPE_U8: code[1] |= 2 << 13; break; |
case TYPE_S8: code[1] |= 3 << 13; break; |
default: |
assert(ty == TYPE_U32); |
break; |
} |
} |
void |
CodeEmitterNVC0::setSUConst16(const Instruction *i, const int s) |
{ |
const uint32_t offset = i->getSrc(s)->reg.data.offset; |
assert(i->src(s).getFile() == FILE_MEMORY_CONST); |
assert(offset == (offset & 0xfffc)); |
code[1] |= 1 << 21; |
code[0] |= offset << 24; |
code[1] |= offset >> 8; |
code[1] |= i->getSrc(s)->reg.fileIndex << 8; |
} |
void |
CodeEmitterNVC0::setSUPred(const Instruction *i, const int s) |
{ |
if (!i->srcExists(s) || (i->predSrc == s)) { |
code[1] |= 0x7 << 17; |
} else { |
if (i->src(s).mod == Modifier(NV50_IR_MOD_NOT)) |
code[1] |= 1 << 20; |
srcId(i->src(s), 32 + 17); |
} |
} |
void |
CodeEmitterNVC0::emitSULDGB(const TexInstruction *i) |
{ |
code[0] = 0x5; |
code[1] = 0xd4000000 | (i->subOp << 15); |
emitLoadStoreType(i->dType); |
emitSUGType(i->sType); |
emitCachingMode(i->cache); |
emitPredicate(i); |
defId(i->def(0), 14); // destination |
srcId(i->src(0), 20); // address |
// format |
if (i->src(1).getFile() == FILE_GPR) |
srcId(i->src(1), 26); |
else |
setSUConst16(i, 1); |
setSUPred(i, 2); |
} |
void |
CodeEmitterNVC0::emitSUSTGx(const TexInstruction *i) |
{ |
code[0] = 0x5; |
code[1] = 0xdc000000 | (i->subOp << 15); |
if (i->op == OP_SUSTP) |
code[1] |= i->tex.mask << 22; |
else |
emitLoadStoreType(i->dType); |
emitSUGType(i->sType); |
emitCachingMode(i->cache); |
emitPredicate(i); |
srcId(i->src(0), 20); // address |
// format |
if (i->src(1).getFile() == FILE_GPR) |
srcId(i->src(1), 26); |
else |
setSUConst16(i, 1); |
srcId(i->src(3), 14); // values |
setSUPred(i, 2); |
} |
void |
CodeEmitterNVC0::emitVectorSubOp(const Instruction *i) |
{ |
switch (NV50_IR_SUBOP_Vn(i->subOp)) { |
case 0: |
code[1] |= (i->subOp & 0x000f) << 12; // vsrc1 |
code[1] |= (i->subOp & 0x00e0) >> 5; // vsrc2 |
code[1] |= (i->subOp & 0x0100) << 7; // vsrc2 |
code[1] |= (i->subOp & 0x3c00) << 13; // vdst |
break; |
case 1: |
code[1] |= (i->subOp & 0x000f) << 8; // v2src1 |
code[1] |= (i->subOp & 0x0010) << 11; // v2src1 |
code[1] |= (i->subOp & 0x01e0) >> 1; // v2src2 |
code[1] |= (i->subOp & 0x0200) << 6; // v2src2 |
code[1] |= (i->subOp & 0x3c00) << 2; // v4dst |
code[1] |= (i->mask & 0x3) << 2; |
break; |
case 2: |
code[1] |= (i->subOp & 0x000f) << 8; // v4src1 |
code[1] |= (i->subOp & 0x01e0) >> 1; // v4src2 |
code[1] |= (i->subOp & 0x3c00) << 2; // v4dst |
code[1] |= (i->mask & 0x3) << 2; |
code[1] |= (i->mask & 0xc) << 21; |
break; |
default: |
assert(0); |
break; |
} |
} |
void |
CodeEmitterNVC0::emitVSHL(const Instruction *i) |
{ |
uint64_t opc = 0x4; |
switch (NV50_IR_SUBOP_Vn(i->subOp)) { |
case 0: opc |= 0xe8ULL << 56; break; |
case 1: opc |= 0xb4ULL << 56; break; |
case 2: opc |= 0x94ULL << 56; break; |
default: |
assert(0); |
break; |
} |
if (NV50_IR_SUBOP_Vn(i->subOp) == 1) { |
if (isSignedType(i->dType)) opc |= 1ULL << 0x2a; |
if (isSignedType(i->sType)) opc |= (1 << 6) | (1 << 5); |
} else { |
if (isSignedType(i->dType)) opc |= 1ULL << 0x39; |
if (isSignedType(i->sType)) opc |= 1 << 6; |
} |
emitForm_A(i, opc); |
emitVectorSubOp(i); |
if (i->saturate) |
code[0] |= 1 << 9; |
if (i->flagsDef >= 0) |
code[1] |= 1 << 16; |
} |
void |
CodeEmitterNVC0::emitPIXLD(const Instruction *i) |
{ |
assert(i->encSize == 8); |
emitForm_A(i, HEX64(10000000, 00000006)); |
code[0] |= i->subOp << 5; |
code[1] |= 0x00e00000; |
} |
bool |
CodeEmitterNVC0::emitInstruction(Instruction *insn) |
{ |
unsigned int size = insn->encSize; |
if (writeIssueDelays && !(codeSize & 0x3f)) |
size += 8; |
if (!insn->encSize) { |
ERROR("skipping unencodable instruction: "); insn->print(); |
return false; |
} else |
if (codeSize + size > codeSizeLimit) { |
ERROR("code emitter output buffer too small\n"); |
return false; |
} |
if (writeIssueDelays) { |
if (!(codeSize & 0x3f)) { |
code[0] = 0x00000007; // cf issue delay "instruction" |
code[1] = 0x20000000; |
code += 2; |
codeSize += 8; |
} |
const unsigned int id = (codeSize & 0x3f) / 8 - 1; |
uint32_t *data = code - (id * 2 + 2); |
if (id <= 2) { |
data[0] |= insn->sched << (id * 8 + 4); |
} else |
if (id == 3) { |
data[0] |= insn->sched << 28; |
data[1] |= insn->sched >> 4; |
} else { |
data[1] |= insn->sched << ((id - 4) * 8 + 4); |
} |
} |
// assert that instructions with multiple defs don't corrupt registers |
for (int d = 0; insn->defExists(d); ++d) |
assert(insn->asTex() || insn->def(d).rep()->reg.data.id >= 0); |
switch (insn->op) { |
case OP_MOV: |
case OP_RDSV: |
emitMOV(insn); |
break; |
case OP_NOP: |
break; |
case OP_LOAD: |
emitLOAD(insn); |
break; |
case OP_STORE: |
emitSTORE(insn); |
break; |
case OP_LINTERP: |
case OP_PINTERP: |
emitINTERP(insn); |
break; |
case OP_VFETCH: |
emitVFETCH(insn); |
break; |
case OP_EXPORT: |
emitEXPORT(insn); |
break; |
case OP_PFETCH: |
emitPFETCH(insn); |
break; |
case OP_EMIT: |
case OP_RESTART: |
emitOUT(insn); |
break; |
case OP_ADD: |
case OP_SUB: |
if (insn->dType == TYPE_F64) |
emitDADD(insn); |
else if (isFloatType(insn->dType)) |
emitFADD(insn); |
else |
emitUADD(insn); |
break; |
case OP_MUL: |
if (insn->dType == TYPE_F64) |
emitDMUL(insn); |
else if (isFloatType(insn->dType)) |
emitFMUL(insn); |
else |
emitUMUL(insn); |
break; |
case OP_MAD: |
case OP_FMA: |
if (insn->dType == TYPE_F64) |
emitDMAD(insn); |
else if (isFloatType(insn->dType)) |
emitFMAD(insn); |
else |
emitIMAD(insn); |
break; |
case OP_SAD: |
emitISAD(insn); |
break; |
case OP_NOT: |
emitNOT(insn); |
break; |
case OP_AND: |
emitLogicOp(insn, 0); |
break; |
case OP_OR: |
emitLogicOp(insn, 1); |
break; |
case OP_XOR: |
emitLogicOp(insn, 2); |
break; |
case OP_SHL: |
case OP_SHR: |
emitShift(insn); |
break; |
case OP_SET: |
case OP_SET_AND: |
case OP_SET_OR: |
case OP_SET_XOR: |
emitSET(insn->asCmp()); |
break; |
case OP_SELP: |
emitSELP(insn); |
break; |
case OP_SLCT: |
emitSLCT(insn->asCmp()); |
break; |
case OP_MIN: |
case OP_MAX: |
emitMINMAX(insn); |
break; |
case OP_ABS: |
case OP_NEG: |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_CVT: |
case OP_SAT: |
emitCVT(insn); |
break; |
case OP_RSQ: |
emitSFnOp(insn, 5 + 2 * insn->subOp); |
break; |
case OP_RCP: |
emitSFnOp(insn, 4 + 2 * insn->subOp); |
break; |
case OP_LG2: |
emitSFnOp(insn, 3); |
break; |
case OP_EX2: |
emitSFnOp(insn, 2); |
break; |
case OP_SIN: |
emitSFnOp(insn, 1); |
break; |
case OP_COS: |
emitSFnOp(insn, 0); |
break; |
case OP_PRESIN: |
case OP_PREEX2: |
emitPreOp(insn); |
break; |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXD: |
case OP_TXF: |
case OP_TXG: |
case OP_TXLQ: |
emitTEX(insn->asTex()); |
break; |
case OP_TXQ: |
emitTXQ(insn->asTex()); |
break; |
case OP_TEXBAR: |
emitTEXBAR(insn); |
break; |
case OP_SUBFM: |
case OP_SUCLAMP: |
case OP_SUEAU: |
emitSUCalc(insn); |
break; |
case OP_MADSP: |
emitMADSP(insn); |
break; |
case OP_SULDB: |
if (targ->getChipset() >= NVISA_GK104_CHIPSET) |
emitSULDGB(insn->asTex()); |
else |
ERROR("SULDB not yet supported on < nve4\n"); |
break; |
case OP_SUSTB: |
case OP_SUSTP: |
if (targ->getChipset() >= NVISA_GK104_CHIPSET) |
emitSUSTGx(insn->asTex()); |
else |
ERROR("SUSTx not yet supported on < nve4\n"); |
break; |
case OP_ATOM: |
emitATOM(insn); |
break; |
case OP_BRA: |
case OP_CALL: |
case OP_PRERET: |
case OP_RET: |
case OP_DISCARD: |
case OP_EXIT: |
case OP_PRECONT: |
case OP_CONT: |
case OP_PREBREAK: |
case OP_BREAK: |
case OP_JOINAT: |
case OP_BRKPT: |
case OP_QUADON: |
case OP_QUADPOP: |
emitFlow(insn); |
break; |
case OP_QUADOP: |
emitQUADOP(insn, insn->subOp, insn->lanes); |
break; |
case OP_DFDX: |
emitQUADOP(insn, insn->src(0).mod.neg() ? 0x66 : 0x99, 0x4); |
break; |
case OP_DFDY: |
emitQUADOP(insn, insn->src(0).mod.neg() ? 0x5a : 0xa5, 0x5); |
break; |
case OP_POPCNT: |
emitPOPC(insn); |
break; |
case OP_INSBF: |
emitINSBF(insn); |
break; |
case OP_EXTBF: |
emitEXTBF(insn); |
break; |
case OP_BFIND: |
emitBFIND(insn); |
break; |
case OP_PERMT: |
emitPERMT(insn); |
break; |
case OP_JOIN: |
emitNOP(insn); |
insn->join = 1; |
break; |
case OP_BAR: |
emitBAR(insn); |
break; |
case OP_MEMBAR: |
emitMEMBAR(insn); |
break; |
case OP_CCTL: |
emitCCTL(insn); |
break; |
case OP_VSHL: |
emitVSHL(insn); |
break; |
case OP_PIXLD: |
emitPIXLD(insn); |
break; |
case OP_PHI: |
case OP_UNION: |
case OP_CONSTRAINT: |
ERROR("operation should have been eliminated"); |
return false; |
case OP_EXP: |
case OP_LOG: |
case OP_SQRT: |
case OP_POW: |
ERROR("operation should have been lowered\n"); |
return false; |
default: |
ERROR("unknow op\n"); |
return false; |
} |
if (insn->join) { |
code[0] |= 0x10; |
assert(insn->encSize == 8); |
} |
code += insn->encSize / 4; |
codeSize += insn->encSize; |
return true; |
} |
uint32_t |
CodeEmitterNVC0::getMinEncodingSize(const Instruction *i) const |
{ |
const Target::OpInfo &info = targ->getOpInfo(i); |
if (writeIssueDelays || info.minEncSize == 8 || 1) |
return 8; |
if (i->ftz || i->saturate || i->join) |
return 8; |
if (i->rnd != ROUND_N) |
return 8; |
if (i->predSrc >= 0 && i->op == OP_MAD) |
return 8; |
if (i->op == OP_PINTERP) { |
if (i->getSampleMode() || 1) // XXX: grr, short op doesn't work |
return 8; |
} else |
if (i->op == OP_MOV && i->lanes != 0xf) { |
return 8; |
} |
for (int s = 0; i->srcExists(s); ++s) { |
if (i->src(s).isIndirect(0)) |
return 8; |
if (i->src(s).getFile() == FILE_MEMORY_CONST) { |
if (SDATA(i->src(s)).offset >= 0x100) |
return 8; |
if (i->getSrc(s)->reg.fileIndex > 1 && |
i->getSrc(s)->reg.fileIndex != 16) |
return 8; |
} else |
if (i->src(s).getFile() == FILE_IMMEDIATE) { |
if (i->dType == TYPE_F32) { |
if (SDATA(i->src(s)).u32 >= 0x100) |
return 8; |
} else { |
if (SDATA(i->src(s)).u32 > 0xff) |
return 8; |
} |
} |
if (i->op == OP_CVT) |
continue; |
if (i->src(s).mod != Modifier(0)) { |
if (i->src(s).mod == Modifier(NV50_IR_MOD_ABS)) |
if (i->op != OP_RSQ) |
return 8; |
if (i->src(s).mod == Modifier(NV50_IR_MOD_NEG)) |
if (i->op != OP_ADD || s != 0) |
return 8; |
} |
} |
return 4; |
} |
// Simplified, erring on safe side. |
class SchedDataCalculator : public Pass |
{ |
public: |
SchedDataCalculator(const Target *targ) : targ(targ) { } |
private: |
struct RegScores |
{ |
struct Resource { |
int st[DATA_FILE_COUNT]; // LD to LD delay 3 |
int ld[DATA_FILE_COUNT]; // ST to ST delay 3 |
int tex; // TEX to non-TEX delay 17 (0x11) |
int sfu; // SFU to SFU delay 3 (except PRE-ops) |
int imul; // integer MUL to MUL delay 3 |
} res; |
struct ScoreData { |
int r[64]; |
int p[8]; |
int c; |
} rd, wr; |
int base; |
void rebase(const int base) |
{ |
const int delta = this->base - base; |
if (!delta) |
return; |
this->base = 0; |
for (int i = 0; i < 64; ++i) { |
rd.r[i] += delta; |
wr.r[i] += delta; |
} |
for (int i = 0; i < 8; ++i) { |
rd.p[i] += delta; |
wr.p[i] += delta; |
} |
rd.c += delta; |
wr.c += delta; |
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { |
res.ld[f] += delta; |
res.st[f] += delta; |
} |
res.sfu += delta; |
res.imul += delta; |
res.tex += delta; |
} |
void wipe() |
{ |
memset(&rd, 0, sizeof(rd)); |
memset(&wr, 0, sizeof(wr)); |
memset(&res, 0, sizeof(res)); |
} |
int getLatest(const ScoreData& d) const |
{ |
int max = 0; |
for (int i = 0; i < 64; ++i) |
if (d.r[i] > max) |
max = d.r[i]; |
for (int i = 0; i < 8; ++i) |
if (d.p[i] > max) |
max = d.p[i]; |
if (d.c > max) |
max = d.c; |
return max; |
} |
inline int getLatestRd() const |
{ |
return getLatest(rd); |
} |
inline int getLatestWr() const |
{ |
return getLatest(wr); |
} |
inline int getLatest() const |
{ |
const int a = getLatestRd(); |
const int b = getLatestWr(); |
int max = MAX2(a, b); |
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { |
max = MAX2(res.ld[f], max); |
max = MAX2(res.st[f], max); |
} |
max = MAX2(res.sfu, max); |
max = MAX2(res.imul, max); |
max = MAX2(res.tex, max); |
return max; |
} |
void setMax(const RegScores *that) |
{ |
for (int i = 0; i < 64; ++i) { |
rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); |
wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); |
} |
for (int i = 0; i < 8; ++i) { |
rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); |
wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); |
} |
rd.c = MAX2(rd.c, that->rd.c); |
wr.c = MAX2(wr.c, that->wr.c); |
for (unsigned int f = 0; f < DATA_FILE_COUNT; ++f) { |
res.ld[f] = MAX2(res.ld[f], that->res.ld[f]); |
res.st[f] = MAX2(res.st[f], that->res.st[f]); |
} |
res.sfu = MAX2(res.sfu, that->res.sfu); |
res.imul = MAX2(res.imul, that->res.imul); |
res.tex = MAX2(res.tex, that->res.tex); |
} |
void print(int cycle) |
{ |
for (int i = 0; i < 64; ++i) { |
if (rd.r[i] > cycle) |
INFO("rd $r%i @ %i\n", i, rd.r[i]); |
if (wr.r[i] > cycle) |
INFO("wr $r%i @ %i\n", i, wr.r[i]); |
} |
for (int i = 0; i < 8; ++i) { |
if (rd.p[i] > cycle) |
INFO("rd $p%i @ %i\n", i, rd.p[i]); |
if (wr.p[i] > cycle) |
INFO("wr $p%i @ %i\n", i, wr.p[i]); |
} |
if (rd.c > cycle) |
INFO("rd $c @ %i\n", rd.c); |
if (wr.c > cycle) |
INFO("wr $c @ %i\n", wr.c); |
if (res.sfu > cycle) |
INFO("sfu @ %i\n", res.sfu); |
if (res.imul > cycle) |
INFO("imul @ %i\n", res.imul); |
if (res.tex > cycle) |
INFO("tex @ %i\n", res.tex); |
} |
}; |
RegScores *score; // for current BB |
std::vector<RegScores> scoreBoards; |
int prevData; |
operation prevOp; |
const Target *targ; |
bool visit(Function *); |
bool visit(BasicBlock *); |
void commitInsn(const Instruction *, int cycle); |
int calcDelay(const Instruction *, int cycle) const; |
void setDelay(Instruction *, int delay, Instruction *next); |
void recordRd(const Value *, const int ready); |
void recordWr(const Value *, const int ready); |
void checkRd(const Value *, int cycle, int& delay) const; |
void checkWr(const Value *, int cycle, int& delay) const; |
int getCycles(const Instruction *, int origDelay) const; |
}; |
void |
SchedDataCalculator::setDelay(Instruction *insn, int delay, Instruction *next) |
{ |
if (insn->op == OP_EXIT || insn->op == OP_RET) |
delay = MAX2(delay, 14); |
if (insn->op == OP_TEXBAR) { |
// TODO: except if results not used before EXIT |
insn->sched = 0xc2; |
} else |
if (insn->op == OP_JOIN || insn->join) { |
insn->sched = 0x00; |
} else |
if (delay >= 0 || prevData == 0x04 || |
!next || !targ->canDualIssue(insn, next)) { |
insn->sched = static_cast<uint8_t>(MAX2(delay, 0)); |
if (prevOp == OP_EXPORT) |
insn->sched |= 0x40; |
else |
insn->sched |= 0x20; |
} else { |
insn->sched = 0x04; // dual-issue |
} |
if (prevData != 0x04 || prevOp != OP_EXPORT) |
if (insn->sched != 0x04 || insn->op == OP_EXPORT) |
prevOp = insn->op; |
prevData = insn->sched; |
} |
int |
SchedDataCalculator::getCycles(const Instruction *insn, int origDelay) const |
{ |
if (insn->sched & 0x80) { |
int c = (insn->sched & 0x0f) * 2 + 1; |
if (insn->op == OP_TEXBAR && origDelay > 0) |
c += origDelay; |
return c; |
} |
if (insn->sched & 0x60) |
return (insn->sched & 0x1f) + 1; |
return (insn->sched == 0x04) ? 0 : 32; |
} |
bool |
SchedDataCalculator::visit(Function *func) |
{ |
scoreBoards.resize(func->cfg.getSize()); |
for (size_t i = 0; i < scoreBoards.size(); ++i) |
scoreBoards[i].wipe(); |
return true; |
} |
bool |
SchedDataCalculator::visit(BasicBlock *bb) |
{ |
Instruction *insn; |
Instruction *next = NULL; |
int cycle = 0; |
prevData = 0x00; |
prevOp = OP_NOP; |
score = &scoreBoards.at(bb->getId()); |
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { |
// back branches will wait until all target dependencies are satisfied |
if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized |
continue; |
BasicBlock *in = BasicBlock::get(ei.getNode()); |
if (in->getExit()) { |
if (prevData != 0x04) |
prevData = in->getExit()->sched; |
prevOp = in->getExit()->op; |
} |
score->setMax(&scoreBoards.at(in->getId())); |
} |
if (bb->cfg.incidentCount() > 1) |
prevOp = OP_NOP; |
#ifdef NVC0_DEBUG_SCHED_DATA |
INFO("=== BB:%i initial scores\n", bb->getId()); |
score->print(cycle); |
#endif |
for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) { |
next = insn->next; |
commitInsn(insn, cycle); |
int delay = calcDelay(next, cycle); |
setDelay(insn, delay, next); |
cycle += getCycles(insn, delay); |
#ifdef NVC0_DEBUG_SCHED_DATA |
INFO("cycle %i, sched %02x\n", cycle, insn->sched); |
insn->print(); |
next->print(); |
#endif |
} |
if (!insn) |
return true; |
commitInsn(insn, cycle); |
int bbDelay = -1; |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
BasicBlock *out = BasicBlock::get(ei.getNode()); |
if (ei.getType() != Graph::Edge::BACK) { |
// only test the first instruction of the outgoing block |
next = out->getEntry(); |
if (next) |
bbDelay = MAX2(bbDelay, calcDelay(next, cycle)); |
} else { |
// wait until all dependencies are satisfied |
const int regsFree = score->getLatest(); |
next = out->getFirst(); |
for (int c = cycle; next && c < regsFree; next = next->next) { |
bbDelay = MAX2(bbDelay, calcDelay(next, c)); |
c += getCycles(next, bbDelay); |
} |
next = NULL; |
} |
} |
if (bb->cfg.outgoingCount() != 1) |
next = NULL; |
setDelay(insn, bbDelay, next); |
cycle += getCycles(insn, bbDelay); |
score->rebase(cycle); // common base for initializing out blocks' scores |
return true; |
} |
#define NVE4_MAX_ISSUE_DELAY 0x1f |
int |
SchedDataCalculator::calcDelay(const Instruction *insn, int cycle) const |
{ |
int delay = 0, ready = cycle; |
for (int s = 0; insn->srcExists(s); ++s) |
checkRd(insn->getSrc(s), cycle, delay); |
// WAR & WAW don't seem to matter |
// for (int s = 0; insn->srcExists(s); ++s) |
// recordRd(insn->getSrc(s), cycle); |
switch (Target::getOpClass(insn->op)) { |
case OPCLASS_SFU: |
ready = score->res.sfu; |
break; |
case OPCLASS_ARITH: |
if (insn->op == OP_MUL && !isFloatType(insn->dType)) |
ready = score->res.imul; |
break; |
case OPCLASS_TEXTURE: |
ready = score->res.tex; |
break; |
case OPCLASS_LOAD: |
ready = score->res.ld[insn->src(0).getFile()]; |
break; |
case OPCLASS_STORE: |
ready = score->res.st[insn->src(0).getFile()]; |
break; |
default: |
break; |
} |
if (Target::getOpClass(insn->op) != OPCLASS_TEXTURE) |
ready = MAX2(ready, score->res.tex); |
delay = MAX2(delay, ready - cycle); |
// if can issue next cycle, delay is 0, not 1 |
return MIN2(delay - 1, NVE4_MAX_ISSUE_DELAY); |
} |
void |
SchedDataCalculator::commitInsn(const Instruction *insn, int cycle) |
{ |
const int ready = cycle + targ->getLatency(insn); |
for (int d = 0; insn->defExists(d); ++d) |
recordWr(insn->getDef(d), ready); |
// WAR & WAW don't seem to matter |
// for (int s = 0; insn->srcExists(s); ++s) |
// recordRd(insn->getSrc(s), cycle); |
switch (Target::getOpClass(insn->op)) { |
case OPCLASS_SFU: |
score->res.sfu = cycle + 4; |
break; |
case OPCLASS_ARITH: |
if (insn->op == OP_MUL && !isFloatType(insn->dType)) |
score->res.imul = cycle + 4; |
break; |
case OPCLASS_TEXTURE: |
score->res.tex = cycle + 18; |
break; |
case OPCLASS_LOAD: |
if (insn->src(0).getFile() == FILE_MEMORY_CONST) |
break; |
score->res.ld[insn->src(0).getFile()] = cycle + 4; |
score->res.st[insn->src(0).getFile()] = ready; |
break; |
case OPCLASS_STORE: |
score->res.st[insn->src(0).getFile()] = cycle + 4; |
score->res.ld[insn->src(0).getFile()] = ready; |
break; |
case OPCLASS_OTHER: |
if (insn->op == OP_TEXBAR) |
score->res.tex = cycle; |
break; |
default: |
break; |
} |
#ifdef NVC0_DEBUG_SCHED_DATA |
score->print(cycle); |
#endif |
} |
void |
SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const |
{ |
int ready = cycle; |
int a, b; |
switch (v->reg.file) { |
case FILE_GPR: |
a = v->reg.data.id; |
b = a + v->reg.size / 4; |
for (int r = a; r < b; ++r) |
ready = MAX2(ready, score->rd.r[r]); |
break; |
case FILE_PREDICATE: |
ready = MAX2(ready, score->rd.p[v->reg.data.id]); |
break; |
case FILE_FLAGS: |
ready = MAX2(ready, score->rd.c); |
break; |
case FILE_SHADER_INPUT: |
case FILE_SHADER_OUTPUT: // yes, TCPs can read outputs |
case FILE_MEMORY_LOCAL: |
case FILE_MEMORY_CONST: |
case FILE_MEMORY_SHARED: |
case FILE_MEMORY_GLOBAL: |
case FILE_SYSTEM_VALUE: |
// TODO: any restrictions here ? |
break; |
case FILE_IMMEDIATE: |
break; |
default: |
assert(0); |
break; |
} |
if (cycle < ready) |
delay = MAX2(delay, ready - cycle); |
} |
void |
SchedDataCalculator::checkWr(const Value *v, int cycle, int& delay) const |
{ |
int ready = cycle; |
int a, b; |
switch (v->reg.file) { |
case FILE_GPR: |
a = v->reg.data.id; |
b = a + v->reg.size / 4; |
for (int r = a; r < b; ++r) |
ready = MAX2(ready, score->wr.r[r]); |
break; |
case FILE_PREDICATE: |
ready = MAX2(ready, score->wr.p[v->reg.data.id]); |
break; |
default: |
assert(v->reg.file == FILE_FLAGS); |
ready = MAX2(ready, score->wr.c); |
break; |
} |
if (cycle < ready) |
delay = MAX2(delay, ready - cycle); |
} |
void |
SchedDataCalculator::recordWr(const Value *v, const int ready) |
{ |
int a = v->reg.data.id; |
if (v->reg.file == FILE_GPR) { |
int b = a + v->reg.size / 4; |
for (int r = a; r < b; ++r) |
score->rd.r[r] = ready; |
} else |
// $c, $pX: shorter issue-to-read delay (at least as exec pred and carry) |
if (v->reg.file == FILE_PREDICATE) { |
score->rd.p[a] = ready + 4; |
} else { |
assert(v->reg.file == FILE_FLAGS); |
score->rd.c = ready + 4; |
} |
} |
void |
SchedDataCalculator::recordRd(const Value *v, const int ready) |
{ |
int a = v->reg.data.id; |
if (v->reg.file == FILE_GPR) { |
int b = a + v->reg.size / 4; |
for (int r = a; r < b; ++r) |
score->wr.r[r] = ready; |
} else |
if (v->reg.file == FILE_PREDICATE) { |
score->wr.p[a] = ready; |
} else |
if (v->reg.file == FILE_FLAGS) { |
score->wr.c = ready; |
} |
} |
bool |
calculateSchedDataNVC0(const Target *targ, Function *func) |
{ |
SchedDataCalculator sched(targ); |
return sched.run(func, true, true); |
} |
void |
CodeEmitterNVC0::prepareEmission(Function *func) |
{ |
CodeEmitter::prepareEmission(func); |
if (targ->hasSWSched) |
calculateSchedDataNVC0(targ, func); |
} |
// Create an emitter with no output buffer attached yet. Issue delays are
// written only when the target uses software scheduling.
CodeEmitterNVC0::CodeEmitterNVC0(const TargetNVC0 *target)
   : CodeEmitter(target),
     targNVC0(target),
     writeIssueDelays(target->hasSWSched)
{
   code = NULL;
   codeSizeLimit = 0;
   codeSize = 0;
   relocInfo = NULL;
}
// Allocate an NVC0 code emitter configured for the given program type.
// The caller receives the newly allocated object.
CodeEmitter *
TargetNVC0::createCodeEmitterNVC0(Program::Type type)
{
   CodeEmitterNVC0 *const emitter = new CodeEmitterNVC0(this);

   emitter->setProgramType(type);
   return emitter;
}
// Pick the emitter implementation for this chipset: the GK110 emitter from
// NVISA_GK20A_CHIPSET upwards, the NVC0 emitter for everything older.
CodeEmitter *
TargetNVC0::getCodeEmitter(Program::Type type)
{
   const bool useGK110 = chipset >= NVISA_GK20A_CHIPSET;

   return useGK110 ? createCodeEmitterGK110(type)
                   : createCodeEmitterNVC0(type);
}
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |
---|
0,0 → 1,3334 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "tgsi/tgsi_dump.h" |
#include "tgsi/tgsi_scan.h" |
#include "tgsi/tgsi_util.h" |
#include <set> |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_util.h" |
#include "codegen/nv50_ir_build_util.h" |
namespace tgsi { |
class Source; |
static nv50_ir::operation translateOpcode(uint opcode); |
static nv50_ir::DataFile translateFile(uint file); |
static nv50_ir::TexTarget translateTexture(uint texTarg); |
static nv50_ir::SVSemantic translateSysVal(uint sysval); |
class Instruction |
{ |
public: |
Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { } |
class SrcRegister |
{ |
public: |
SrcRegister(const struct tgsi_full_src_register *src) |
: reg(src->Register), |
fsr(src) |
{ } |
SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { } |
SrcRegister(const struct tgsi_ind_register& ind) |
: reg(tgsi_util_get_src_from_ind(&ind)), |
fsr(NULL) |
{ } |
struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off) |
{ |
struct tgsi_src_register reg; |
memset(®, 0, sizeof(reg)); |
reg.Index = off.Index; |
reg.File = off.File; |
reg.SwizzleX = off.SwizzleX; |
reg.SwizzleY = off.SwizzleY; |
reg.SwizzleZ = off.SwizzleZ; |
return reg; |
} |
SrcRegister(const struct tgsi_texture_offset& off) : |
reg(offsetToSrc(off)), |
fsr(NULL) |
{ } |
uint getFile() const { return reg.File; } |
bool is2D() const { return reg.Dimension; } |
bool isIndirect(int dim) const |
{ |
return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect; |
} |
int getIndex(int dim) const |
{ |
return (dim && fsr) ? fsr->Dimension.Index : reg.Index; |
} |
int getSwizzle(int chan) const |
{ |
return tgsi_util_get_src_register_swizzle(®, chan); |
} |
nv50_ir::Modifier getMod(int chan) const; |
SrcRegister getIndirect(int dim) const |
{ |
assert(fsr && isIndirect(dim)); |
if (dim) |
return SrcRegister(fsr->DimIndirect); |
return SrcRegister(fsr->Indirect); |
} |
uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const |
{ |
assert(reg.File == TGSI_FILE_IMMEDIATE); |
assert(!reg.Absolute); |
assert(!reg.Negate); |
return info->immd.data[reg.Index * 4 + getSwizzle(c)]; |
} |
private: |
const struct tgsi_src_register reg; |
const struct tgsi_full_src_register *fsr; |
}; |
class DstRegister |
{ |
public: |
DstRegister(const struct tgsi_full_dst_register *dst) |
: reg(dst->Register), |
fdr(dst) |
{ } |
DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } |
uint getFile() const { return reg.File; } |
bool is2D() const { return reg.Dimension; } |
bool isIndirect(int dim) const |
{ |
return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; |
} |
int getIndex(int dim) const |
{ |
return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index; |
} |
unsigned int getMask() const { return reg.WriteMask; } |
bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } |
SrcRegister getIndirect(int dim) const |
{ |
assert(fdr && isIndirect(dim)); |
if (dim) |
return SrcRegister(fdr->DimIndirect); |
return SrcRegister(fdr->Indirect); |
} |
private: |
const struct tgsi_dst_register reg; |
const struct tgsi_full_dst_register *fdr; |
}; |
inline uint getOpcode() const { return insn->Instruction.Opcode; } |
unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } |
unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } |
// mask of used components of source s |
unsigned int srcMask(unsigned int s) const; |
SrcRegister getSrc(unsigned int s) const |
{ |
assert(s < srcCount()); |
return SrcRegister(&insn->Src[s]); |
} |
DstRegister getDst(unsigned int d) const |
{ |
assert(d < dstCount()); |
return DstRegister(&insn->Dst[d]); |
} |
SrcRegister getTexOffset(unsigned int i) const |
{ |
assert(i < TGSI_FULL_MAX_TEX_OFFSETS); |
return SrcRegister(insn->TexOffsets[i]); |
} |
unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } |
bool checkDstSrcAliasing() const; |
inline nv50_ir::operation getOP() const { |
return translateOpcode(getOpcode()); } |
nv50_ir::DataType inferSrcType() const; |
nv50_ir::DataType inferDstType() const; |
nv50_ir::CondCode getSetCond() const; |
nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; |
inline uint getLabel() { return insn->Label.Label; } |
unsigned getSaturate() const { return insn->Instruction.Saturate; } |
void print() const |
{ |
tgsi_dump_instruction(insn, 1); |
} |
private: |
const struct tgsi_full_instruction *insn; |
}; |
unsigned int Instruction::srcMask(unsigned int s) const |
{ |
unsigned int mask = insn->Dst[0].Register.WriteMask; |
switch (insn->Instruction.Opcode) { |
case TGSI_OPCODE_COS: |
case TGSI_OPCODE_SIN: |
return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); |
case TGSI_OPCODE_DP2: |
return 0x3; |
case TGSI_OPCODE_DP3: |
return 0x7; |
case TGSI_OPCODE_DP4: |
case TGSI_OPCODE_DPH: |
case TGSI_OPCODE_KILL_IF: /* WriteMask ignored */ |
return 0xf; |
case TGSI_OPCODE_DST: |
return mask & (s ? 0xa : 0x6); |
case TGSI_OPCODE_EX2: |
case TGSI_OPCODE_EXP: |
case TGSI_OPCODE_LG2: |
case TGSI_OPCODE_LOG: |
case TGSI_OPCODE_POW: |
case TGSI_OPCODE_RCP: |
case TGSI_OPCODE_RSQ: |
case TGSI_OPCODE_SCS: |
return 0x1; |
case TGSI_OPCODE_IF: |
case TGSI_OPCODE_UIF: |
return 0x1; |
case TGSI_OPCODE_LIT: |
return 0xb; |
case TGSI_OPCODE_TEX2: |
case TGSI_OPCODE_TXB2: |
case TGSI_OPCODE_TXL2: |
return (s == 0) ? 0xf : 0x3; |
case TGSI_OPCODE_TEX: |
case TGSI_OPCODE_TXB: |
case TGSI_OPCODE_TXD: |
case TGSI_OPCODE_TXL: |
case TGSI_OPCODE_TXP: |
case TGSI_OPCODE_LODQ: |
{ |
const struct tgsi_instruction_texture *tex = &insn->Texture; |
assert(insn->Instruction.Texture); |
mask = 0x7; |
if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && |
insn->Instruction.Opcode != TGSI_OPCODE_TXD) |
mask |= 0x8; /* bias, lod or proj */ |
switch (tex->Texture) { |
case TGSI_TEXTURE_1D: |
mask &= 0x9; |
break; |
case TGSI_TEXTURE_SHADOW1D: |
mask &= 0xd; |
break; |
case TGSI_TEXTURE_1D_ARRAY: |
case TGSI_TEXTURE_2D: |
case TGSI_TEXTURE_RECT: |
mask &= 0xb; |
break; |
case TGSI_TEXTURE_CUBE_ARRAY: |
case TGSI_TEXTURE_SHADOW2D_ARRAY: |
case TGSI_TEXTURE_SHADOWCUBE: |
case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
mask |= 0x8; |
break; |
default: |
break; |
} |
} |
return mask; |
case TGSI_OPCODE_XPD: |
{ |
unsigned int x = 0; |
if (mask & 1) x |= 0x6; |
if (mask & 2) x |= 0x5; |
if (mask & 4) x |= 0x3; |
return x; |
} |
case TGSI_OPCODE_D2I: |
case TGSI_OPCODE_D2U: |
case TGSI_OPCODE_D2F: |
case TGSI_OPCODE_DSLT: |
case TGSI_OPCODE_DSGE: |
case TGSI_OPCODE_DSEQ: |
case TGSI_OPCODE_DSNE: |
switch (util_bitcount(mask)) { |
case 1: return 0x3; |
case 2: return 0xf; |
default: |
assert(!"unexpected mask"); |
return 0xf; |
} |
case TGSI_OPCODE_I2D: |
case TGSI_OPCODE_U2D: |
case TGSI_OPCODE_F2D: { |
unsigned int x = 0; |
if ((mask & 0x3) == 0x3) |
x |= 1; |
if ((mask & 0xc) == 0xc) |
x |= 2; |
return x; |
} |
default: |
break; |
} |
return mask; |
} |
// Translate the register's Absolute / Negate flags into an nv50_ir
// modifier. The channel argument is not used.
nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const
{
   const nv50_ir::Modifier absMod(NV50_IR_MOD_ABS);
   const nv50_ir::Modifier negMod(NV50_IR_MOD_NEG);

   nv50_ir::Modifier result(0);

   if (reg.Absolute)
      result = result | absMod;
   if (reg.Negate)
      result = result | negMod;

   return result;
}
// Map a TGSI register file to the corresponding nv50_ir data file.
// Sampler, null and unrecognized files map to FILE_NULL.
static nv50_ir::DataFile translateFile(uint file)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return nv50_ir::FILE_MEMORY_CONST;
   case TGSI_FILE_INPUT:
      return nv50_ir::FILE_SHADER_INPUT;
   case TGSI_FILE_OUTPUT:
      return nv50_ir::FILE_SHADER_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return nv50_ir::FILE_GPR;
   case TGSI_FILE_ADDRESS:
      return nv50_ir::FILE_ADDRESS;
   case TGSI_FILE_PREDICATE:
      return nv50_ir::FILE_PREDICATE;
   case TGSI_FILE_IMMEDIATE:
      return nv50_ir::FILE_IMMEDIATE;
   case TGSI_FILE_SYSTEM_VALUE:
      return nv50_ir::FILE_SYSTEM_VALUE;
   case TGSI_FILE_RESOURCE:
      return nv50_ir::FILE_MEMORY_GLOBAL;

   case TGSI_FILE_SAMPLER:
   case TGSI_FILE_NULL:
   default:
      break;
   }
   return nv50_ir::FILE_NULL;
}
// Map a TGSI system-value semantic to the nv50_ir equivalent.
// Unsupported semantics assert; without assertions SV_CLOCK is returned.
static nv50_ir::SVSemantic translateSysVal(uint sysval)
{
   switch (sysval) {
   case TGSI_SEMANTIC_FACE:
      return nv50_ir::SV_FACE;
   case TGSI_SEMANTIC_PSIZE:
      return nv50_ir::SV_POINT_SIZE;
   case TGSI_SEMANTIC_PRIMID:
      return nv50_ir::SV_PRIMITIVE_ID;
   case TGSI_SEMANTIC_INSTANCEID:
      return nv50_ir::SV_INSTANCE_ID;
   case TGSI_SEMANTIC_VERTEXID:
      return nv50_ir::SV_VERTEX_ID;
   case TGSI_SEMANTIC_GRID_SIZE:
      return nv50_ir::SV_NCTAID;
   case TGSI_SEMANTIC_BLOCK_ID:
      return nv50_ir::SV_CTAID;
   case TGSI_SEMANTIC_BLOCK_SIZE:
      return nv50_ir::SV_NTID;
   case TGSI_SEMANTIC_THREAD_ID:
      return nv50_ir::SV_TID;
   case TGSI_SEMANTIC_SAMPLEID:
      return nv50_ir::SV_SAMPLE_INDEX;
   case TGSI_SEMANTIC_SAMPLEPOS:
      return nv50_ir::SV_SAMPLE_POS;
   case TGSI_SEMANTIC_SAMPLEMASK:
      return nv50_ir::SV_SAMPLE_MASK;
   case TGSI_SEMANTIC_INVOCATIONID:
      return nv50_ir::SV_INVOCATION_ID;
   default:
      break;
   }

   assert(0);
   return nv50_ir::SV_CLOCK;
}
#define NV50_IR_TEX_TARG_CASE(a, b) \ |
case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; |
static nv50_ir::TexTarget translateTexture(uint tex) |
{ |
switch (tex) { |
NV50_IR_TEX_TARG_CASE(1D, 1D); |
NV50_IR_TEX_TARG_CASE(2D, 2D); |
NV50_IR_TEX_TARG_CASE(2D_MSAA, 2D_MS); |
NV50_IR_TEX_TARG_CASE(3D, 3D); |
NV50_IR_TEX_TARG_CASE(CUBE, CUBE); |
NV50_IR_TEX_TARG_CASE(RECT, RECT); |
NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); |
NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); |
NV50_IR_TEX_TARG_CASE(2D_ARRAY_MSAA, 2D_MS_ARRAY); |
NV50_IR_TEX_TARG_CASE(CUBE_ARRAY, CUBE_ARRAY); |
NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); |
NV50_IR_TEX_TARG_CASE(SHADOWCUBE_ARRAY, CUBE_ARRAY_SHADOW); |
NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); |
case TGSI_TEXTURE_UNKNOWN: |
default: |
assert(!"invalid texture target"); |
return nv50_ir::TEX_TARGET_2D; |
} |
} |
// Determine the data type the instruction's source operands are read as,
// based purely on the TGSI opcode. Opcodes not listed are treated as F32.
nv50_ir::DataType Instruction::inferSrcType() const
{
   nv50_ir::DataType ty = nv50_ir::TYPE_F32;

   switch (getOpcode()) {
   // ---- unsigned 32-bit sources ----
   case TGSI_OPCODE_UIF:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_XOR:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_U2D:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_UBFE:
   case TGSI_OPCODE_UMSB:
      ty = nv50_ir::TYPE_U32;
      break;

   // ---- signed 32-bit sources ----
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_IDIV:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_IABS:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_ISSG:
   case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version
   case TGSI_OPCODE_MOD:
   case TGSI_OPCODE_UARL:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
   case TGSI_OPCODE_IBFE:
   case TGSI_OPCODE_IMSB:
      ty = nv50_ir::TYPE_S32;
      break;

   // ---- 64-bit float sources ----
   case TGSI_OPCODE_D2F:
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
   case TGSI_OPCODE_DABS:
   case TGSI_OPCODE_DNEG:
   case TGSI_OPCODE_DADD:
   case TGSI_OPCODE_DMUL:
   case TGSI_OPCODE_DMAX:
   case TGSI_OPCODE_DMIN:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_DRCP:
   case TGSI_OPCODE_DSQRT:
   case TGSI_OPCODE_DMAD:
   case TGSI_OPCODE_DFRAC:
   case TGSI_OPCODE_DRSQ:
   case TGSI_OPCODE_DTRUNC:
   case TGSI_OPCODE_DCEIL:
   case TGSI_OPCODE_DFLR:
   case TGSI_OPCODE_DROUND:
      ty = nv50_ir::TYPE_F64;
      break;

   default:
      // everything else reads 32-bit floats
      break;
   }

   return ty;
}
// Determine the data type of the instruction's result from the TGSI opcode.
// Only opcodes whose destination type differs from (or cannot be derived
// from) the source type are listed; all others use inferSrcType().
nv50_ir::DataType Instruction::inferDstType() const
{
   switch (getOpcode()) {
   // conversions to unsigned, and float/double comparisons
   case TGSI_OPCODE_D2U:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSNE:
      return nv50_ir::TYPE_U32;

   // conversions to signed integer
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_F2I:
      return nv50_ir::TYPE_S32;

   // conversions to single precision float
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_D2F:
      return nv50_ir::TYPE_F32;

   // conversions to double precision float
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_U2D:
   case TGSI_OPCODE_F2D:
      return nv50_ir::TYPE_F64;

   default:
      break;
   }

   return inferSrcType();
}
// Return the condition code a TGSI comparison opcode tests for.
// Opcodes that are not comparisons yield CC_ALWAYS.
nv50_ir::CondCode Instruction::getSetCond() const
{
   nv50_ir::CondCode cc = nv50_ir::CC_ALWAYS;

   switch (getOpcode()) {
   case TGSI_OPCODE_SLT:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_DSLT:
      cc = nv50_ir::CC_LT;
      break;

   case TGSI_OPCODE_SLE:
      cc = nv50_ir::CC_LE;
      break;

   case TGSI_OPCODE_SGE:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_DSGE:
      cc = nv50_ir::CC_GE;
      break;

   case TGSI_OPCODE_SGT:
      cc = nv50_ir::CC_GT;
      break;

   case TGSI_OPCODE_SEQ:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_DSEQ:
      cc = nv50_ir::CC_EQ;
      break;

   // float/double "not equal" uses the NEU code, the integer one uses NE
   case TGSI_OPCODE_SNE:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_DSNE:
      cc = nv50_ir::CC_NEU;
      break;

   case TGSI_OPCODE_USNE:
      cc = nv50_ir::CC_NE;
      break;

   default:
      break;
   }

   return cc;
}
// Expands to one switch case mapping TGSI_OPCODE_<a> to nv50_ir::OP_<b>.
#define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b

// Map a TGSI opcode onto the nv50_ir operation that implements it.
// Opcodes without an entry in this table yield OP_NOP.
static nv50_ir::operation translateOpcode(uint opcode)
{
   switch (opcode) {
   NV50_IR_OPCODE_CASE(ARL, SHL);
   NV50_IR_OPCODE_CASE(MOV, MOV);

   NV50_IR_OPCODE_CASE(RCP, RCP);
   NV50_IR_OPCODE_CASE(RSQ, RSQ);

   NV50_IR_OPCODE_CASE(MUL, MUL);
   NV50_IR_OPCODE_CASE(ADD, ADD);

   NV50_IR_OPCODE_CASE(MIN, MIN);
   NV50_IR_OPCODE_CASE(MAX, MAX);
   NV50_IR_OPCODE_CASE(SLT, SET);
   NV50_IR_OPCODE_CASE(SGE, SET);
   NV50_IR_OPCODE_CASE(MAD, MAD);
   NV50_IR_OPCODE_CASE(SUB, SUB);

   NV50_IR_OPCODE_CASE(FLR, FLOOR);
   NV50_IR_OPCODE_CASE(ROUND, CVT);
   NV50_IR_OPCODE_CASE(EX2, EX2);
   NV50_IR_OPCODE_CASE(LG2, LG2);
   NV50_IR_OPCODE_CASE(POW, POW);

   NV50_IR_OPCODE_CASE(ABS, ABS);

   NV50_IR_OPCODE_CASE(COS, COS);
   NV50_IR_OPCODE_CASE(DDX, DFDX);
   NV50_IR_OPCODE_CASE(DDX_FINE, DFDX);
   NV50_IR_OPCODE_CASE(DDY, DFDY);
   NV50_IR_OPCODE_CASE(DDY_FINE, DFDY);
   NV50_IR_OPCODE_CASE(KILL, DISCARD);

   NV50_IR_OPCODE_CASE(SEQ, SET);
   NV50_IR_OPCODE_CASE(SGT, SET);
   NV50_IR_OPCODE_CASE(SIN, SIN);
   NV50_IR_OPCODE_CASE(SLE, SET);
   NV50_IR_OPCODE_CASE(SNE, SET);

   NV50_IR_OPCODE_CASE(TEX, TEX);
   NV50_IR_OPCODE_CASE(TXD, TXD);
   NV50_IR_OPCODE_CASE(TXP, TEX);

   NV50_IR_OPCODE_CASE(CAL, CALL);
   NV50_IR_OPCODE_CASE(RET, RET);
   NV50_IR_OPCODE_CASE(CMP, SLCT);

   NV50_IR_OPCODE_CASE(TXB, TXB);

   NV50_IR_OPCODE_CASE(DIV, DIV);

   NV50_IR_OPCODE_CASE(TXL, TXL);

   NV50_IR_OPCODE_CASE(CEIL, CEIL);
   NV50_IR_OPCODE_CASE(I2F, CVT);
   NV50_IR_OPCODE_CASE(NOT, NOT);
   NV50_IR_OPCODE_CASE(TRUNC, TRUNC);
   NV50_IR_OPCODE_CASE(SHL, SHL);

   NV50_IR_OPCODE_CASE(AND, AND);
   NV50_IR_OPCODE_CASE(OR, OR);
   NV50_IR_OPCODE_CASE(MOD, MOD);
   NV50_IR_OPCODE_CASE(XOR, XOR);
   NV50_IR_OPCODE_CASE(SAD, SAD);
   NV50_IR_OPCODE_CASE(TXF, TXF);
   NV50_IR_OPCODE_CASE(TXQ, TXQ);
   NV50_IR_OPCODE_CASE(TG4, TXG);
   NV50_IR_OPCODE_CASE(LODQ, TXLQ);

   NV50_IR_OPCODE_CASE(EMIT, EMIT);
   NV50_IR_OPCODE_CASE(ENDPRIM, RESTART);

   NV50_IR_OPCODE_CASE(KILL_IF, DISCARD);

   NV50_IR_OPCODE_CASE(F2I, CVT);
   NV50_IR_OPCODE_CASE(FSEQ, SET);
   NV50_IR_OPCODE_CASE(FSGE, SET);
   NV50_IR_OPCODE_CASE(FSLT, SET);
   NV50_IR_OPCODE_CASE(FSNE, SET);
   NV50_IR_OPCODE_CASE(IDIV, DIV);
   NV50_IR_OPCODE_CASE(IMAX, MAX);
   NV50_IR_OPCODE_CASE(IMIN, MIN);
   NV50_IR_OPCODE_CASE(IABS, ABS);
   NV50_IR_OPCODE_CASE(INEG, NEG);
   NV50_IR_OPCODE_CASE(ISGE, SET);
   NV50_IR_OPCODE_CASE(ISHR, SHR);
   NV50_IR_OPCODE_CASE(ISLT, SET);
   NV50_IR_OPCODE_CASE(F2U, CVT);
   NV50_IR_OPCODE_CASE(U2F, CVT);
   NV50_IR_OPCODE_CASE(UADD, ADD);
   NV50_IR_OPCODE_CASE(UDIV, DIV);
   NV50_IR_OPCODE_CASE(UMAD, MAD);
   NV50_IR_OPCODE_CASE(UMAX, MAX);
   NV50_IR_OPCODE_CASE(UMIN, MIN);
   NV50_IR_OPCODE_CASE(UMOD, MOD);
   NV50_IR_OPCODE_CASE(UMUL, MUL);
   NV50_IR_OPCODE_CASE(USEQ, SET);
   NV50_IR_OPCODE_CASE(USGE, SET);
   NV50_IR_OPCODE_CASE(USHR, SHR);
   NV50_IR_OPCODE_CASE(USLT, SET);
   NV50_IR_OPCODE_CASE(USNE, SET);

   NV50_IR_OPCODE_CASE(DABS, ABS);
   NV50_IR_OPCODE_CASE(DNEG, NEG);
   NV50_IR_OPCODE_CASE(DADD, ADD);
   NV50_IR_OPCODE_CASE(DMUL, MUL);
   NV50_IR_OPCODE_CASE(DMAX, MAX);
   NV50_IR_OPCODE_CASE(DMIN, MIN);
   NV50_IR_OPCODE_CASE(DSLT, SET);
   NV50_IR_OPCODE_CASE(DSGE, SET);
   NV50_IR_OPCODE_CASE(DSEQ, SET);
   NV50_IR_OPCODE_CASE(DSNE, SET);
   NV50_IR_OPCODE_CASE(DRCP, RCP);
   NV50_IR_OPCODE_CASE(DSQRT, SQRT);
   NV50_IR_OPCODE_CASE(DMAD, MAD);
   NV50_IR_OPCODE_CASE(D2I, CVT);
   NV50_IR_OPCODE_CASE(D2U, CVT);
   NV50_IR_OPCODE_CASE(I2D, CVT);
   NV50_IR_OPCODE_CASE(U2D, CVT);
   NV50_IR_OPCODE_CASE(DRSQ, RSQ);
   NV50_IR_OPCODE_CASE(DTRUNC, TRUNC);
   NV50_IR_OPCODE_CASE(DCEIL, CEIL);
   NV50_IR_OPCODE_CASE(DFLR, FLOOR);
   NV50_IR_OPCODE_CASE(DROUND, CVT);

   NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
   NV50_IR_OPCODE_CASE(UMUL_HI, MUL);

   NV50_IR_OPCODE_CASE(SAMPLE, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_B, TXB);
   NV50_IR_OPCODE_CASE(SAMPLE_C, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX);
   NV50_IR_OPCODE_CASE(SAMPLE_D, TXD);
   NV50_IR_OPCODE_CASE(SAMPLE_L, TXL);
   NV50_IR_OPCODE_CASE(SAMPLE_I, TXF);
   NV50_IR_OPCODE_CASE(SAMPLE_I_MS, TXF);
   NV50_IR_OPCODE_CASE(GATHER4, TXG);
   NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ);

   NV50_IR_OPCODE_CASE(ATOMUADD, ATOM);
   NV50_IR_OPCODE_CASE(ATOMXCHG, ATOM);
   NV50_IR_OPCODE_CASE(ATOMCAS, ATOM);
   NV50_IR_OPCODE_CASE(ATOMAND, ATOM);
   NV50_IR_OPCODE_CASE(ATOMOR, ATOM);
   NV50_IR_OPCODE_CASE(ATOMXOR, ATOM);
   NV50_IR_OPCODE_CASE(ATOMUMIN, ATOM);
   NV50_IR_OPCODE_CASE(ATOMUMAX, ATOM);
   NV50_IR_OPCODE_CASE(ATOMIMIN, ATOM);
   NV50_IR_OPCODE_CASE(ATOMIMAX, ATOM);

   NV50_IR_OPCODE_CASE(TEX2, TEX);
   NV50_IR_OPCODE_CASE(TXB2, TXB);
   NV50_IR_OPCODE_CASE(TXL2, TXL);

   NV50_IR_OPCODE_CASE(IBFE, EXTBF);
   NV50_IR_OPCODE_CASE(UBFE, EXTBF);
   NV50_IR_OPCODE_CASE(BFI, INSBF);
   NV50_IR_OPCODE_CASE(BREV, EXTBF);
   NV50_IR_OPCODE_CASE(POPC, POPCNT);
   NV50_IR_OPCODE_CASE(LSB, BFIND);
   NV50_IR_OPCODE_CASE(IMSB, BFIND);
   NV50_IR_OPCODE_CASE(UMSB, BFIND);

   NV50_IR_OPCODE_CASE(END, EXIT);

   default:
      break;
   }

   // no direct equivalent in this table
   return nv50_ir::OP_NOP;
}
// Return the nv50_ir sub-operation that refines the operation chosen for a
// TGSI opcode (memory barrier scope, atomic operation, high multiply).
// Opcodes that need no sub-operation yield 0.
static uint16_t opcodeToSubOp(uint opcode)
{
   uint16_t subOp = 0;

   switch (opcode) {
   // memory fences
   case TGSI_OPCODE_LFENCE:
      subOp = NV50_IR_SUBOP_MEMBAR(L, GL);
      break;
   case TGSI_OPCODE_SFENCE:
      subOp = NV50_IR_SUBOP_MEMBAR(S, GL);
      break;
   case TGSI_OPCODE_MFENCE:
      subOp = NV50_IR_SUBOP_MEMBAR(M, GL);
      break;

   // atomics
   case TGSI_OPCODE_ATOMUADD:
      subOp = NV50_IR_SUBOP_ATOM_ADD;
      break;
   case TGSI_OPCODE_ATOMXCHG:
      subOp = NV50_IR_SUBOP_ATOM_EXCH;
      break;
   case TGSI_OPCODE_ATOMCAS:
      subOp = NV50_IR_SUBOP_ATOM_CAS;
      break;
   case TGSI_OPCODE_ATOMAND:
      subOp = NV50_IR_SUBOP_ATOM_AND;
      break;
   case TGSI_OPCODE_ATOMOR:
      subOp = NV50_IR_SUBOP_ATOM_OR;
      break;
   case TGSI_OPCODE_ATOMXOR:
      subOp = NV50_IR_SUBOP_ATOM_XOR;
      break;
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMIMIN:
      subOp = NV50_IR_SUBOP_ATOM_MIN;
      break;
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMAX:
      subOp = NV50_IR_SUBOP_ATOM_MAX;
      break;

   // high half of a multiply
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UMUL_HI:
      subOp = NV50_IR_SUBOP_MUL_HIGH;
      break;

   default:
      break;
   }

   return subOp;
}
bool Instruction::checkDstSrcAliasing() const |
{ |
if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory |
return false; |
for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { |
if (insn->Src[s].Register.File == TGSI_FILE_NULL) |
break; |
if (insn->Src[s].Register.File == insn->Dst[0].Register.File && |
insn->Src[s].Register.Index == insn->Dst[0].Register.Index) |
return true; |
} |
return false; |
} |
class Source |
{ |
public: |
Source(struct nv50_ir_prog_info *); |
~Source(); |
public: |
bool scanSource(); |
unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } |
public: |
struct tgsi_shader_info scan; |
struct tgsi_full_instruction *insns; |
const struct tgsi_token *tokens; |
struct nv50_ir_prog_info *info; |
nv50_ir::DynArray tempArrays; |
nv50_ir::DynArray immdArrays; |
typedef nv50_ir::BuildUtil::Location Location; |
// these registers are per-subroutine, cannot be used for parameter passing |
std::set<Location> locals; |
bool mainTempsInLMem; |
int clipVertexOutput; |
struct TextureView { |
uint8_t target; // TGSI_TEXTURE_* |
}; |
std::vector<TextureView> textureViews; |
struct Resource { |
uint8_t target; // TGSI_TEXTURE_* |
bool raw; |
uint8_t slot; // $surface index |
}; |
std::vector<Resource> resources; |
private: |
int inferSysValDirection(unsigned sn) const; |
bool scanDeclaration(const struct tgsi_full_declaration *); |
bool scanInstruction(const struct tgsi_full_instruction *); |
void scanProperty(const struct tgsi_full_property *); |
void scanImmediate(const struct tgsi_full_immediate *); |
inline bool isEdgeFlagPassthrough(const Instruction&) const; |
}; |
// Bind the scanner to a program description.
//
// The token stream is taken from prog->bin.source and is dumped when basic
// debugging is enabled.
//
// insns is explicitly initialised to NULL: the destructor tests and frees
// it, so leaving it indeterminate would free a garbage pointer whenever the
// object is destroyed before scanSource() has allocated the array.
// clipVertexOutput likewise starts at -1, the value scanSource() assigns it
// before any declaration is scanned.
Source::Source(struct nv50_ir_prog_info *prog)
   : insns(NULL),
     tokens((const struct tgsi_token *)prog->bin.source),
     info(prog),
     mainTempsInLMem(FALSE),
     clipVertexOutput(-1)
{
   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
      tgsi_dump(tokens, 0);
}
// Release the instruction copy and the immediate tables that scanSource()
// allocated (the latter are stored in the shared nv50_ir_prog_info).
Source::~Source()
{
   if (insns) {
      FREE(insns);
   }

   if (info->immd.data) {
      FREE(info->immd.data);
   }

   if (info->immd.type) {
      FREE(info->immd.type);
   }
}
bool Source::scanSource() |
{ |
unsigned insnCount = 0; |
struct tgsi_parse_context parse; |
tgsi_scan_shader(tokens, &scan); |
insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions * |
sizeof(insns[0])); |
if (!insns) |
return false; |
clipVertexOutput = -1; |
textureViews.resize(scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1); |
resources.resize(scan.file_max[TGSI_FILE_RESOURCE] + 1); |
info->immd.bufSize = 0; |
info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; |
info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; |
info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; |
if (info->type == PIPE_SHADER_FRAGMENT) { |
info->prop.fp.writesDepth = scan.writes_z; |
info->prop.fp.usesDiscard = scan.uses_kill; |
} else |
if (info->type == PIPE_SHADER_GEOMETRY) { |
info->prop.gp.instanceCount = 1; // default value |
} |
info->io.viewportId = -1; |
info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); |
info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); |
tgsi_parse_init(&parse, tokens); |
while (!tgsi_parse_end_of_tokens(&parse)) { |
tgsi_parse_token(&parse); |
switch (parse.FullToken.Token.Type) { |
case TGSI_TOKEN_TYPE_IMMEDIATE: |
scanImmediate(&parse.FullToken.FullImmediate); |
break; |
case TGSI_TOKEN_TYPE_DECLARATION: |
scanDeclaration(&parse.FullToken.FullDeclaration); |
break; |
case TGSI_TOKEN_TYPE_INSTRUCTION: |
insns[insnCount++] = parse.FullToken.FullInstruction; |
scanInstruction(&parse.FullToken.FullInstruction); |
break; |
case TGSI_TOKEN_TYPE_PROPERTY: |
scanProperty(&parse.FullToken.FullProperty); |
break; |
default: |
INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type); |
break; |
} |
} |
tgsi_parse_free(&parse); |
if (mainTempsInLMem) |
info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; |
if (info->io.genUserClip > 0) { |
info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; |
const unsigned int nOut = (info->io.genUserClip + 3) / 4; |
for (unsigned int n = 0; n < nOut; ++n) { |
unsigned int i = info->numOutputs++; |
info->out[i].id = i; |
info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; |
info->out[i].si = n; |
info->out[i].mask = info->io.clipDistanceMask >> (n * 4); |
} |
} |
return info->assignSlots(info) == 0; |
} |
void Source::scanProperty(const struct tgsi_full_property *prop) |
{ |
switch (prop->Property.PropertyName) { |
case TGSI_PROPERTY_GS_OUTPUT_PRIM: |
info->prop.gp.outputPrim = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_INPUT_PRIM: |
info->prop.gp.inputPrim = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: |
info->prop.gp.maxVertices = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_GS_INVOCATIONS: |
info->prop.gp.instanceCount = prop->u[0].Data; |
break; |
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: |
info->prop.fp.separateFragData = TRUE; |
break; |
case TGSI_PROPERTY_FS_COORD_ORIGIN: |
case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: |
// we don't care |
break; |
case TGSI_PROPERTY_VS_PROHIBIT_UCPS: |
info->io.genUserClip = -1; |
break; |
default: |
INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); |
break; |
} |
} |
void Source::scanImmediate(const struct tgsi_full_immediate *imm) |
{ |
const unsigned n = info->immd.count++; |
assert(n < scan.immediate_count); |
for (int c = 0; c < 4; ++c) |
info->immd.data[n * 4 + c] = imm->u[c].Uint; |
info->immd.type[n] = imm->Immediate.DataType; |
} |
int Source::inferSysValDirection(unsigned sn) const |
{ |
switch (sn) { |
case TGSI_SEMANTIC_INSTANCEID: |
case TGSI_SEMANTIC_VERTEXID: |
return 1; |
case TGSI_SEMANTIC_LAYER: |
#if 0 |
case TGSI_SEMANTIC_VIEWPORTINDEX: |
return 0; |
#endif |
case TGSI_SEMANTIC_PRIMID: |
return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0; |
default: |
return 0; |
} |
} |
bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) |
{ |
unsigned i, c; |
unsigned sn = TGSI_SEMANTIC_GENERIC; |
unsigned si = 0; |
const unsigned first = decl->Range.First, last = decl->Range.Last; |
if (decl->Declaration.Semantic) { |
sn = decl->Semantic.Name; |
si = decl->Semantic.Index; |
} |
if (decl->Declaration.Local) { |
for (i = first; i <= last; ++i) { |
for (c = 0; c < 4; ++c) { |
locals.insert( |
Location(decl->Declaration.File, decl->Dim.Index2D, i, c)); |
} |
} |
} |
switch (decl->Declaration.File) { |
case TGSI_FILE_INPUT: |
if (info->type == PIPE_SHADER_VERTEX) { |
// all vertex attributes are equal |
for (i = first; i <= last; ++i) { |
info->in[i].sn = TGSI_SEMANTIC_GENERIC; |
info->in[i].si = i; |
} |
} else { |
for (i = first; i <= last; ++i, ++si) { |
info->in[i].id = i; |
info->in[i].sn = sn; |
info->in[i].si = si; |
if (info->type == PIPE_SHADER_FRAGMENT) { |
// translate interpolation mode |
switch (decl->Interp.Interpolate) { |
case TGSI_INTERPOLATE_CONSTANT: |
info->in[i].flat = 1; |
break; |
case TGSI_INTERPOLATE_COLOR: |
info->in[i].sc = 1; |
break; |
case TGSI_INTERPOLATE_LINEAR: |
info->in[i].linear = 1; |
break; |
default: |
break; |
} |
if (decl->Interp.Location || info->io.sampleInterp) |
info->in[i].centroid = 1; |
} |
} |
} |
break; |
case TGSI_FILE_OUTPUT: |
for (i = first; i <= last; ++i, ++si) { |
switch (sn) { |
case TGSI_SEMANTIC_POSITION: |
if (info->type == PIPE_SHADER_FRAGMENT) |
info->io.fragDepth = i; |
else |
if (clipVertexOutput < 0) |
clipVertexOutput = i; |
break; |
case TGSI_SEMANTIC_COLOR: |
if (info->type == PIPE_SHADER_FRAGMENT) |
info->prop.fp.numColourResults++; |
break; |
case TGSI_SEMANTIC_EDGEFLAG: |
info->io.edgeFlagOut = i; |
break; |
case TGSI_SEMANTIC_CLIPVERTEX: |
clipVertexOutput = i; |
break; |
case TGSI_SEMANTIC_CLIPDIST: |
info->io.clipDistanceMask |= |
decl->Declaration.UsageMask << (si * 4); |
info->io.genUserClip = -1; |
break; |
case TGSI_SEMANTIC_SAMPLEMASK: |
info->io.sampleMask = i; |
break; |
case TGSI_SEMANTIC_VIEWPORT_INDEX: |
info->io.viewportId = i; |
break; |
default: |
break; |
} |
info->out[i].id = i; |
info->out[i].sn = sn; |
info->out[i].si = si; |
} |
break; |
case TGSI_FILE_SYSTEM_VALUE: |
switch (sn) { |
case TGSI_SEMANTIC_INSTANCEID: |
info->io.instanceId = first; |
break; |
case TGSI_SEMANTIC_VERTEXID: |
info->io.vertexId = first; |
break; |
default: |
break; |
} |
for (i = first; i <= last; ++i, ++si) { |
info->sv[i].sn = sn; |
info->sv[i].si = si; |
info->sv[i].input = inferSysValDirection(sn); |
} |
break; |
case TGSI_FILE_RESOURCE: |
for (i = first; i <= last; ++i) { |
resources[i].target = decl->Resource.Resource; |
resources[i].raw = decl->Resource.Raw; |
resources[i].slot = i; |
} |
break; |
case TGSI_FILE_SAMPLER_VIEW: |
for (i = first; i <= last; ++i) |
textureViews[i].target = decl->SamplerView.Resource; |
break; |
case TGSI_FILE_NULL: |
case TGSI_FILE_TEMPORARY: |
case TGSI_FILE_ADDRESS: |
case TGSI_FILE_CONSTANT: |
case TGSI_FILE_IMMEDIATE: |
case TGSI_FILE_PREDICATE: |
case TGSI_FILE_SAMPLER: |
break; |
default: |
ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); |
return false; |
} |
return true; |
} |
inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const |
{ |
return insn.getOpcode() == TGSI_OPCODE_MOV && |
insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && |
insn.getSrc(0).getFile() == TGSI_FILE_INPUT; |
} |
bool Source::scanInstruction(const struct tgsi_full_instruction *inst) |
{ |
Instruction insn(inst); |
if (insn.getOpcode() == TGSI_OPCODE_BARRIER) |
info->numBarriers = 1; |
if (insn.dstCount()) { |
if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { |
Instruction::DstRegister dst = insn.getDst(0); |
if (dst.isIndirect(0)) |
for (unsigned i = 0; i < info->numOutputs; ++i) |
info->out[i].mask = 0xf; |
else |
info->out[dst.getIndex(0)].mask |= dst.getMask(); |
if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || |
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || |
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_LAYER || |
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_VIEWPORT_INDEX || |
info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) |
info->out[dst.getIndex(0)].mask &= 1; |
if (isEdgeFlagPassthrough(insn)) |
info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); |
} else |
if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { |
if (insn.getDst(0).isIndirect(0)) |
mainTempsInLMem = TRUE; |
} |
} |
for (unsigned s = 0; s < insn.srcCount(); ++s) { |
Instruction::SrcRegister src = insn.getSrc(s); |
if (src.getFile() == TGSI_FILE_TEMPORARY) { |
if (src.isIndirect(0)) |
mainTempsInLMem = TRUE; |
} else |
if (src.getFile() == TGSI_FILE_RESOURCE) { |
if (src.getIndex(0) == TGSI_RESOURCE_GLOBAL) |
info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ? |
0x1 : 0x2; |
} |
if (src.getFile() != TGSI_FILE_INPUT) |
continue; |
unsigned mask = insn.srcMask(s); |
if (src.isIndirect(0)) { |
for (unsigned i = 0; i < info->numInputs; ++i) |
info->in[i].mask = 0xf; |
} else { |
const int i = src.getIndex(0); |
for (unsigned c = 0; c < 4; ++c) { |
if (!(mask & (1 << c))) |
continue; |
int k = src.getSwizzle(c); |
if (k <= TGSI_SWIZZLE_W) |
info->in[i].mask |= 1 << k; |
} |
switch (info->in[i].sn) { |
case TGSI_SEMANTIC_PSIZE: |
case TGSI_SEMANTIC_PRIMID: |
case TGSI_SEMANTIC_FOG: |
info->in[i].mask &= 0x1; |
break; |
case TGSI_SEMANTIC_PCOORD: |
info->in[i].mask &= 0x3; |
break; |
default: |
break; |
} |
} |
} |
return true; |
} |
// Determine the texture target used by source operand 's'.
//
// For RESOURCE and SAMPLER_VIEW operands the target recorded for the
// declared resource / sampler view is used; otherwise the target encoded in
// the instruction itself is taken.
nv50_ir::TexInstruction::Target
Instruction::getTexture(const tgsi::Source *code, int s) const
{
   // XXX: indirect access
   SrcRegister src = getSrc(s);

   if (src.getFile() == TGSI_FILE_RESOURCE) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->resources.at(r).target);
   }

   if (src.getFile() == TGSI_FILE_SAMPLER_VIEW) {
      const unsigned int r = src.getIndex(0);
      return translateTexture(code->textureViews.at(r).target);
   }

   return translateTexture(insn->Texture.Texture);
}
} // namespace tgsi |
namespace { |
using namespace nv50_ir; |
class Converter : public BuildUtil |
{ |
public: |
Converter(Program *, const tgsi::Source *); |
~Converter(); |
bool run(); |
private: |
struct Subroutine |
{ |
Subroutine(Function *f) : f(f) { } |
Function *f; |
ValueMap values; |
}; |
Value *shiftAddress(Value *); |
Value *getVertexBase(int s); |
DataArray *getArrayForFile(unsigned file, int idx); |
Value *fetchSrc(int s, int c); |
Value *acquireDst(int d, int c); |
void storeDst(int d, int c, Value *); |
Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr); |
void storeDst(const tgsi::Instruction::DstRegister dst, int c, |
Value *val, Value *ptr); |
Value *applySrcMod(Value *, int s, int c); |
Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); |
Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); |
Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); |
bool handleInstruction(const struct tgsi_full_instruction *); |
void exportOutputs(); |
inline Subroutine *getSubroutine(unsigned ip); |
inline Subroutine *getSubroutine(Function *); |
inline bool isEndOfSubroutine(uint ip); |
void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); |
// R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto) |
void setTexRS(TexInstruction *, unsigned int& s, int R, int S); |
void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); |
void handleTXF(Value *dst0[4], int R, int L_M); |
void handleTXQ(Value *dst0[4], enum TexQuery); |
void handleLIT(Value *dst0[4]); |
void handleUserClipPlanes(); |
Symbol *getResourceBase(int r); |
void getResourceCoords(std::vector<Value *>&, int r, int s); |
void handleLOAD(Value *dst0[4]); |
void handleSTORE(); |
void handleATOM(Value *dst0[4], DataType, uint16_t subOp); |
void handleINTERP(Value *dst0[4]); |
Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); |
void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); |
Value *buildDot(int dim); |
class BindArgumentsPass : public Pass { |
public: |
BindArgumentsPass(Converter &conv) : conv(conv) { } |
private: |
Converter &conv; |
Subroutine *sub; |
inline const Location *getValueLocation(Subroutine *, Value *); |
template<typename T> inline void |
updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *), |
T (Function::*proto)); |
template<typename T> inline void |
updatePrototype(BitSet *set, void (Function::*updateSet)(), |
T (Function::*proto)); |
protected: |
bool visit(Function *); |
bool visit(BasicBlock *bb) { return false; } |
}; |
private: |
const tgsi::Source *code; |
const struct nv50_ir_prog_info *info; |
struct { |
std::map<unsigned, Subroutine> map; |
Subroutine *cur; |
} sub; |
uint ip; // instruction pointer |
tgsi::Instruction tgsi; |
DataType dstTy; |
DataType srcTy; |
DataArray tData; // TGSI_FILE_TEMPORARY |
DataArray aData; // TGSI_FILE_ADDRESS |
DataArray pData; // TGSI_FILE_PREDICATE |
DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) |
Value *zero; |
Value *fragCoord[4]; |
Value *clipVtx[4]; |
Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) |
uint8_t vtxBaseValid; |
Stack condBBs; // fork BB, then else clause BB |
Stack joinBBs; // fork BB, for inserting join ops on ENDIF |
Stack loopBBs; // loop headers |
Stack breakBBs; // end of / after loop |
Value *viewport; |
}; |
// Create the symbol addressing component 'c' of a TGSI source register.
// The component is taken through the register's swizzle; the fallback
// address assumes 16 bytes per register and 4 bytes per component.
Symbol *
Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c)
{
   const int swz = src.getSwizzle(c);
   const int fileIndex = src.is2D() ? src.getIndex(1) : 0;
   const uint32_t address = src.getIndex(0) * 16 + swz * 4;

   /* TODO: Use Array ID when it's available for the index */
   return makeSym(src.getFile(), fileIndex, src.getIndex(0), swz, address);
}
// Create the symbol addressing component 'c' of a TGSI destination
// register. The fallback address assumes 16 bytes per register and 4 bytes
// per component.
Symbol *
Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c)
{
   const int fileIndex = dst.is2D() ? dst.getIndex(1) : 0;
   const uint32_t address = dst.getIndex(0) * 16 + c * 4;

   /* TODO: Use Array ID when it's available for the index */
   return makeSym(dst.getFile(), fileIndex, dst.getIndex(0), c, address);
}
// Create a symbol in the nv50_ir file corresponding to 'tgsiFile'.
//
// With a non-negative register index, shader inputs and outputs are placed
// at the slot assigned to the given component, and system values are bound
// to their translated semantic. In every other case the symbol is placed at
// the supplied address.
Symbol *
Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
{
   Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile));

   sym->reg.fileIndex = fileIdx;

   if (idx >= 0 && sym->reg.file == FILE_SHADER_INPUT) {
      sym->setOffset(info->in[idx].slot[c] * 4);
   } else if (idx >= 0 && sym->reg.file == FILE_SHADER_OUTPUT) {
      sym->setOffset(info->out[idx].slot[c] * 4);
   } else if (idx >= 0 && sym->reg.file == FILE_SYSTEM_VALUE) {
      sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c);
   } else {
      sym->setOffset(address);
   }

   return sym;
}
static inline uint8_t |
translateInterpMode(const struct nv50_ir_varying *var, operation& op) |
{ |
uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; |
if (var->flat) |
mode = NV50_IR_INTERP_FLAT; |
else |
if (var->linear) |
mode = NV50_IR_INTERP_LINEAR; |
else |
if (var->sc) |
mode = NV50_IR_INTERP_SC; |
op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) |
? OP_PINTERP : OP_LINTERP; |
if (var->centroid) |
mode |= NV50_IR_INTERP_CENTROID; |
return mode; |
} |
// Emit the interpolation instruction that reads component 'c' of a shader
// input and return the value it defines.
//
// With an indirect address ('ptr' set) the interpolation mode of input 0 is
// used, since the accessed input is not known at this point.
Value *
Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   operation op;

   // XXX: no way to know interpolation mode if we don't know what's accessed
   const int varying = ptr ? 0 : src.getIndex(0);
   const uint8_t mode = translateInterpMode(&info->in[varying], op);

   Instruction *interp = new_Instruction(func, op, TYPE_F32);

   interp->setDef(0, getScratch());
   interp->setSrc(0, srcToSym(src, c));

   // OP_PINTERP takes fragCoord[3] as its second source
   if (op == OP_PINTERP)
      interp->setSrc(1, fragCoord[3]);

   if (ptr)
      interp->setIndirect(0, 0, ptr);

   interp->setInterpolate(mode);

   bb->insertTail(interp);
   return interp->getDef(0);
}
// Apply the ABS / NEG modifiers of source operand 's' to 'val' by emitting
// explicit OP_ABS / OP_NEG instructions (ABS first, then NEG). Returns the
// resulting value, or 'val' itself when no modifier is set.
Value *
Converter::applySrcMod(Value *val, int s, int c)
{
   const Modifier mod = tgsi.getSrc(s).getMod(c);
   const DataType type = tgsi.inferSrcType();

   Value *result = val;

   if (mod & Modifier(NV50_IR_MOD_ABS))
      result = mkOp1v(OP_ABS, type, getScratch(), result);

   if (mod & Modifier(NV50_IR_MOD_NEG))
      result = mkOp1v(OP_NEG, type, getScratch(), result);

   return result;
}
// Return the vertex base address for source operand 's', computing it with
// an OP_PFETCH on first use and caching it in vtxBase[] afterwards.
Value *
Converter::getVertexBase(int s)
{
   assert(s < 5);

   const int bit = 1 << s;

   if (vtxBaseValid & bit)
      return vtxBase[s]; // already computed

   const int index = tgsi.getSrc(s).getIndex(1);

   // optional relative part of the second (vertex) dimension
   Value *rel = NULL;
   if (tgsi.getSrc(s).isIndirect(1))
      rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL);

   vtxBaseValid |= bit;
   vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
                       mkImm(index), rel);

   return vtxBase[s];
}
// Fetch component 'c' of source operand 's' of the current TGSI
// instruction, including indirect addressing in both dimensions and the
// operand's ABS / NEG modifiers.
Value *
Converter::fetchSrc(int s, int c)
{
   tgsi::Instruction::SrcRegister src = tgsi.getSrc(s);

   // first dimension: relative register index
   Value *ptr = NULL;
   if (src.isIndirect(0))
      ptr = fetchSrc(src.getIndirect(0), 0, NULL);

   // second dimension: only inputs and constants are handled here
   Value *dimRel = NULL;
   if (src.is2D()) {
      if (src.getFile() == TGSI_FILE_INPUT) {
         dimRel = getVertexBase(s);
      } else if (src.getFile() == TGSI_FILE_CONSTANT) {
         // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k]
         if (src.isIndirect(1))
            dimRel = fetchSrc(src.getIndirect(1), 0, 0);
      }
   }

   Value *res = fetchSrc(src, c, ptr);

   if (dimRel)
      res->getInsn()->setIndirect(0, 1, dimRel);

   return applySrcMod(res, s, c);
}
// Return the DataArray backing a register-like TGSI file. Files without
// such an array trip an assertion and yield NULL. The 'idx' argument is
// not consulted here.
Converter::DataArray *
Converter::getArrayForFile(unsigned file, int idx)
{
   DataArray *array = NULL;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array = &tData;
      break;
   case TGSI_FILE_PREDICATE:
      array = &pData;
      break;
   case TGSI_FILE_ADDRESS:
      array = &aData;
      break;
   case TGSI_FILE_OUTPUT:
      // outputs are only kept in an array for fragment programs
      assert(prog->getType() == Program::TYPE_FRAGMENT);
      array = &oData;
      break;
   default:
      assert(!"invalid/unhandled TGSI source file");
      break;
   }

   return array;
}
// Turn a register index into a byte offset by shifting it left by 4
// (i.e. multiplying by 16). A NULL index is passed through unchanged.
Value *
Converter::shiftAddress(Value *index)
{
   if (index)
      return mkOp2v(OP_SHL, TYPE_U32, getSSA(4, FILE_ADDRESS), index, mkImm(4));

   return NULL;
}
// Fetch component 'c' of a TGSI source register, without applying source
// modifiers. 'ptr' is the optional relative address (in registers) of the
// first dimension.
//
// Immediates become immediate loads, constants and most inputs become
// memory loads, fragment inputs are interpolated, system values are read
// with OP_RDSV, and all remaining files are served from their DataArray.
Value *
Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
{
   const int idx2d = src.is2D() ? src.getIndex(1) : 0;
   const int idx = src.getIndex(0);
   const int swz = src.getSwizzle(c);

   switch (src.getFile()) {
   case TGSI_FILE_IMMEDIATE:
      assert(!ptr);
      return loadImm(NULL, info->immd.data[idx * 4 + swz]);

   case TGSI_FILE_CONSTANT:
      return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));

   case TGSI_FILE_INPUT: {
      const bool isFragment = prog->getType() == Program::TYPE_FRAGMENT;
      const bool isGeometry = prog->getType() == Program::TYPE_GEOMETRY;

      if (isFragment) {
         // don't load masked inputs, won't be assigned a slot
         if (!ptr && !(info->in[idx].mask & (1 << swz)))
            return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f);

         if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE)
            return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0));

         return interpolate(src, c, shiftAddress(ptr));
      }

      if (isGeometry) {
         if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_PRIMID)
            return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));

         // XXX: This is going to be a problem with scalar arrays, i.e. when
         // we cannot assume that the address is given in units of vec4.
         //
         // nv50 and nvc0 need different things here, so let the lowering
         // passes decide what to do with the address
         if (ptr)
            return mkLoadv(TYPE_U32, srcToSym(src, c), ptr);
      }

      return mkLoadv(TYPE_U32, srcToSym(src, c), shiftAddress(ptr));
   }

   case TGSI_FILE_OUTPUT:
      assert(!"load from output file");
      return NULL;

   case TGSI_FILE_SYSTEM_VALUE:
      assert(!ptr);
      return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));

   default:
      break;
   }

   // register-like files are kept in DataArrays
   return getArrayForFile(src.getFile(), idx2d)->load(
      sub.cur->values, idx, swz, shiftAddress(ptr));
}
// Obtain the value that component 'c' of destination operand 'd' should be
// computed into.
//
// Returns NULL for masked components and for RESOURCE destinations, a
// scratch value when the result has to be stored explicitly afterwards
// (indirect destinations, system values, outputs of non-fragment programs),
// and the DataArray value of the register otherwise.
Value *
Converter::acquireDst(int d, int c)
{
   const tgsi::Instruction::DstRegister dst = tgsi.getDst(d);
   const unsigned f = dst.getFile();
   const int idx = dst.getIndex(0);
   const int idx2d = dst.is2D() ? dst.getIndex(1) : 0;

   if (dst.isMasked(c))
      return NULL;
   if (f == TGSI_FILE_RESOURCE)
      return NULL;

   if (dst.isIndirect(0))
      return getScratch();
   if (f == TGSI_FILE_SYSTEM_VALUE)
      return getScratch();
   if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)
      return getScratch();

   return getArrayForFile(f, idx2d)->acquire(sub.cur->values, idx, c);
}
// Finish the write of val to component c of destination operand d of the |
// current instruction: apply the instruction's saturate mode, fetch the |
// address for an indirect destination, mirror the value into clipVtx[] when |
// it targets the clip vertex output, then pass everything on to the |
// storeDst() overload that takes the destination register. |
void |
Converter::storeDst(int d, int c, Value *val) |
{ |
const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); |
// saturation is applied in place: val is both source and destination |
switch (tgsi.getSaturate()) { |
case TGSI_SAT_NONE: |
break; |
case TGSI_SAT_ZERO_ONE: |
mkOp1(OP_SAT, dstTy, val, val); |
break; |
case TGSI_SAT_MINUS_PLUS_ONE: |
// clamp to [-1, +1] with a MAX followed by a MIN |
mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); |
mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); |
break; |
default: |
assert(!"invalid saturation mode"); |
break; |
} |
Value *ptr = NULL; |
// indirect destination: read component 0 of the address operand and shift it |
if (dst.isIndirect(0)) |
ptr = shiftAddress(fetchSrc(dst.getIndirect(0), 0, NULL)); |
// with generated user clip planes, a direct write to the clip vertex output |
// is copied into clipVtx[c], and that copy is what gets stored |
if (info->io.genUserClip > 0 && |
dst.getFile() == TGSI_FILE_OUTPUT && |
!dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { |
mkMov(clipVtx[c], val); |
val = clipVtx[c]; |
} |
storeDst(dst, c, val, ptr); |
} |
void |
Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, |
Value *val, Value *ptr) |
{ |
const unsigned f = dst.getFile(); |
const int idx = dst.getIndex(0); |
const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; |
if (f == TGSI_FILE_SYSTEM_VALUE) { |
assert(!ptr); |
mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); |
} else |
if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { |
if (ptr || (info->out[idx].mask & (1 << c))) { |
/* Save the viewport index into a scratch register so that it can be |
exported at EMIT time */ |
if (info->out[idx].sn == TGSI_SEMANTIC_VIEWPORT_INDEX && |
viewport != NULL) |
mkOp1(OP_MOV, TYPE_U32, viewport, val); |
else |
mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); |
} |
} else |
if (f == TGSI_FILE_TEMPORARY || |
f == TGSI_FILE_PREDICATE || |
f == TGSI_FILE_ADDRESS || |
f == TGSI_FILE_OUTPUT) { |
getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); |
} else { |
assert(!"invalid dst file"); |
} |
} |
// Loop header: runs the statement that follows once for every channel chan |
// in 0..3 that is not masked in destination d of inst. chan must be an |
// already declared variable. The expansion ends in an unbraced 'if', so an |
// 'else' placed directly after the controlled statement would attach to it. |
#define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \ |
for (chan = 0; chan < 4; ++chan) \ |
if (!inst.getDst(d).isMasked(chan)) |
// Emit the dot product of the first dim components of source operands 0 and 1
// of the current instruction: one MUL for component 0, then one MAD per
// further component, all accumulating into a single scratch value that is
// returned.
Value *
Converter::buildDot(int dim)
{
   assert(dim > 0);

   Value *a = fetchSrc(0, 0), *b = fetchSrc(1, 0);
   Value *acc = getScratch();
   mkOp2(OP_MUL, TYPE_F32, acc, a, b);

   int comp = 1;
   while (comp < dim) {
      a = fetchSrc(0, comp);
      b = fetchSrc(1, comp);
      mkOp3(OP_MAD, TYPE_F32, acc, a, b, acc);
      ++comp;
   }

   return acc;
}
void |
Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) |
{ |
FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); |
join->fixed = 1; |
conv->insertHead(join); |
fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); |
fork->insertBefore(fork->getExit(), fork->joinAt); |
} |
void |
Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) |
{ |
unsigned rIdx = 0, sIdx = 0; |
if (R >= 0) |
rIdx = tgsi.getSrc(R).getIndex(0); |
if (S >= 0) |
sIdx = tgsi.getSrc(S).getIndex(0); |
tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx); |
if (tgsi.getSrc(R).isIndirect(0)) { |
tex->tex.rIndirectSrc = s; |
tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL)); |
} |
if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) { |
tex->tex.sIndirectSrc = s; |
tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL)); |
} |
} |
void |
Converter::handleTXQ(Value *dst0[4], enum TexQuery query) |
{ |
TexInstruction *tex = new_TexInstruction(func, OP_TXQ); |
tex->tex.query = query; |
unsigned int c, d; |
for (d = 0, c = 0; c < 4; ++c) { |
if (!dst0[c]) |
continue; |
tex->tex.mask |= 1 << c; |
tex->setDef(d++, dst0[c]); |
} |
tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level |
setTexRS(tex, c, 1, -1); |
bb->insertTail(tex); |
} |
// Produce projected texture coordinates: for every component selected in |
// mask, dst[c] receives src[c] divided by the w component of TGSI source 0. |
// Coordinates that come straight from a perspective interpolation (PINTERP) |
// are not multiplied; their interpolation instruction is cloned with the |
// reciprocal as second source instead. Everything else gets an explicit MUL. |
void |
Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) |
{ |
Value *proj = fetchSrc(0, 3); |
Instruction *insn = proj->getUniqueInsn(); |
int c; |
// if w itself was interpolated with PINTERP, re-issue a clone of that |
// interpolation as LINTERP with its second source removed |
if (insn->op == OP_PINTERP) { |
bb->insertTail(insn = cloneForward(func, insn)); |
insn->op = OP_LINTERP; |
insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode()); |
insn->setSrc(1, NULL); |
proj = insn->getDef(0); |
} |
proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj); |
// first pass: coordinates produced by PINTERP; each handled component is |
// cleared from mask |
for (c = 0; c < 4; ++c) { |
if (!(mask & (1 << c))) |
continue; |
if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP) |
continue; |
mask &= ~(1 << c); |
bb->insertTail(insn = cloneForward(func, insn)); |
insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode()); |
insn->setSrc(1, proj); |
dst[c] = insn->getDef(0); |
} |
if (!mask) |
return; |
// second pass: the remaining components are multiplied by the reciprocal |
// of a freshly fetched w |
proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3)); |
for (c = 0; c < 4; ++c) |
if (mask & (1 << c)) |
dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj); |
} |
// order of nv50 ir sources: x y z layer lod/bias shadow |
// order of TGSI TEX sources: x y z layer shadow lod/bias |
// lowering will finally set the hw specific order (like array first on nvc0) |
// |
// Emit the texture instruction for the current TGSI texturing opcode. |
// dst: destination value per component, NULL where nothing is written |
// R, S: indices of the TGSI sources naming the texture and the sampler |
// L, C, Dx, Dy: locations of lod/bias, shadow compare value and derivatives, |
// each encoded as (TGSI source index << 4) | first component; |
// C == 0x0f asks for the compare value location to be guessed |
// from the target's argument count |
void |
Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) |
{ |
Value *val; |
Value *arg[4], *src[8]; |
Value *lod = NULL, *shd = NULL; |
unsigned int s, c, d; |
TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
TexInstruction::Target tgt = tgsi.getTexture(code, R); |
// coordinates: arg[] keeps the fetched values, src[] is what the |
// instruction will finally read (may be replaced below) |
for (s = 0; s < tgt.getArgCount(); ++s) |
arg[s] = src[s] = fetchSrc(0, s); |
if (texi->op == OP_TXL || texi->op == OP_TXB) |
lod = fetchSrc(L >> 4, L & 3); |
if (C == 0x0f) |
C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src |
// TG4 on a shadow cube array takes the compare value from source 1 |
if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && |
tgt == TEX_TARGET_CUBE_ARRAY_SHADOW) |
shd = fetchSrc(1, 0); |
else if (tgt.isShadow()) |
shd = fetchSrc(C >> 4, C & 3); |
if (texi->op == OP_TXD) { |
for (c = 0; c < tgt.getDim(); ++c) { |
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); |
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); |
} |
} |
// cube textures don't care about projection value, it's divided out |
if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) { |
unsigned int n = tgt.getDim(); |
// the compare value is projected together with the coordinates |
if (shd) { |
arg[n] = shd; |
++n; |
assert(tgt.getDim() == tgt.getArgCount()); |
} |
loadProjTexCoords(src, arg, (1 << n) - 1); |
if (shd) |
shd = src[n - 1]; |
} |
// cube maps: scale x, y, z by the reciprocal of the largest absolute value |
if (tgt.isCube()) { |
for (c = 0; c < 3; ++c) |
src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); |
val = getScratch(); |
mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); |
mkOp2(OP_MAX, TYPE_F32, val, src[2], val); |
mkOp1(OP_RCP, TYPE_F32, val, val); |
for (c = 0; c < 3; ++c) |
src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); |
} |
// definitions are packed; the mask records which components they are |
for (c = 0, d = 0; c < 4; ++c) { |
if (dst[c]) { |
texi->setDef(d++, dst[c]); |
texi->tex.mask |= 1 << c; |
} else { |
// NOTE: maybe hook up def too, for CSE |
} |
} |
// sources: coordinates, then lod/bias, then the compare value, then any |
// indirect texture/sampler addresses added by setTexRS() |
for (s = 0; s < tgt.getArgCount(); ++s) |
texi->setSrc(s, src[s]); |
if (lod) |
texi->setSrc(s++, lod); |
if (shd) |
texi->setSrc(s++, shd); |
setTexRS(texi, s, R, S); |
if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) |
texi->tex.levelZero = true; |
// non-shadow TG4: the component to gather comes from source 1 |
if (tgsi.getOpcode() == TGSI_OPCODE_TG4 && !tgt.isShadow()) |
texi->tex.gatherComp = tgsi.getSrc(1).getValueU32(0, info); |
texi->tex.useOffsets = tgsi.getNumTexOffsets(); |
// texel offsets: three components per offset operand |
for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { |
for (c = 0; c < 3; ++c) { |
texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); |
texi->offset[s][c].setInsn(texi); |
} |
} |
bb->insertTail(texi); |
} |
// 1st source: xyz = coordinates, w = lod/sample |
// 2nd source: offset |
// |
// Emit the texel fetch instruction for the current TGSI opcode. |
// dst: destination value per component, NULL where nothing is written |
// R: index of the TGSI source naming the texture |
// L_M: location of the lod or sample index, encoded as |
// (TGSI source index << 4) | component |
void |
Converter::handleTXF(Value *dst[4], int R, int L_M) |
{ |
TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); |
int ms; |
unsigned int c, d, s; |
texi->tex.target = tgsi.getTexture(code, R); |
ms = texi->tex.target.isMS() ? 1 : 0; |
texi->tex.levelZero = ms; /* MS textures don't have mip-maps */ |
// definitions are packed; the mask records which components they are |
for (c = 0, d = 0; c < 4; ++c) { |
if (dst[c]) { |
texi->setDef(d++, dst[c]); |
texi->tex.mask |= 1 << c; |
} |
} |
// for MS targets one argument less is fetched from source 0 here; the slot |
// after the fetched ones is filled from L_M below |
for (c = 0; c < (texi->tex.target.getArgCount() - ms); ++c) |
texi->setSrc(c, fetchSrc(0, c)); |
texi->setSrc(c++, fetchSrc(L_M >> 4, L_M & 3)); // lod or ms |
setTexRS(texi, c, R, -1); |
texi->tex.useOffsets = tgsi.getNumTexOffsets(); |
// texel offsets: three components per offset operand |
for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { |
for (c = 0; c < 3; ++c) { |
texi->offset[s][c].set(fetchSrc(tgsi.getTexOffset(s), c, NULL)); |
texi->offset[s][c].setInsn(texi); |
} |
} |
bb->insertTail(texi); |
} |
// Emit the IR for TGSI LIT into dst0, for the components enabled in the |
// write mask of destination 0: |
// x, w: the constant 1.0 |
// y: max(src.x, 0) |
// z: a select between pow(max(src.y, 0), src.w clamped to +-127.999999) |
// and zero, controlled by max(src.x, 0) |
void |
Converter::handleLIT(Value *dst0[4]) |
{ |
Value *val0 = NULL; |
unsigned int mask = tgsi.getDst(0).getMask(); |
if (mask & (1 << 0)) |
loadImm(dst0[0], 1.0f); |
if (mask & (1 << 3)) |
loadImm(dst0[3], 1.0f); |
// max(src.x, 0) is needed for y and for z |
if (mask & (3 << 1)) { |
val0 = getScratch(); |
mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); |
if (mask & (1 << 1)) |
mkMov(dst0[1], val0); |
} |
if (mask & (1 << 2)) { |
Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); |
Value *val1 = getScratch(), *val3 = getScratch(); |
Value *pos128 = loadImm(NULL, +127.999999f); |
Value *neg128 = loadImm(NULL, -127.999999f); |
mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); |
mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); |
mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); |
mkOp2(OP_POW, TYPE_F32, val3, val1, val3); |
// NOTE(review): presumably picks val3 when val0 > 0 and zero otherwise - |
// confirm against the OP_SLCT operand convention |
mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], TYPE_F32, val3, zero, val0); |
} |
} |
static inline bool |
isResourceSpecial(const int r) |
{ |
return (r == TGSI_RESOURCE_GLOBAL || |
r == TGSI_RESOURCE_LOCAL || |
r == TGSI_RESOURCE_PRIVATE || |
r == TGSI_RESOURCE_INPUT); |
} |
static inline bool |
isResourceRaw(const tgsi::Source *code, const int r) |
{ |
return isResourceSpecial(r) || code->resources[r].raw; |
} |
// Texture target of resource r: TEX_TARGET_BUFFER for the special resource
// ids, otherwise the translated TGSI target of the declared resource.
static inline nv50_ir::TexTarget
getResourceTarget(const tgsi::Source *code, int r)
{
   nv50_ir::TexTarget target = nv50_ir::TEX_TARGET_BUFFER;

   if (!isResourceSpecial(r))
      target = tgsi::translateTexture(code->resources.at(r).target);

   return target;
}
// Create the symbol that addresses of resource r are relative to.
//   GLOBAL:  global memory symbol with index 15
//   LOCAL:   shared memory at info->prop.cp.sharedOffset (compute only)
//   PRIVATE: local memory at info->bin.tlsSpace
//   INPUT:   shader input at info->prop.cp.inputOffset (compute only)
//   other:   global memory symbol indexed by the declared resource's slot
Symbol *
Converter::getResourceBase(const int r)
{
   switch (r) {
   case TGSI_RESOURCE_GLOBAL:
      return new_Symbol(prog, nv50_ir::FILE_MEMORY_GLOBAL, 15);

   case TGSI_RESOURCE_LOCAL:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_MEMORY_SHARED, 0, TYPE_U32,
                      info->prop.cp.sharedOffset);

   case TGSI_RESOURCE_PRIVATE:
      return mkSymbol(nv50_ir::FILE_MEMORY_LOCAL, 0, TYPE_U32,
                      info->bin.tlsSpace);

   case TGSI_RESOURCE_INPUT:
      assert(prog->getType() == Program::TYPE_COMPUTE);
      return mkSymbol(nv50_ir::FILE_SHADER_INPUT, 0, TYPE_U32,
                      info->prop.cp.inputOffset);

   default:
      return new_Symbol(prog,
                        nv50_ir::FILE_MEMORY_GLOBAL, code->resources.at(r).slot);
   }
}
void |
Converter::getResourceCoords(std::vector<Value *> &coords, int r, int s) |
{ |
const int arg = |
TexInstruction::Target(getResourceTarget(code, r)).getArgCount(); |
for (int c = 0; c < arg; ++c) |
coords.push_back(fetchSrc(s, c)); |
// NOTE: TGSI_RESOURCE_GLOBAL needs FILE_GPR; this is an nv50 quirk |
if (r == TGSI_RESOURCE_LOCAL || |
r == TGSI_RESOURCE_PRIVATE || |
r == TGSI_RESOURCE_INPUT) |
coords[0] = mkOp1v(OP_MOV, TYPE_U32, getScratch(4, FILE_ADDRESS), |
coords[0]); |
} |
// Split a 4-bit component mask into at most two runs of consecutive
// components, each of which can be handled by one load or store.
//
// comp[i] receives the first component of run i and size[i] its number of
// components; both arrays must be zero-initialized by the caller.
// Returns the number of runs (1 or 2).
//
// A run of exactly three components is split in two so that no access is
// three components wide: 1 + 2 if it starts at component 1, 2 + 1 otherwise.
static inline int
partitionLoadStore(uint8_t comp[2], uint8_t size[2], uint8_t mask)
{
   int n = 0;

   while (mask) {
      if (mask & 1) {
         size[n]++;
      } else if (size[n]) {
         // A gap after the current run: the next run starts behind this bit.
         // The start of the first run has to be counted as well, otherwise a
         // mask like 0b1010 (y and w) places the second run on z.
         n = 1;
         comp[1] = comp[0] + size[0] + 1;
      } else {
         // still skipping unset components in front of run n
         comp[n]++;
      }
      mask >>= 1;
   }

   if (size[0] == 3) {
      n = 1;
      size[0] = (comp[0] == 1) ? 1 : 2;
      size[1] = 3 - size[0];
      comp[1] = comp[0] + size[0];
   }
   return n + 1;
}
// For raw loads, granularity is 4 byte. |
// Usage of the texture read mask on OP_SULDP is not allowed. |
// |
// Emit the IR for TGSI LOAD. Source 0 names the resource (its swizzle selects |
// which loaded components go to which destination channel), source 1 holds |
// the coordinates. dst0 holds the destination value per channel. |
void |
Converter::handleLOAD(Value *dst0[4]) |
{ |
const int r = tgsi.getSrc(0).getIndex(0); |
int c; |
std::vector<Value *> off, src, ldv, def; |
getResourceCoords(off, r, 1); |
if (isResourceRaw(code, r)) { |
uint8_t mask = 0; |
uint8_t comp[2] = { 0, 0 }; |
uint8_t size[2] = { 0, 0 }; |
Symbol *base = getResourceBase(r); |
// determine the base and size of the at most 2 load ops |
// (mask = resource components referenced by the enabled channels) |
for (c = 0; c < 4; ++c) |
if (!tgsi.getDst(0).isMasked(c)) |
mask |= 1 << (tgsi.getSrc(0).getSwizzle(c) - TGSI_SWIZZLE_X); |
int n = partitionLoadStore(comp, size, mask); |
src = off; |
def.resize(4); // index by component, the ones we need will be non-NULL |
// load straight into the destination where the swizzle is the identity, |
// otherwise into a scratch value that is copied at the end |
for (c = 0; c < 4; ++c) { |
if (dst0[c] && tgsi.getSrc(0).getSwizzle(c) == (TGSI_SWIZZLE_X + c)) |
def[c] = dst0[c]; |
else |
if (mask & (1 << c)) |
def[c] = getScratch(); |
} |
// plain loads for the special resources and for nv50-style buffer |
// surfaces, OP_SULDB for everything else |
const bool useLd = isResourceSpecial(r) || |
(info->io.nv50styleSurfaces && |
code->resources[r].target == TGSI_TEXTURE_BUFFER); |
for (int i = 0; i < n; ++i) { |
ldv.assign(def.begin() + comp[i], def.begin() + comp[i] + size[i]); |
if (comp[i]) // adjust x component of source address if necessary |
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
off[0], mkImm(comp[i] * 4)); |
else |
src[0] = off[0]; |
if (useLd) { |
Instruction *ld = |
mkLoad(typeOfSize(size[i] * 4), ldv[0], base, src[0]); |
for (size_t c = 1; c < ldv.size(); ++c) |
ld->setDef(c, ldv[c]); |
} else { |
mkTex(OP_SULDB, getResourceTarget(code, r), code->resources[r].slot, |
0, ldv, src)->dType = typeOfSize(size[i] * 4); |
} |
} |
} else { |
// formatted load: always define all four components |
def.resize(4); |
for (c = 0; c < 4; ++c) { |
if (!dst0[c] || tgsi.getSrc(0).getSwizzle(c) != (TGSI_SWIZZLE_X + c)) |
def[c] = getScratch(); |
else |
def[c] = dst0[c]; |
} |
mkTex(OP_SULDP, getResourceTarget(code, r), code->resources[r].slot, 0, |
def, off); |
} |
// copy swizzled components from where they were loaded to their destination |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
if (dst0[c] != def[c]) |
mkMov(dst0[c], def[tgsi.getSrc(0).getSwizzle(c)]); |
} |
// For formatted stores, the write mask on OP_SUSTP can be used. |
// Raw stores have to be split. |
// |
// Emit the IR for TGSI STORE. Destination 0 names the resource and carries |
// the write mask, source 0 holds the coordinates and source 1 the data. |
void |
Converter::handleSTORE() |
{ |
const int r = tgsi.getDst(0).getIndex(0); |
int c; |
std::vector<Value *> off, src, dummy; |
getResourceCoords(off, r, 0); |
src = off; |
// number of coordinate values; stored data is appended behind them |
const int s = src.size(); |
if (isResourceRaw(code, r)) { |
uint8_t comp[2] = { 0, 0 }; |
uint8_t size[2] = { 0, 0 }; |
int n = partitionLoadStore(comp, size, tgsi.getDst(0).getMask()); |
Symbol *base = getResourceBase(r); |
// plain stores for the special resources and for nv50-style buffer |
// surfaces, OP_SUSTB for everything else |
const bool useSt = isResourceSpecial(r) || |
(info->io.nv50styleSurfaces && |
code->resources[r].target == TGSI_TEXTURE_BUFFER); |
for (int i = 0; i < n; ++i) { |
if (comp[i]) // adjust x component of source address if necessary |
src[0] = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, off[0]->reg.file), |
off[0], mkImm(comp[i] * 4)); |
else |
src[0] = off[0]; |
const DataType stTy = typeOfSize(size[i] * 4); |
if (useSt) { |
// sources 1.. of the store are the data, the address is attached |
// as indirect of source 0 |
Instruction *st = |
mkStore(OP_STORE, stTy, base, NULL, fetchSrc(1, comp[i])); |
for (c = 1; c < size[i]; ++c) |
st->setSrc(1 + c, fetchSrc(1, comp[i] + c)); |
st->setIndirect(0, 0, src[0]); |
} else { |
// attach values to be stored |
src.resize(s + size[i]); |
for (c = 0; c < size[i]; ++c) |
src[s + c] = fetchSrc(1, comp[i] + c); |
mkTex(OP_SUSTB, getResourceTarget(code, r), code->resources[r].slot, |
0, dummy, src)->setType(stTy); |
} |
} |
} else { |
// formatted store: append the enabled channels and pass the write mask |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
src.push_back(fetchSrc(1, c)); |
mkTex(OP_SUSTP, getResourceTarget(code, r), code->resources[r].slot, 0, |
dummy, src)->tex.mask = tgsi.getDst(0).getMask(); |
} |
} |
// XXX: These only work on resources with the single-component u32/s32 formats. |
// Therefore the result is replicated. This might not be intended by TGSI, but |
// operating on more than 1 component would produce undefined results because |
// they do not exist. |
// |
// Emit the IR for a TGSI atomic operation of data type ty and sub-operation |
// subOp. Source 0 names the resource, source 1 holds the coordinates, |
// source 2 the operand and, for compare-and-swap, source 3 a second operand. |
// Every non-NULL entry of dst0 is replaced by the single result value. |
void |
Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp) |
{ |
const int r = tgsi.getSrc(0).getIndex(0); |
std::vector<Value *> srcv; |
std::vector<Value *> defv; |
LValue *dst = getScratch(); |
getResourceCoords(srcv, r, 1); |
if (isResourceSpecial(r)) { |
// special resources: OP_ATOM on the resource's base symbol, addressed |
// indirectly by the first coordinate |
assert(r != TGSI_RESOURCE_INPUT); |
Instruction *insn; |
insn = mkOp2(OP_ATOM, ty, dst, getResourceBase(r), fetchSrc(2, 0)); |
insn->subOp = subOp; |
if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
insn->setSrc(2, fetchSrc(3, 0)); |
insn->setIndirect(0, 0, srcv.at(0)); |
} else { |
// declared resources: surface reduction, raw or formatted |
operation op = isResourceRaw(code, r) ? OP_SUREDB : OP_SUREDP; |
TexTarget targ = getResourceTarget(code, r); |
int idx = code->resources[r].slot; |
defv.push_back(dst); |
srcv.push_back(fetchSrc(2, 0)); |
if (subOp == NV50_IR_SUBOP_ATOM_CAS) |
srcv.push_back(fetchSrc(3, 0)); |
TexInstruction *tex = mkTex(op, targ, idx, 0, defv, srcv); |
tex->subOp = subOp; |
tex->tex.mask = 1; |
tex->setType(ty); |
} |
for (int c = 0; c < 4; ++c) |
if (dst0[c]) |
dst0[c] = dst; // not equal to rDst so handleInstruction will do mkMov |
} |
void |
Converter::handleINTERP(Value *dst[4]) |
{ |
// Check whether the input is linear. All other attributes ignored. |
Instruction *insn; |
Value *offset = NULL, *ptr = NULL, *w = NULL; |
bool linear; |
operation op; |
int c, mode; |
tgsi::Instruction::SrcRegister src = tgsi.getSrc(0); |
assert(src.getFile() == TGSI_FILE_INPUT); |
if (src.isIndirect(0)) |
ptr = fetchSrc(src.getIndirect(0), 0, NULL); |
// XXX: no way to know interp mode if we don't know the index |
linear = info->in[ptr ? 0 : src.getIndex(0)].linear; |
if (linear) { |
op = OP_LINTERP; |
mode = NV50_IR_INTERP_LINEAR; |
} else { |
op = OP_PINTERP; |
mode = NV50_IR_INTERP_PERSPECTIVE; |
} |
switch (tgsi.getOpcode()) { |
case TGSI_OPCODE_INTERP_CENTROID: |
mode |= NV50_IR_INTERP_CENTROID; |
break; |
case TGSI_OPCODE_INTERP_SAMPLE: |
insn = mkOp1(OP_PIXLD, TYPE_U32, (offset = getScratch()), fetchSrc(1, 0)); |
insn->subOp = NV50_IR_SUBOP_PIXLD_OFFSET; |
mode |= NV50_IR_INTERP_OFFSET; |
break; |
case TGSI_OPCODE_INTERP_OFFSET: { |
// The input in src1.xy is float, but we need a single 32-bit value |
// where the upper and lower 16 bits are encoded in S0.12 format. We need |
// to clamp the input coordinates to (-0.5, 0.4375), multiply by 4096, |
// and then convert to s32. |
Value *offs[2]; |
for (c = 0; c < 2; c++) { |
offs[c] = fetchSrc(1, c); |
mkOp2(OP_MIN, TYPE_F32, offs[c], offs[c], loadImm(NULL, 0.4375f)); |
mkOp2(OP_MAX, TYPE_F32, offs[c], offs[c], loadImm(NULL, -0.5f)); |
mkOp2(OP_MUL, TYPE_F32, offs[c], offs[c], loadImm(NULL, 4096.0f)); |
mkCvt(OP_CVT, TYPE_S32, offs[c], TYPE_F32, offs[c]); |
} |
offset = mkOp3v(OP_INSBF, TYPE_U32, getScratch(), |
offs[1], mkImm(0x1010), offs[0]); |
mode |= NV50_IR_INTERP_OFFSET; |
break; |
} |
} |
if (op == OP_PINTERP) { |
if (offset) { |
w = mkOp2v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 3), offset); |
mkOp1(OP_RCP, TYPE_F32, w, w); |
} else { |
w = fragCoord[3]; |
} |
} |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
insn = mkOp1(op, TYPE_F32, dst[c], srcToSym(src, c)); |
if (op == OP_PINTERP) |
insn->setSrc(1, w); |
if (ptr) |
insn->setIndirect(0, 0, ptr); |
if (offset) |
insn->setSrc(op == OP_PINTERP ? 2 : 1, offset); |
insn->setInterpolate(mode); |
} |
} |
// Look up the subroutine registered for instruction index ip; if there is
// none yet, create a new Function labelled ip and register it.
Converter::Subroutine *
Converter::getSubroutine(unsigned ip)
{
   std::map<unsigned, Subroutine>::iterator pos = sub.map.find(ip);

   if (pos != sub.map.end())
      return &pos->second;

   pos = sub.map.insert(std::make_pair(
            ip, Subroutine(new Function(prog, "SUB", ip)))).first;
   return &pos->second;
}
// Look up the subroutine registered under the label of function f; if there
// is none yet, register f itself.
Converter::Subroutine *
Converter::getSubroutine(Function *f)
{
   const unsigned label = f->getLabel();
   std::map<unsigned, Subroutine>::iterator pos = sub.map.find(label);

   if (pos != sub.map.end())
      return &pos->second;

   pos = sub.map.insert(std::make_pair(label, Subroutine(f))).first;
   return &pos->second;
}
bool |
Converter::isEndOfSubroutine(uint ip) |
{ |
assert(ip < code->scan.num_instructions); |
tgsi::Instruction insn(&code->insns[ip]); |
return (insn.getOpcode() == TGSI_OPCODE_END || |
insn.getOpcode() == TGSI_OPCODE_ENDSUB || |
// does END occur at end of main or the very end ? |
insn.getOpcode() == TGSI_OPCODE_BGNSUB); |
} |
bool |
Converter::handleInstruction(const struct tgsi_full_instruction *insn) |
{ |
Instruction *geni; |
Value *dst0[4], *rDst0[4]; |
Value *src0, *src1, *src2, *src3; |
Value *val0, *val1; |
int c; |
tgsi = tgsi::Instruction(insn); |
bool useScratchDst = tgsi.checkDstSrcAliasing(); |
operation op = tgsi.getOP(); |
dstTy = tgsi.inferDstType(); |
srcTy = tgsi.inferSrcType(); |
unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; |
if (tgsi.dstCount()) { |
for (c = 0; c < 4; ++c) { |
rDst0[c] = acquireDst(0, c); |
dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; |
} |
} |
switch (tgsi.getOpcode()) { |
case TGSI_OPCODE_ADD: |
case TGSI_OPCODE_UADD: |
case TGSI_OPCODE_AND: |
case TGSI_OPCODE_DIV: |
case TGSI_OPCODE_IDIV: |
case TGSI_OPCODE_UDIV: |
case TGSI_OPCODE_MAX: |
case TGSI_OPCODE_MIN: |
case TGSI_OPCODE_IMAX: |
case TGSI_OPCODE_IMIN: |
case TGSI_OPCODE_UMAX: |
case TGSI_OPCODE_UMIN: |
case TGSI_OPCODE_MOD: |
case TGSI_OPCODE_UMOD: |
case TGSI_OPCODE_MUL: |
case TGSI_OPCODE_UMUL: |
case TGSI_OPCODE_IMUL_HI: |
case TGSI_OPCODE_UMUL_HI: |
case TGSI_OPCODE_OR: |
case TGSI_OPCODE_SHL: |
case TGSI_OPCODE_ISHR: |
case TGSI_OPCODE_USHR: |
case TGSI_OPCODE_SUB: |
case TGSI_OPCODE_XOR: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
geni = mkOp2(op, dstTy, dst0[c], src0, src1); |
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); |
} |
break; |
case TGSI_OPCODE_MAD: |
case TGSI_OPCODE_UMAD: |
case TGSI_OPCODE_SAD: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
mkOp3(op, dstTy, dst0[c], src0, src1, src2); |
} |
break; |
case TGSI_OPCODE_MOV: |
case TGSI_OPCODE_ABS: |
case TGSI_OPCODE_CEIL: |
case TGSI_OPCODE_FLR: |
case TGSI_OPCODE_TRUNC: |
case TGSI_OPCODE_RCP: |
case TGSI_OPCODE_IABS: |
case TGSI_OPCODE_INEG: |
case TGSI_OPCODE_NOT: |
case TGSI_OPCODE_DDX: |
case TGSI_OPCODE_DDY: |
case TGSI_OPCODE_DDX_FINE: |
case TGSI_OPCODE_DDY_FINE: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); |
break; |
case TGSI_OPCODE_RSQ: |
src0 = fetchSrc(0, 0); |
val0 = getScratch(); |
mkOp1(OP_ABS, TYPE_F32, val0, src0); |
mkOp1(OP_RSQ, TYPE_F32, val0, val0); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkMov(dst0[c], val0); |
break; |
case TGSI_OPCODE_ARL: |
case TGSI_OPCODE_ARR: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
const RoundMode rnd = |
tgsi.getOpcode() == TGSI_OPCODE_ARR ? ROUND_N : ROUND_M; |
src0 = fetchSrc(0, c); |
mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = rnd; |
} |
break; |
case TGSI_OPCODE_UARL: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkOp1(OP_MOV, TYPE_U32, dst0[c], fetchSrc(0, c)); |
break; |
case TGSI_OPCODE_POW: |
val0 = mkOp2v(op, TYPE_F32, getScratch(), fetchSrc(0, 0), fetchSrc(1, 0)); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); |
break; |
case TGSI_OPCODE_EX2: |
case TGSI_OPCODE_LG2: |
val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); |
break; |
case TGSI_OPCODE_COS: |
case TGSI_OPCODE_SIN: |
val0 = getScratch(); |
if (mask & 7) { |
mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); |
mkOp1(op, TYPE_F32, val0, val0); |
for (c = 0; c < 3; ++c) |
if (dst0[c]) |
mkMov(dst0[c], val0); |
} |
if (dst0[3]) { |
mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); |
mkOp1(op, TYPE_F32, dst0[3], val0); |
} |
break; |
case TGSI_OPCODE_SCS: |
if (mask & 3) { |
val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
if (dst0[0]) |
mkOp1(OP_COS, TYPE_F32, dst0[0], val0); |
if (dst0[1]) |
mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); |
} |
if (dst0[2]) |
loadImm(dst0[2], 0.0f); |
if (dst0[3]) |
loadImm(dst0[3], 1.0f); |
break; |
case TGSI_OPCODE_EXP: |
src0 = fetchSrc(0, 0); |
val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); |
if (dst0[1]) |
mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); |
if (dst0[0]) |
mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); |
if (dst0[2]) |
mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); |
if (dst0[3]) |
loadImm(dst0[3], 1.0f); |
break; |
case TGSI_OPCODE_LOG: |
src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); |
val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0); |
if (dst0[0] || dst0[1]) |
val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); |
if (dst0[1]) { |
mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); |
mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); |
mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); |
} |
if (dst0[3]) |
loadImm(dst0[3], 1.0f); |
break; |
case TGSI_OPCODE_DP2: |
val0 = buildDot(2); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkMov(dst0[c], val0); |
break; |
case TGSI_OPCODE_DP3: |
val0 = buildDot(3); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkMov(dst0[c], val0); |
break; |
case TGSI_OPCODE_DP4: |
val0 = buildDot(4); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkMov(dst0[c], val0); |
break; |
case TGSI_OPCODE_DPH: |
val0 = buildDot(3); |
src1 = fetchSrc(1, 3); |
mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkMov(dst0[c], val0); |
break; |
case TGSI_OPCODE_DST: |
if (dst0[0]) |
loadImm(dst0[0], 1.0f); |
if (dst0[1]) { |
src0 = fetchSrc(0, 1); |
src1 = fetchSrc(1, 1); |
mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); |
} |
if (dst0[2]) |
mkMov(dst0[2], fetchSrc(0, 2)); |
if (dst0[3]) |
mkMov(dst0[3], fetchSrc(1, 3)); |
break; |
case TGSI_OPCODE_LRP: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
mkOp3(OP_MAD, TYPE_F32, dst0[c], |
mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); |
} |
break; |
case TGSI_OPCODE_LIT: |
handleLIT(dst0); |
break; |
case TGSI_OPCODE_XPD: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
if (c < 3) { |
val0 = getSSA(); |
src0 = fetchSrc(1, (c + 1) % 3); |
src1 = fetchSrc(0, (c + 2) % 3); |
mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); |
mkOp1(OP_NEG, TYPE_F32, val0, val0); |
src0 = fetchSrc(0, (c + 1) % 3); |
src1 = fetchSrc(1, (c + 2) % 3); |
mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); |
} else { |
loadImm(dst0[c], 1.0f); |
} |
} |
break; |
case TGSI_OPCODE_ISSG: |
case TGSI_OPCODE_SSG: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
val0 = getScratch(); |
val1 = getScratch(); |
mkCmp(OP_SET, CC_GT, srcTy, val0, srcTy, src0, zero); |
mkCmp(OP_SET, CC_LT, srcTy, val1, srcTy, src0, zero); |
if (srcTy == TYPE_F32) |
mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); |
else |
mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); |
} |
break; |
case TGSI_OPCODE_UCMP: |
case TGSI_OPCODE_CMP: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
if (src1 == src2) |
mkMov(dst0[c], src1); |
else |
mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, |
srcTy, dst0[c], srcTy, src1, src2, src0); |
} |
break; |
case TGSI_OPCODE_FRC: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
val0 = getScratch(); |
mkOp1(OP_FLOOR, TYPE_F32, val0, src0); |
mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); |
} |
break; |
case TGSI_OPCODE_ROUND: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) |
->rnd = ROUND_NI; |
break; |
case TGSI_OPCODE_CLAMP: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
val0 = getScratch(); |
mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); |
mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); |
} |
break; |
case TGSI_OPCODE_SLT: |
case TGSI_OPCODE_SGE: |
case TGSI_OPCODE_SEQ: |
case TGSI_OPCODE_SGT: |
case TGSI_OPCODE_SLE: |
case TGSI_OPCODE_SNE: |
case TGSI_OPCODE_FSEQ: |
case TGSI_OPCODE_FSGE: |
case TGSI_OPCODE_FSLT: |
case TGSI_OPCODE_FSNE: |
case TGSI_OPCODE_ISGE: |
case TGSI_OPCODE_ISLT: |
case TGSI_OPCODE_USEQ: |
case TGSI_OPCODE_USGE: |
case TGSI_OPCODE_USLT: |
case TGSI_OPCODE_USNE: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); |
} |
break; |
case TGSI_OPCODE_KILL_IF: |
val0 = new_LValue(func, FILE_PREDICATE); |
mask = 0; |
for (c = 0; c < 4; ++c) { |
const int s = tgsi.getSrc(0).getSwizzle(c); |
if (mask & (1 << s)) |
continue; |
mask |= 1 << s; |
mkCmp(OP_SET, CC_LT, TYPE_F32, val0, TYPE_F32, fetchSrc(0, c), zero); |
mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); |
} |
break; |
case TGSI_OPCODE_KILL: |
mkOp(OP_DISCARD, TYPE_NONE, NULL); |
break; |
case TGSI_OPCODE_TEX: |
case TGSI_OPCODE_TXB: |
case TGSI_OPCODE_TXL: |
case TGSI_OPCODE_TXP: |
case TGSI_OPCODE_LODQ: |
// R S L C Dx Dy |
handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); |
break; |
case TGSI_OPCODE_TXD: |
handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); |
break; |
case TGSI_OPCODE_TG4: |
handleTEX(dst0, 2, 2, 0x03, 0x0f, 0x00, 0x00); |
break; |
case TGSI_OPCODE_TEX2: |
handleTEX(dst0, 2, 2, 0x03, 0x10, 0x00, 0x00); |
break; |
case TGSI_OPCODE_TXB2: |
case TGSI_OPCODE_TXL2: |
handleTEX(dst0, 2, 2, 0x10, 0x0f, 0x00, 0x00); |
break; |
case TGSI_OPCODE_SAMPLE: |
case TGSI_OPCODE_SAMPLE_B: |
case TGSI_OPCODE_SAMPLE_D: |
case TGSI_OPCODE_SAMPLE_L: |
case TGSI_OPCODE_SAMPLE_C: |
case TGSI_OPCODE_SAMPLE_C_LZ: |
handleTEX(dst0, 1, 2, 0x30, 0x30, 0x30, 0x40); |
break; |
case TGSI_OPCODE_TXF: |
handleTXF(dst0, 1, 0x03); |
break; |
case TGSI_OPCODE_SAMPLE_I: |
handleTXF(dst0, 1, 0x03); |
break; |
case TGSI_OPCODE_SAMPLE_I_MS: |
handleTXF(dst0, 1, 0x20); |
break; |
case TGSI_OPCODE_TXQ: |
case TGSI_OPCODE_SVIEWINFO: |
handleTXQ(dst0, TXQ_DIMS); |
break; |
case TGSI_OPCODE_F2I: |
case TGSI_OPCODE_F2U: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; |
break; |
case TGSI_OPCODE_I2F: |
case TGSI_OPCODE_U2F: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) |
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); |
break; |
case TGSI_OPCODE_EMIT: |
/* export the saved viewport index */ |
if (viewport != NULL) { |
Symbol *vpSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_U32, |
info->out[info->io.viewportId].slot[0] * 4); |
mkStore(OP_EXPORT, TYPE_U32, vpSym, NULL, viewport); |
} |
/* fallthrough */ |
case TGSI_OPCODE_ENDPRIM: |
{ |
// get vertex stream (must be immediate) |
unsigned int stream = tgsi.getSrc(0).getValueU32(0, info); |
if (stream && op == OP_RESTART) |
break; |
src0 = mkImm(stream); |
mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; |
break; |
} |
case TGSI_OPCODE_IF: |
case TGSI_OPCODE_UIF: |
{ |
BasicBlock *ifBB = new BasicBlock(func); |
bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); |
condBBs.push(bb); |
joinBBs.push(bb); |
mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0))->setType(srcTy); |
setPosition(ifBB, true); |
} |
break; |
case TGSI_OPCODE_ELSE: |
{ |
BasicBlock *elseBB = new BasicBlock(func); |
BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); |
forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); |
condBBs.push(bb); |
forkBB->getExit()->asFlow()->target.bb = elseBB; |
if (!bb->isTerminated()) |
mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); |
setPosition(elseBB, true); |
} |
break; |
case TGSI_OPCODE_ENDIF: |
{ |
BasicBlock *convBB = new BasicBlock(func); |
BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); |
BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); |
if (!bb->isTerminated()) { |
// we only want join if none of the clauses ended with CONT/BREAK/RET |
if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) |
insertConvergenceOps(convBB, forkBB); |
mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); |
bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
} |
if (prevBB->getExit()->op == OP_BRA) { |
prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); |
prevBB->getExit()->asFlow()->target.bb = convBB; |
} |
setPosition(convBB, true); |
} |
break; |
case TGSI_OPCODE_BGNLOOP: |
{ |
BasicBlock *lbgnBB = new BasicBlock(func); |
BasicBlock *lbrkBB = new BasicBlock(func); |
loopBBs.push(lbgnBB); |
breakBBs.push(lbrkBB); |
if (loopBBs.getSize() > func->loopNestingBound) |
func->loopNestingBound++; |
mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); |
bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); |
setPosition(lbgnBB, true); |
mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); |
} |
break; |
case TGSI_OPCODE_ENDLOOP: |
{ |
BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); |
if (!bb->isTerminated()) { |
mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); |
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); |
} |
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); |
} |
break; |
case TGSI_OPCODE_BRK: |
{ |
if (bb->isTerminated()) |
break; |
BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); |
mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); |
bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); |
} |
break; |
case TGSI_OPCODE_CONT: |
{ |
if (bb->isTerminated()) |
break; |
BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); |
mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); |
contBB->explicitCont = true; |
bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); |
} |
break; |
case TGSI_OPCODE_BGNSUB: |
{ |
Subroutine *s = getSubroutine(ip); |
BasicBlock *entry = new BasicBlock(s->f); |
BasicBlock *leave = new BasicBlock(s->f); |
// multiple entrypoints possible, keep the graph connected |
if (prog->getType() == Program::TYPE_COMPUTE) |
prog->main->call.attach(&s->f->call, Graph::Edge::TREE); |
sub.cur = s; |
s->f->setEntry(entry); |
s->f->setExit(leave); |
setPosition(entry, true); |
return true; |
} |
case TGSI_OPCODE_ENDSUB: |
{ |
sub.cur = getSubroutine(prog->main); |
setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); |
return true; |
} |
case TGSI_OPCODE_CAL: |
{ |
Subroutine *s = getSubroutine(tgsi.getLabel()); |
mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); |
func->call.attach(&s->f->call, Graph::Edge::TREE); |
return true; |
} |
case TGSI_OPCODE_RET: |
{ |
if (bb->isTerminated()) |
return true; |
BasicBlock *leave = BasicBlock::get(func->cfgExit); |
if (!isEndOfSubroutine(ip + 1)) { |
// insert a PRERET at the entry if this is an early return |
// (only needed for sharing code in the epilogue) |
BasicBlock *pos = getBB(); |
setPosition(BasicBlock::get(func->cfg.getRoot()), false); |
mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; |
setPosition(pos, true); |
} |
mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; |
bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); |
} |
break; |
case TGSI_OPCODE_END: |
{ |
// attach and generate epilogue code |
BasicBlock *epilogue = BasicBlock::get(func->cfgExit); |
bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); |
setPosition(epilogue, true); |
if (prog->getType() == Program::TYPE_FRAGMENT) |
exportOutputs(); |
if (info->io.genUserClip > 0) |
handleUserClipPlanes(); |
mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; |
} |
break; |
case TGSI_OPCODE_SWITCH: |
case TGSI_OPCODE_CASE: |
ERROR("switch/case opcode encountered, should have been lowered\n"); |
abort(); |
break; |
case TGSI_OPCODE_LOAD: |
handleLOAD(dst0); |
break; |
case TGSI_OPCODE_STORE: |
handleSTORE(); |
break; |
case TGSI_OPCODE_BARRIER: |
geni = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); |
geni->fixed = 1; |
geni->subOp = NV50_IR_SUBOP_BAR_SYNC; |
break; |
case TGSI_OPCODE_MFENCE: |
case TGSI_OPCODE_LFENCE: |
case TGSI_OPCODE_SFENCE: |
geni = mkOp(OP_MEMBAR, TYPE_NONE, NULL); |
geni->fixed = 1; |
geni->subOp = tgsi::opcodeToSubOp(tgsi.getOpcode()); |
break; |
case TGSI_OPCODE_ATOMUADD: |
case TGSI_OPCODE_ATOMXCHG: |
case TGSI_OPCODE_ATOMCAS: |
case TGSI_OPCODE_ATOMAND: |
case TGSI_OPCODE_ATOMOR: |
case TGSI_OPCODE_ATOMXOR: |
case TGSI_OPCODE_ATOMUMIN: |
case TGSI_OPCODE_ATOMIMIN: |
case TGSI_OPCODE_ATOMUMAX: |
case TGSI_OPCODE_ATOMIMAX: |
handleATOM(dst0, dstTy, tgsi::opcodeToSubOp(tgsi.getOpcode())); |
break; |
case TGSI_OPCODE_IBFE: |
case TGSI_OPCODE_UBFE: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
mkOp3(OP_INSBF, TYPE_U32, src1, src2, mkImm(0x808), src1); |
mkOp2(OP_EXTBF, dstTy, dst0[c], src0, src1); |
} |
break; |
case TGSI_OPCODE_BFI: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
src1 = fetchSrc(1, c); |
src2 = fetchSrc(2, c); |
src3 = fetchSrc(3, c); |
mkOp3(OP_INSBF, TYPE_U32, src2, src3, mkImm(0x808), src2); |
mkOp3(OP_INSBF, TYPE_U32, dst0[c], src1, src2, src0); |
} |
break; |
case TGSI_OPCODE_LSB: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
geni = mkOp2(OP_EXTBF, TYPE_U32, src0, src0, mkImm(0x2000)); |
geni->subOp = NV50_IR_SUBOP_EXTBF_REV; |
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], src0); |
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; |
} |
break; |
case TGSI_OPCODE_IMSB: |
case TGSI_OPCODE_UMSB: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
mkOp1(OP_BFIND, srcTy, dst0[c], src0); |
} |
break; |
case TGSI_OPCODE_BREV: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); |
geni->subOp = NV50_IR_SUBOP_EXTBF_REV; |
} |
break; |
case TGSI_OPCODE_POPC: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = fetchSrc(0, c); |
mkOp2(OP_POPCNT, TYPE_U32, dst0[c], src0, src0); |
} |
break; |
case TGSI_OPCODE_INTERP_CENTROID: |
case TGSI_OPCODE_INTERP_SAMPLE: |
case TGSI_OPCODE_INTERP_OFFSET: |
handleINTERP(dst0); |
break; |
case TGSI_OPCODE_D2I: |
case TGSI_OPCODE_D2U: |
case TGSI_OPCODE_D2F: { |
int pos = 0; |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
Value *dreg = getSSA(8); |
src0 = fetchSrc(0, pos); |
src1 = fetchSrc(0, pos + 1); |
mkOp2(OP_MERGE, TYPE_U64, dreg, src0, src1); |
mkCvt(OP_CVT, dstTy, dst0[c], srcTy, dreg); |
pos += 2; |
} |
break; |
} |
case TGSI_OPCODE_I2D: |
case TGSI_OPCODE_U2D: |
case TGSI_OPCODE_F2D: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
Value *dreg = getSSA(8); |
mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); |
mkSplit(&dst0[c], 4, dreg); |
c++; |
} |
break; |
case TGSI_OPCODE_DABS: |
case TGSI_OPCODE_DNEG: |
case TGSI_OPCODE_DRCP: |
case TGSI_OPCODE_DSQRT: |
case TGSI_OPCODE_DRSQ: |
case TGSI_OPCODE_DTRUNC: |
case TGSI_OPCODE_DCEIL: |
case TGSI_OPCODE_DFLR: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
Value *dst = getSSA(8), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
mkOp1(op, dstTy, dst, src0); |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
case TGSI_OPCODE_DFRAC: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
Value *dst = getSSA(8), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
mkOp1(OP_FLOOR, TYPE_F64, dst, src0); |
mkOp2(OP_SUB, TYPE_F64, dst, src0, dst); |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
case TGSI_OPCODE_DSLT: |
case TGSI_OPCODE_DSGE: |
case TGSI_OPCODE_DSEQ: |
case TGSI_OPCODE_DSNE: { |
int pos = 0; |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
Value *tmp[2]; |
src0 = getSSA(8); |
src1 = getSSA(8); |
tmp[0] = fetchSrc(0, pos); |
tmp[1] = fetchSrc(0, pos + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
tmp[0] = fetchSrc(1, pos); |
tmp[1] = fetchSrc(1, pos + 1); |
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], srcTy, src0, src1); |
pos += 2; |
} |
break; |
} |
case TGSI_OPCODE_DADD: |
case TGSI_OPCODE_DMUL: |
case TGSI_OPCODE_DMAX: |
case TGSI_OPCODE_DMIN: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
src1 = getSSA(8); |
Value *dst = getSSA(8), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
tmp[0] = fetchSrc(1, c); |
tmp[1] = fetchSrc(1, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
mkOp2(op, dstTy, dst, src0, src1); |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
case TGSI_OPCODE_DMAD: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
src1 = getSSA(8); |
src2 = getSSA(8); |
Value *dst = getSSA(8), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
tmp[0] = fetchSrc(1, c); |
tmp[1] = fetchSrc(1, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src1, tmp[0], tmp[1]); |
tmp[0] = fetchSrc(2, c); |
tmp[1] = fetchSrc(2, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src2, tmp[0], tmp[1]); |
mkOp3(op, dstTy, dst, src0, src1, src2); |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
case TGSI_OPCODE_DROUND: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
Value *dst = getSSA(8), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F64, src0) |
->rnd = ROUND_NI; |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
case TGSI_OPCODE_DSSG: |
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { |
src0 = getSSA(8); |
Value *dst = getSSA(8), *dstF32 = getSSA(), *tmp[2]; |
tmp[0] = fetchSrc(0, c); |
tmp[1] = fetchSrc(0, c + 1); |
mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); |
val0 = getScratch(); |
val1 = getScratch(); |
// The zero is wrong here since it's only 32-bit, but it works out in |
// the end since it gets replaced with $r63. |
mkCmp(OP_SET, CC_GT, TYPE_F32, val0, TYPE_F64, src0, zero); |
mkCmp(OP_SET, CC_LT, TYPE_F32, val1, TYPE_F64, src0, zero); |
mkOp2(OP_SUB, TYPE_F32, dstF32, val0, val1); |
mkCvt(OP_CVT, TYPE_F64, dst, TYPE_F32, dstF32); |
mkSplit(&dst0[c], 4, dst); |
c++; |
} |
break; |
default: |
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); |
assert(0); |
break; |
} |
if (tgsi.dstCount()) { |
for (c = 0; c < 4; ++c) { |
if (!dst0[c]) |
continue; |
if (dst0[c] != rDst0[c]) |
mkMov(rDst0[c], dst0[c]); |
storeDst(0, c, rDst0[c]); |
} |
} |
vtxBaseValid = 0; |
return true; |
} |
void |
Converter::handleUserClipPlanes() |
{ |
   // For every generated user clip plane, emit the 4-component dot product of |
   // clipVtx with the plane coefficients loaded from the UCP constant buffer, |
   // then export the results through the last (genUserClip + 3) / 4 outputs. |
   Value *dist[8]; |
   // Component-major order: component 0 starts each sum with a MUL, the |
   // remaining components accumulate into the same value with MAD. |
   for (int comp = 0; comp < 4; ++comp) { |
      for (int plane = 0; plane < info->io.genUserClip; ++plane) { |
         Symbol *coefSym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpCBSlot, |
                                    TYPE_F32, |
                                    info->io.ucpBase + plane * 16 + comp * 4); |
         Value *coef = mkLoadv(TYPE_F32, coefSym, NULL); |
         if (comp != 0) { |
            mkOp3(OP_MAD, TYPE_F32, dist[plane], clipVtx[comp], coef, |
                  dist[plane]); |
            continue; |
         } |
         dist[plane] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[comp], |
                              coef); |
      } |
   } |
   // Index of the first output that receives clip distances. |
   const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; |
   for (int plane = 0; plane < info->io.genUserClip; ++plane) { |
      const int outIdx = first + plane / 4; |
      const int chan = plane % 4; |
      Symbol *outSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, |
                                info->out[outIdx].slot[chan] * 4); |
      mkStore(OP_EXPORT, TYPE_F32, outSym, NULL, dist[plane]); |
   } |
} |
void |
Converter::exportOutputs() |
{ |
   // Emit an EXPORT for every output component that exists in the current |
   // subroutine's value map and for which the load yields a value. |
   for (unsigned int out = 0; out < info->numOutputs; ++out) { |
      for (unsigned int chan = 0; chan < 4; ++chan) { |
         if (oData.exists(sub.cur->values, out, chan)) { |
            Symbol *outSym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, |
                                      info->out[out].slot[chan] * 4); |
            Value *stored = oData.load(sub.cur->values, out, chan, NULL); |
            if (stored != NULL) |
               mkStore(OP_EXPORT, TYPE_F32, outSym, NULL, stored); |
         } |
      } |
   } |
} |
Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), |
   code(code), |
   tgsi(NULL), |
   tData(this), aData(this), pData(this), oData(this) |
{ |
   info = code->info; |
   // Temporaries go to local memory when code->mainTempsInLMem is set, |
   // otherwise they are kept in general purpose registers. |
   DataFile tempFile = FILE_GPR; |
   if (code->mainTempsInLMem) |
      tempFile = FILE_MEMORY_LOCAL; |
   // Register counts of each TGSI file, as reported by the scanned source. |
   const unsigned numTemps = code->fileSize(TGSI_FILE_TEMPORARY); |
   const unsigned numPreds = code->fileSize(TGSI_FILE_PREDICATE); |
   const unsigned numAddrs = code->fileSize(TGSI_FILE_ADDRESS); |
   const unsigned numOuts = code->fileSize(TGSI_FILE_OUTPUT); |
   tData.setup(TGSI_FILE_TEMPORARY, 0, 0, numTemps, 4, 4, tempFile, 0); |
   pData.setup(TGSI_FILE_PREDICATE, 0, 0, numPreds, 4, 4, FILE_PREDICATE, 0); |
   aData.setup(TGSI_FILE_ADDRESS, 0, 0, numAddrs, 4, 4, FILE_GPR, 0); |
   oData.setup(TGSI_FILE_OUTPUT, 0, 0, numOuts, 4, 4, FILE_GPR, 0); |
   // Shared 32-bit zero immediate. |
   zero = mkImm((uint32_t)0); |
   vtxBaseValid = 0; |
} |
// Destructor: the body is empty, nothing is released explicitly here. |
Converter::~Converter() |
{ |
} |
inline const Converter::Location * |
Converter::BindArgumentsPass::getValueLocation(Subroutine *s, Value *v) |
{ |
   // Look v up in the subroutine's value -> location map; NULL when absent. |
   ValueMap::l_iterator found = s->values.l.find(v); |
   if (found == s->values.l.end()) |
      return NULL; |
   return &found->second; |
} |
template<typename T> inline void |
Converter::BindArgumentsPass::updateCallArgs( |
   Instruction *i, void (Instruction::*setArg)(int, Value *), |
   T (Function::*proto)) |
{ |
   // For each value in the callee's prototype list (selected by proto), find |
   // the location it is mapped to in the callee and bind the caller's value |
   // for that same location as call argument number a (through setArg). |
   Function *callee = i->asFlow()->target.fn; |
   Subroutine *calleeSub = conv.getSubroutine(callee); |
   for (unsigned a = 0; a < (callee->*proto).size(); ++a) { |
      Value *formal = (callee->*proto)[a].get(); |
      // NOTE(review): getValueLocation() can return NULL; the result is |
      // dereferenced unchecked, so every prototype value is assumed to have |
      // a location - confirm. |
      const Converter::Location &loc = *getValueLocation(calleeSub, formal); |
      Converter::DataArray *array = conv.getArrayForFile(loc.array, loc.arrayIdx); |
      Value *actual = array->acquire(sub->values, loc.i, loc.c); |
      (i->*setArg)(a, actual); |
   } |
} |
template<typename T> inline void |
Converter::BindArgumentsPass::updatePrototype( |
   BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) |
{ |
   // Refresh the function's sets, then append to the prototype list every |
   // value whose bit is set, that has a known location and whose location is |
   // not listed in the source's locals. |
   (func->*updateSet)(); |
   for (unsigned id = 0; id < set->getSize(); ++id) { |
      Value *val = func->getLValue(id); |
      const Converter::Location *loc = getValueLocation(sub, val); |
      // only include values with a matching TGSI register |
      if (!set->test(id) || !loc || conv.code->locals.count(*loc)) |
         continue; |
      (func->*proto).push_back(val); |
   } |
} |
bool |
Converter::BindArgumentsPass::visit(Function *f) |
{ |
   sub = conv.getSubroutine(f); |
   // Bind sources and definitions of every non-builtin CALL in this function. |
   for (ArrayList::Iterator blockIt = f->allBBlocks.iterator(); |
        !blockIt.end(); blockIt.next()) { |
      for (Instruction *insn = BasicBlock::get(blockIt)->getFirst(); |
           insn; insn = insn->next) { |
         if (insn->op != OP_CALL || insn->asFlow()->builtin) |
            continue; |
         updateCallArgs(insn, &Instruction::setSrc, &Function::ins); |
         updateCallArgs(insn, &Instruction::setDef, &Function::outs); |
      } |
   } |
   // The main function only gets a prototype for compute programs. |
   if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) |
      return true; |
   // Inputs: values live at the entry block; outputs: values defined at exit. |
   BasicBlock *entryBB = BasicBlock::get(f->cfg.getRoot()); |
   updatePrototype(&entryBB->liveSet, |
                   &Function::buildLiveSets, &Function::ins); |
   BasicBlock *exitBB = BasicBlock::get(f->cfgExit); |
   updatePrototype(&exitBB->defSet, |
                   &Function::buildDefSets, &Function::outs); |
   return true; |
} |
bool |
Converter::run() |
{ |
   // Set up entry/exit blocks of main, create the per-program scratch values, |
   // convert every TGSI instruction in order and finally bind call arguments. |
   // Returns false as soon as one instruction or the binding pass fails. |
   BasicBlock *mainEntry = new BasicBlock(prog->main); |
   BasicBlock *mainExit = new BasicBlock(prog->main); |
   prog->main->setEntry(mainEntry); |
   prog->main->setExit(mainExit); |
   setPosition(mainEntry, true); |
   sub.cur = getSubroutine(prog->main); |
   if (info->io.genUserClip > 0) { |
      // one scratch value per clip vertex component |
      for (int comp = 0; comp < 4; ++comp) |
         clipVtx[comp] = getScratch(); |
   } |
   if (prog->getType() == Program::TYPE_FRAGMENT) { |
      // fragCoord[3] = 1 / (component 3 of the position system value) |
      Symbol *posW = mkSysVal(SV_POSITION, 3); |
      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), posW); |
      mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); |
   } |
   if (info->io.viewportId < 0) |
      viewport = NULL; |
   else |
      viewport = getScratch(); |
   ip = 0; |
   while (ip < code->scan.num_instructions) { |
      if (!handleInstruction(&code->insns[ip])) |
         return false; |
      ++ip; |
   } |
   if (BindArgumentsPass(*this).run(prog)) |
      return true; |
   return false; |
} |
} // unnamed namespace |
namespace nv50_ir { |
bool |
Program::makeFromTGSI(struct nv50_ir_prog_info *info) |
{ |
   // Scan the TGSI source described by info and, if that succeeds, convert |
   // it into this program. Returns false when scanning or conversion fails. |
   tgsi::Source source(info); |
   if (source.scanSource()) { |
      tlsSize = info->bin.tlsSpace; |
      Converter converter(this, &source); |
      return converter.run(); |
   } |
   return false; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.cpp |
---|
0,0 → 1,446 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_graph.h" |
#include <limits> |
#include <list> |
#include <stack> |
#include "codegen/nv50_ir.h" |
namespace nv50_ir { |
// Creates an empty graph: no root, no nodes, sequence counter at 0. |
Graph::Graph() : root(NULL), size(0), sequence(0) |
{ |
} |
// Cuts every node reachable from the root out of the graph; the nodes |
// themselves are not freed. |
Graph::~Graph() |
{ |
   IteratorRef it = safeIteratorDFS(); |
   while (!it->end()) { |
      Node *node = reinterpret_cast<Node *>(it->get()); |
      node->cut(); |
      it->next(); |
   } |
} |
// Registers node as a member of this graph; the first node inserted while |
// the graph has no root becomes the root. |
void Graph::insert(Node *node) |
{ |
   node->graph = this; |
   ++size; |
   if (root == NULL) |
      root = node; |
} |
// Removes this edge from the two circular edge lists it is a member of: |
// index 0 is the origin's list of outgoing edges, index 1 the target's list |
// of incident edges. Updates the list heads and edge counters of both nodes. |
void Graph::Edge::unlink() |
{ |
if (origin) { |
prev[0]->next[0] = next[0]; |
next[0]->prev[0] = prev[0]; |
// if this edge was the list head, advance the head; an edge that links |
// to itself was the only element, so the list becomes empty |
if (origin->out == this) |
origin->out = (next[0] == this) ? NULL : next[0]; |
--origin->outCount; |
} |
if (target) { |
prev[1]->next[1] = next[1]; |
next[1]->prev[1] = prev[1]; |
// same head handling for the target's incident list |
if (target->in == this) |
target->in = (next[1] == this) ? NULL : next[1]; |
--target->inCount; |
} |
} |
// Returns a short lower-case name for the edge type; "unk" for UNKNOWN and |
// for any value not listed. |
const char *Graph::Edge::typeStr() const |
{ |
   const char *name = "unk"; |
   switch (type) { |
   case TREE: |
      name = "tree"; |
      break; |
   case FORWARD: |
      name = "forward"; |
      break; |
   case BACK: |
      name = "back"; |
      break; |
   case CROSS: |
      name = "cross"; |
      break; |
   case DUMMY: |
      name = "dummy"; |
      break; |
   case UNKNOWN: |
   default: |
      break; |
   } |
   return name; |
} |
// Creates an unattached node carrying the opaque payload priv: no edges, no |
// owning graph, visit marker and edge counters at 0. |
// The public scratch field tag is now initialised as well; it used to be |
// left indeterminate although graph algorithms in this file read it. |
Graph::Node::Node(void *priv) : data(priv), |
   in(0), out(0), graph(0), |
   visited(0), |
   inCount(0), outCount(0), |
   tag(0) |
{ |
   // nothing to do |
} |
// Adds a directed edge of the given kind from this node to node. |
// The new edge becomes the head of this node's outgoing list and of node's |
// incident list. Whichever of the two nodes has no graph yet is inserted |
// into the other's graph (at least one must already belong to a graph). |
// Attaching with Edge::UNKNOWN triggers a reclassification of all edges. |
void Graph::Node::attach(Node *node, Edge::Type kind) |
{ |
Edge *edge = new Edge(this, node, kind); |
// insert head |
// (a fresh edge links to itself, so it only needs splicing when the |
// circular list already has elements) |
if (this->out) { |
edge->next[0] = this->out; |
edge->prev[0] = this->out->prev[0]; |
edge->prev[0]->next[0] = edge; |
this->out->prev[0] = edge; |
} |
this->out = edge; |
// same splice for the target's incident list (index 1) |
if (node->in) { |
edge->next[1] = node->in; |
edge->prev[1] = node->in->prev[1]; |
edge->prev[1]->next[1] = edge; |
node->in->prev[1] = edge; |
} |
node->in = edge; |
++this->outCount; |
++node->inCount; |
assert(graph || node->graph); |
if (!node->graph) |
graph->insert(node); |
if (!graph) |
node->graph->insert(this); |
// graph is non-NULL here: insert() above sets it when it was missing |
if (kind == Edge::UNKNOWN) |
graph->classifyEdges(); |
} |
// Deletes the first outgoing edge that leads to node. |
// Returns true on success; logs an error and returns false when this node |
// has no edge to node. |
bool Graph::Node::detach(Graph::Node *node) |
{ |
   for (EdgeIterator ei = this->outgoing(); !ei.end(); ei.next()) { |
      if (ei.getNode() == node) { |
         // the Edge destructor unlinks it from both nodes |
         delete ei.getEdge(); |
         return true; |
      } |
   } |
   ERROR("no such node attached\n"); |
   return false; |
} |
// Detach this node from its graph: every outgoing and incident edge is |
// deleted, and the graph loses its root if this node was the root. |
void Graph::Node::cut() |
{ |
   // Deleting an edge unlinks it, which advances (or clears) the list head, |
   // so each loop ends once its list is empty. |
   while (out != NULL) |
      delete out; |
   while (in != NULL) |
      delete in; |
   if (graph == NULL) |
      return; |
   if (graph->root == this) |
      graph->root = NULL; |
   graph = NULL; |
} |
// Creates an edge from org to tgt of the given kind. The edge starts out as |
// a one-element circular list in both directions; linking it into the |
// nodes' edge lists is left to the caller. |
Graph::Edge::Edge(Node *org, Node *tgt, Type kind) |
{ |
   origin = org; |
   target = tgt; |
   type = kind; |
   for (int dir = 0; dir < 2; ++dir) { |
      next[dir] = this; |
      prev[dir] = this; |
   } |
} |
// Returns true if this node can be reached from node by following outgoing |
// edges other than BACK and DUMMY ones, without continuing past term. |
bool |
Graph::Node::reachableBy(const Node *node, const Node *term) const |
{ |
   const int seq = graph->nextSequence(); |
   std::stack<const Node *> pending; |
   pending.push(node); |
   while (!pending.empty()) { |
      const Node *cur = pending.top(); |
      pending.pop(); |
      if (cur == this) |
         return true; |
      if (cur == term) |
         continue; // do not search beyond the terminating node |
      for (EdgeIterator ei = cur->outgoing(); !ei.end(); ei.next()) { |
         const Edge::Type kind = ei.getType(); |
         if (kind == Edge::BACK || kind == Edge::DUMMY) |
            continue; |
         // visit() is true only the first time a node is seen in this search |
         if (ei.getNode()->visit(seq)) |
            pending.push(ei.getNode()); |
      } |
   } |
   // Every popped node was compared against this above, so running out of |
   // nodes means it was not reached. |
   return false; |
} |
// Iterator over the nodes reachable from the graph's root through outgoing |
// edges, in depth-first pre- or post-order. The order is computed once at |
// construction and stored in an array. |
class DFSIterator : public Iterator |
{ |
public: |
   DFSIterator(Graph *graph, const bool preorder) |
   { |
      unsigned int seq = graph->nextSequence(); |
      const unsigned int capacity = graph->getSize(); |
      // one extra, zeroed slot past the last possible node |
      nodes = new Graph::Node * [capacity + 1]; |
      count = 0; |
      pos = 0; |
      nodes[capacity] = 0; |
      Graph::Node *start = graph->getRoot(); |
      if (start) { |
         start->visit(seq); |
         search(start, preorder, seq); |
      } |
   } |
   ~DFSIterator() |
   { |
      delete[] nodes; // deleting a null pointer is a no-op |
   } |
   // Recursive DFS from node; records node before (preorder) or after |
   // (postorder) its not yet visited successors. |
   void search(Graph::Node *node, const bool preorder, const int sequence) |
   { |
      if (preorder) |
         nodes[count++] = node; |
      Graph::EdgeIterator ei = node->outgoing(); |
      while (!ei.end()) { |
         Graph::Node *succ = ei.getNode(); |
         if (succ->visit(sequence)) |
            search(succ, preorder, sequence); |
         ei.next(); |
      } |
      if (!preorder) |
         nodes[count++] = node; |
   } |
   virtual bool end() const { return pos >= count; } |
   virtual void next() { if (pos < count) ++pos; } |
   virtual void *get() const { return nodes[pos]; } |
   virtual void reset() { pos = 0; } |
protected: |
   Graph::Node **nodes; // nodes in visiting order |
   int count;           // number of valid entries in nodes |
   int pos;             // current position |
}; |
// Returns a new depth-first iterator over the graph, in preorder when |
// preorder is true and in postorder otherwise. |
IteratorRef Graph::iteratorDFS(bool preorder) |
{ |
return IteratorRef(new DFSIterator(this, preorder)); |
} |
// Same as iteratorDFS(): the DFS iterator already stores its node order at |
// construction, so no separate implementation is used here. |
IteratorRef Graph::safeIteratorDFS(bool preorder) |
{ |
return this->iteratorDFS(preorder); |
} |
// Iterator over the graph's nodes in an order suited to control flow: a |
// node reached through TREE/FORWARD/DUMMY edges is emitted only after as |
// many of its incoming edges have been processed as it has non-BACK |
// incident edges; targets of CROSS edges are deferred until no other node |
// is pending. The order is computed once at construction. |
class CFGIterator : public Iterator |
{ |
public: |
CFGIterator(Graph *graph) |
{ |
// one extra, zeroed slot past the last possible node |
nodes = new Graph::Node * [graph->getSize() + 1]; |
count = 0; |
pos = 0; |
nodes[graph->getSize()] = 0; |
// TODO: argh, use graph->sequence instead of tag and just raise it by > 1 |
// tag serves as the per-node counter of processed incoming edges |
for (IteratorRef it = graph->iteratorDFS(); !it->end(); it->next()) |
reinterpret_cast<Graph::Node *>(it->get())->tag = 0; |
if (graph->getRoot()) |
search(graph->getRoot(), graph->nextSequence()); |
} |
~CFGIterator() |
{ |
if (nodes) |
delete[] nodes; |
} |
virtual void *get() const { return nodes[pos]; } |
virtual bool end() const { return pos >= count; } |
virtual void next() { if (pos < count) ++pos; } |
virtual void reset() { pos = 0; } |
private: |
// Worklist traversal starting at node; fills nodes[] in emission order. |
void search(Graph::Node *node, const int sequence) |
{ |
// bb: nodes ready to be emitted; cross: deferred targets of CROSS edges |
Stack bb, cross; |
bb.push(node); |
while (bb.getSize()) { |
node = reinterpret_cast<Graph::Node *>(bb.pop().u.p); |
assert(node); |
// skip nodes already emitted during this traversal |
if (!node->visit(sequence)) |
continue; |
node->tag = 0; |
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) { |
switch (ei.getType()) { |
case Graph::Edge::TREE: |
case Graph::Edge::FORWARD: |
case Graph::Edge::DUMMY: |
// ready once the counter reaches the number of non-BACK incident edges |
if (++(ei.getNode()->tag) == ei.getNode()->incidentCountFwd()) |
bb.push(ei.getNode()); |
break; |
case Graph::Edge::BACK: |
continue; |
case Graph::Edge::CROSS: |
// queue a CROSS target only the first time it is encountered |
if (++(ei.getNode()->tag) == 1) |
cross.push(ei.getNode()); |
break; |
default: |
assert(!"unknown edge kind in CFG"); |
break; |
} |
} |
nodes[count++] = node; |
// presumably transfers the deferred CROSS targets into the worklist - |
// Stack::moveTo is defined elsewhere, confirm |
if (bb.getSize() == 0) |
cross.moveTo(bb); |
} |
} |
private: |
Graph::Node **nodes; |
int count; |
int pos; |
}; |
// Returns a new CFGIterator over this graph. |
IteratorRef Graph::iteratorCFG() |
{ |
return IteratorRef(new CFGIterator(this)); |
} |
// Same as iteratorCFG(): the CFG iterator already stores its node order at |
// construction, so no separate implementation is used here. |
IteratorRef Graph::safeIteratorCFG() |
{ |
return this->iteratorCFG(); |
} |
/** |
* Edge classification: |
* |
* We have a graph and want to classify the edges into one of four types: |
* - TREE: edges that belong to a spanning tree of the graph |
* - FORWARD: edges from a node to a descendent in the spanning tree |
* - BACK: edges from a node to a parent (or itself) in the spanning tree |
* - CROSS: all other edges (because they cross between branches in the |
* spanning tree) |
*/ |
// Reclassifies the graph's edges by a depth-first search from the root and |
// leaves the graph's sequence counter at the number of nodes numbered. |
// Does nothing for a graph without a root (previously a NULL root was |
// dereferenced by the search). |
void Graph::classifyEdges() |
{ |
   if (!root) |
      return; |
   // Reset visit marker and tag of every node reachable through outgoing |
   // edges; the search treats a visit marker of 0 as "not numbered yet". |
   // NOTE(review): the search also follows incident edges, so nodes that are |
   // reachable only against edge direction are not reset here - confirm |
   // whether such nodes can occur. |
   for (IteratorRef it = iteratorDFS(true); !it->end(); it->next()) { |
      Node *node = reinterpret_cast<Node *>(it->get()); |
      node->visit(0); |
      node->tag = 0; |
   } |
   int seq = 0; |
   classifyDFS(root, seq); |
   sequence = seq; |
} |
// Recursive helper of classifyEdges(). |
// Numbers curr with the next sequence value, then classifies the edges |
// attached to it, recursing into nodes that have no number yet (visit |
// marker 0). While a node is being processed its tag is 1, which is how an |
// edge to an active ancestor (BACK) is told apart from a CROSS edge. |
// DUMMY edges are left untouched. |
// |
// The edge lists are circular, so they are walked with EdgeIterator; the |
// previous "until NULL" loops never terminated once a node had an edge. |
void Graph::classifyDFS(Node *curr, int& seq) |
{ |
   curr->visit(++seq); |
   curr->tag = 1; |
   // edges leaving curr |
   for (EdgeIterator ei = curr->outgoing(); !ei.end(); ei.next()) { |
      Edge *edge = ei.getEdge(); |
      Node *node = edge->target; |
      if (edge->type == Edge::DUMMY) |
         continue; |
      if (node->getSequence() == 0) { |
         edge->type = Edge::TREE; |
         classifyDFS(node, seq); |
      } else |
      if (node->getSequence() > curr->getSequence()) { |
         edge->type = Edge::FORWARD; |
      } else { |
         edge->type = node->tag ? Edge::BACK : Edge::CROSS; |
      } |
   } |
   // edges entering curr |
   // NOTE(review): this pass applies the same rules from the target's point |
   // of view and can relabel an edge the outgoing pass of its origin has |
   // already classified (e.g. the TREE edge curr was reached through); the |
   // original rules are kept as they were - confirm the intended result. |
   for (EdgeIterator ei = curr->incident(); !ei.end(); ei.next()) { |
      Edge *edge = ei.getEdge(); |
      Node *node = edge->origin; |
      if (edge->type == Edge::DUMMY) |
         continue; |
      if (node->getSequence() == 0) { |
         edge->type = Edge::TREE; |
         classifyDFS(node, seq); |
      } else |
      if (node->getSequence() > curr->getSequence()) { |
         edge->type = Edge::FORWARD; |
      } else { |
         edge->type = node->tag ? Edge::BACK : Edge::CROSS; |
      } |
   } |
   curr->tag = 0; |
} |
// Returns the smallest sum of node weights along a path from a to b, or -1 |
// if b was not reached. The weight of every node on the path except b is |
// counted. @weight is indexed by Node::tag, so the tags of all nodes |
// involved must be valid indices into it. |
int |
Graph::findLightestPathWeight(Node *a, Node *b, const std::vector<int> &weight) |
{ |
   const int unreached = std::numeric_limits<int>::max(); |
   // path[tag]: lightest weight found so far for reaching that node |
   std::vector<int> path(weight.size(), unreached); |
   // nodes that have been reached but not expanded yet |
   std::list<Node *> nodeList; |
   const int seq = nextSequence(); |
   path[a->tag] = 0; |
   for (Node *c = a; c && c != b;) { |
      // weight of any path continuing through c |
      const int p = path[c->tag] + weight[c->tag]; |
      for (EdgeIterator ei = c->outgoing(); !ei.end(); ei.next()) { |
         Node *t = ei.getNode(); |
         // only nodes not expanded during this search |
         if (t->getSequence() < seq) { |
            if (path[t->tag] == unreached) |
               nodeList.push_front(t); |
            if (p < path[t->tag]) |
               path[t->tag] = p; |
         } |
      } |
      c->visit(seq); |
      // Remove c from the candidates and pick the lightest remaining one. |
      // erase() returns the following iterator, so the loop only advances |
      // when nothing was erased; the old "erase, then --n" stepped before |
      // begin() when c was the first element, and c itself could be chosen |
      // again as the next node. |
      Node *next = NULL; |
      for (std::list<Node *>::iterator n = nodeList.begin(); |
           n != nodeList.end();) { |
         if ((*n) == c) { |
            n = nodeList.erase(n); |
            continue; |
         } |
         if (!next || path[(*n)->tag] < path[next->tag]) |
            next = *n; |
         ++n; |
      } |
      c = next; |
   } |
   if (path[b->tag] == unreached) |
      return -1; |
   return path[b->tag]; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_graph.h |
---|
0,0 → 1,228 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_GRAPH_H__ |
#define __NV50_IR_GRAPH_H__ |
#include "codegen/nv50_ir_util.h" |
#include <vector> |
namespace nv50_ir { |
#define ITER_NODE(x) reinterpret_cast<Graph::Node *>((x).get()) |
#define ITER_EDGE(x) reinterpret_cast<Graph::Edge *>((x).get()) |
// A connected graph. |
// Directed graph of Nodes connected by typed Edges. Each node keeps two |
// circular, doubly linked edge lists: its outgoing edges (list index 0) and |
// its incident edges (list index 1). |
class Graph |
{ |
public: |
   class Node; |
   class Edge |
   { |
   public: |
      enum Type |
      { |
         UNKNOWN, |
         TREE, |
         FORWARD, |
         BACK, |
         CROSS, // e.g. loop break |
         DUMMY |
      }; |
      // The first node is the origin, the second the target (the parameters |
      // were previously named dst/src, the opposite of how they are used). |
      Edge(Node *org, Node *tgt, Type kind); |
      ~Edge() { unlink(); } |
      inline Node *getOrigin() const { return origin; } |
      inline Node *getTarget() const { return target; } |
      inline Type getType() const { return type; } |
      const char *typeStr() const; |
   private: |
      Node *origin; |
      Node *target; |
      Type type; |
      Edge *next[2]; // next edge outgoing/incident from/to origin/target |
      Edge *prev[2]; |
      void unlink(); |
      friend class Graph; |
   }; |
   // Walks one circular edge list exactly once, starting at a given edge. |
   class EdgeIterator : public Iterator |
   { |
   public: |
      EdgeIterator() : e(0), t(0), d(0), rev(false) { } |
      // dir selects the list (0: outgoing, 1: incident); with reverse set |
      // the walk starts at the edge before first and follows prev links. |
      EdgeIterator(Graph::Edge *first, int dir, bool reverse) |
         : d(dir), rev(reverse) |
      { |
         t = e = ((rev && first) ? first->prev[d] : first); |
      } |
      virtual void next() |
      { |
         Graph::Edge *n = (rev ? e->prev[d] : e->next[d]); |
         // back at the starting edge: the whole list has been seen |
         e = (n == t ? NULL : n); |
      } |
      virtual bool end() const { return !e; } |
      virtual void *get() const { return e; } |
      // node at the far end of the current edge |
      inline Node *getNode() const { assert(e); return d ? |
         e->origin : e->target; } |
      inline Edge *getEdge() const { return e; } |
      // UNKNOWN when the iterator is at its end |
      inline Edge::Type getType() const { return e ? e->getType() : Edge::UNKNOWN; } |
   private: |
      Graph::Edge *e; // current edge, NULL at the end |
      Graph::Edge *t; // edge the walk started at |
      int d;          // list index |
      bool rev; |
   }; |
   class Node |
   { |
   public: |
      Node(void *); |
      ~Node() { cut(); } |
      void attach(Node *, Edge::Type); |
      bool detach(Node *); |
      void cut(); |
      inline EdgeIterator outgoing(bool reverse = false) const; |
      inline EdgeIterator incident(bool reverse = false) const; |
      inline Node *parent() const; // returns NULL if count(incident edges) != 1 |
      bool reachableBy(const Node *node, const Node *term) const; |
      inline bool visit(int); |
      inline int getSequence() const; |
      inline int incidentCountFwd() const; // count of incident non-back edges |
      inline int incidentCount() const { return inCount; } |
      inline int outgoingCount() const { return outCount; } |
      Graph *getGraph() const { return graph; } |
      void *data; |
   private: |
      Edge *in; |
      Edge *out; |
      Graph *graph; |
      int visited; |
      int16_t inCount; |
      int16_t outCount; |
   public: |
      int tag; // for temporary use |
      friend class Graph; |
   }; |
public: |
   Graph(); |
   ~Graph(); // does *not* free the nodes (make it an option ?) |
   inline Node *getRoot() const { return root; } |
   inline unsigned int getSize() const { return size; } |
   inline int nextSequence(); |
   void insert(Node *node); // attach to or set as root |
   IteratorRef iteratorDFS(bool preorder = true); |
   IteratorRef iteratorCFG(); |
   // safe iterators are unaffected by changes to the *edges* of the graph |
   IteratorRef safeIteratorDFS(bool preorder = true); |
   IteratorRef safeIteratorCFG(); |
   void classifyEdges(); |
   // @weights: indexed by Node::tag |
   int findLightestPathWeight(Node *, Node *, const std::vector<int>& weights); |
private: |
   void classifyDFS(Node *, int&); |
private: |
   Node *root; |
   unsigned int size; |
   int sequence; |
}; |
// Advances the graph's sequence counter and returns the new value. |
int Graph::nextSequence() |
{ |
   sequence += 1; |
   return sequence; |
} |
// Returns the origin of the single incident edge, or NULL when the node |
// does not have exactly one incident edge. |
Graph::Node *Graph::Node::parent() const |
{ |
   if (inCount == 1) { |
      assert(in); |
      return in->origin; |
   } |
   return NULL; |
} |
// Marks the node with v. Returns false if it already carried that mark, |
// true if the mark changed. |
bool Graph::Node::visit(int v) |
{ |
   const bool changed = (visited != v); |
   visited = v; |
   return changed; |
} |
// Returns the value this node was last marked with by visit(). |
int Graph::Node::getSequence() const |
{ |
return visited; |
} |
// Iterator over the edges leaving this node (edge list index 0), walked |
// backwards when reverse is set. |
Graph::EdgeIterator Graph::Node::outgoing(bool reverse) const |
{ |
return EdgeIterator(out, 0, reverse); |
} |
// Iterator over the edges entering this node (edge list index 1), walked |
// backwards when reverse is set. |
Graph::EdgeIterator Graph::Node::incident(bool reverse) const |
{ |
return EdgeIterator(in, 1, reverse); |
} |
// Counts the incident edges whose type is not BACK. |
int Graph::Node::incidentCountFwd() const |
{ |
   int fwd = 0; |
   EdgeIterator ei = incident(); |
   while (!ei.end()) { |
      if (ei.getType() != Edge::BACK) |
         fwd += 1; |
      ei.next(); |
   } |
   return fwd; |
} |
} // namespace nv50_ir |
#endif // __NV50_IR_GRAPH_H__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h |
---|
0,0 → 1,420 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_INLINES_H__ |
#define __NV50_IR_INLINES_H__ |
// Remaps the low three bits of @cc through a table that exchanges bit 0 and
// bit 2 (1 <-> 4, 3 <-> 6); all higher bits are passed through unchanged.
static inline CondCode reverseCondCode(CondCode cc)
{
   static const uint8_t swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
   const int lowBits = cc & 7;
   const int highBits = cc & ~7;

   return static_cast<CondCode>(swapped[lowBits] | highBits);
}
// Flips the low three bits of @cc; higher bits are left untouched.
static inline CondCode inverseCondCode(CondCode cc)
{
   const int flipped = cc ^ 7;
   return static_cast<CondCode>(flipped);
}
// True when @f lies in the enum range [FILE_MEMORY_CONST, FILE_MEMORY_LOCAL].
static inline bool isMemoryFile(DataFile f)
{
   if (f < FILE_MEMORY_CONST)
      return false;
   return f <= FILE_MEMORY_LOCAL;
}
// contrary to asTex(), this will never include SULD/SUST |
// True when @op lies in the enum range [OP_TEX, OP_TEXPREP].
static inline bool isTextureOp(operation op)
{
   if (op < OP_TEX)
      return false;
   return op <= OP_TEXPREP;
}
// True when @op lies in the enum range [OP_SULDB, OP_SULEA].
static inline bool isSurfaceOp(operation op)
{
   if (op < OP_SULDB)
      return false;
   return op <= OP_SULEA;
}
// Size of a value of type @ty in bytes; 0 for any type not listed below.
static inline unsigned int typeSizeof(DataType ty)
{
   unsigned int bytes = 0;

   switch (ty) {
   case TYPE_U8:
   case TYPE_S8:
      bytes = 1;
      break;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      bytes = 2;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      bytes = 4;
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      bytes = 8;
      break;
   case TYPE_B96:
      bytes = 12;
      break;
   case TYPE_B128:
      bytes = 16;
      break;
   default:
      break;
   }
   return bytes;
}
// Shift amount associated with the size of @ty: 1 for 16-bit, 2 for 32-bit,
// 3 for 64-bit types, 4 for both B96 and B128, and 0 for everything else
// (including the 8-bit types).
static inline unsigned int typeSizeofLog2(DataType ty)
{
   unsigned int shift = 0;

   switch (ty) {
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      shift = 1;
      break;
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      shift = 2;
      break;
   case TYPE_F64:
   case TYPE_U64:
   case TYPE_S64:
      shift = 3;
      break;
   case TYPE_B96:
   case TYPE_B128:
      shift = 4;
      break;
   default:
      break;
   }
   return shift;
}
// Picks a DataType for a value of @size bytes.
// @flt selects the float type (sizes 2, 4, 8 only) and takes priority over
// @sgn, which selects the signed integer type. Sizes 12 and 16 map to
// B96/B128; any other size yields TYPE_NONE.
static inline DataType typeOfSize(unsigned int size,
                                  bool flt = false, bool sgn = false)
{
   if (size == 1)
      return sgn ? TYPE_S8 : TYPE_U8;
   if (size == 12)
      return TYPE_B96;
   if (size == 16)
      return TYPE_B128;

   if (size == 2) {
      if (flt)
         return TYPE_F16;
      return sgn ? TYPE_S16 : TYPE_U16;
   }
   if (size == 4) {
      if (flt)
         return TYPE_F32;
      return sgn ? TYPE_S32 : TYPE_U32;
   }
   if (size == 8) {
      if (flt)
         return TYPE_F64;
      return sgn ? TYPE_S64 : TYPE_U64;
   }
   return TYPE_NONE;
}
// True when @ty lies in the enum range [TYPE_F16, TYPE_F64].
static inline bool isFloatType(DataType ty)
{
   if (ty < TYPE_F16)
      return false;
   return ty <= TYPE_F64;
}
// True when @ty is one of the signed integer types.
// TYPE_S64 is included: the type helpers in this file (typeSizeof,
// typeOfSize) already treat it as a regular 64-bit signed integer, so
// leaving it out here made 64-bit signed values look unsigned.
static inline bool isSignedIntType(DataType ty)
{
   switch (ty) {
   case TYPE_S8:
   case TYPE_S16:
   case TYPE_S32:
   case TYPE_S64:
      return true;
   default:
      return false;
   }
}
// True for every type that carries a sign (signed integers and floats).
// The unsigned/untyped set is listed explicitly; TYPE_U64 belongs to it
// (it previously fell through to 'default' and was reported as signed,
// which e.g. sends an unsigned 64-bit multiply down the signed path).
static inline bool isSignedType(DataType ty)
{
   switch (ty) {
   case TYPE_NONE:
   case TYPE_U8:
   case TYPE_U16:
   case TYPE_U32:
   case TYPE_U64:
   case TYPE_B96:
   case TYPE_B128:
      return false;
   default:
      return true;
   }
}
// Maps an unsigned integer type to the signed type of the same width.
// Any other type (already signed, float, untyped) is returned unchanged.
// TYPE_U64 is handled like the narrower unsigned types; it used to be
// returned as-is.
static inline DataType intTypeToSigned(DataType ty)
{
   switch (ty) {
   case TYPE_U64: return TYPE_S64;
   case TYPE_U32: return TYPE_S32;
   case TYPE_U16: return TYPE_S16;
   case TYPE_U8: return TYPE_S8;
   default:
      return ty;
   }
}
// Returns the owning instruction's source reference that serves as the
// indirect address for dimension @dim, or NULL if there is none.
const ValueRef *ValueRef::getIndirect(int dim) const
{
   if (!isIndirect(dim))
      return NULL;
   return &insn->src(indirect[dim]);
}
// Register file of the referenced value, or FILE_NULL when no value is set.
DataFile ValueRef::getFile() const
{
   if (value)
      return value->reg.file;
   return FILE_NULL;
}
unsigned int ValueRef::getSize() const |
{ |
return value ? value->reg.size : 0; |
} |
// Returns the 'join' value of the referenced value; a value must be set.
Value *ValueRef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
// Returns the 'join' value of the defined value; a value must be set.
Value *ValueDef::rep() const
{
   assert(value);
   Value *const joined = value->join;
   return joined;
}
// Register file of the defined value, or FILE_NULL when no value is set.
DataFile ValueDef::getFile() const
{
   if (value)
      return value->reg.file;
   return FILE_NULL;
}
unsigned int ValueDef::getSize() const |
{ |
return value ? value->reg.size : 0; |
} |
// Remembers the currently defined value (as an LValue) in 'origin', then
// makes this def point at @lval.
void ValueDef::setSSA(LValue *lval)
{
   LValue *const previous = value->asLValue();

   origin = previous;
   set(lval);
}
// Returns the value recorded in 'origin' (written by setSSA()).
const LValue *ValueDef::preSSA() const
{
   const LValue *const recorded = origin;
   return recorded;
}
// Instruction owning the first definition of this value, or NULL when the
// value has no definitions.
Instruction *Value::getInsn() const
{
   if (defs.empty())
      return NULL;
   return defs.front()->getInsn();
}
// Returns the instruction that defines this value, or NULL when the value |
// has no definitions. If the value has been joined with another one |
// (join != this), the definition that still refers to this exact value is |
// searched for first. |
Instruction *Value::getUniqueInsn() const |
{ |
if (defs.empty()) |
return NULL; |
// after regalloc, the definitions of coalesced values are linked |
if (join != this) { |
for (DefCIterator it = defs.begin(); it != defs.end(); ++it) |
if ((*it)->get() == this) |
return (*it)->getInsn(); |
// should be unreachable and trigger assertion at the end |
} |
#ifdef DEBUG |
// debug builds only: for values with a negative register id, warn if more |
// than one definition refers to this value (the scan stops at two) |
if (reg.data.id < 0) { |
int n = 0; |
for (DefCIterator it = defs.begin(); n < 2 && it != defs.end(); ++it) |
if ((*it)->get() == this) // don't count joined values |
++n; |
if (n > 1) |
WARN("value %%%i not uniquely defined\n", id); // return NULL ? |
} |
#endif |
// the first definition is expected to be the one referring to this value |
assert(defs.front()->get() == this); |
return defs.front()->getInsn(); |
} |
inline bool Instruction::constrainedDefs() const |
{ |
return defExists(1) || op == OP_UNION; |
} |
// Returns the value used as indirect address for dimension @dim of source
// @s, or NULL when that source is not indirect in @dim.
Value *Instruction::getIndirect(int s, int dim) const
{
   if (!srcs[s].isIndirect(dim))
      return NULL;
   return getSrc(srcs[s].indirect[dim]);
}
// Returns the source at index predSrc, or NULL when predSrc is negative.
Value *Instruction::getPredicate() const
{
   if (predSrc < 0)
      return NULL;
   return getSrc(predSrc);
}
void Instruction::setFlagsDef(int d, Value *val) |
{ |
if (val) { |
if (flagsDef < 0) |
flagsDef = d; |
setDef(flagsDef, val); |
} else { |
if (flagsDef >= 0) { |
setDef(flagsDef, NULL); |
flagsDef = -1; |
} |
} |
} |
void Instruction::setFlagsSrc(int s, Value *val) |
{ |
flagsSrc = s; |
setSrc(flagsSrc, val); |
} |
// Returns the source at index tex.rIndirectSrc, or NULL when that index is
// negative.
Value *TexInstruction::getIndirectR() const
{
   if (tex.rIndirectSrc < 0)
      return NULL;
   return getSrc(tex.rIndirectSrc);
}
// Returns the source at index tex.sIndirectSrc, or NULL when that index is
// negative.
// NOTE: this previously consulted tex.rIndirectSrc (copy/paste of
// getIndirectR()), so it returned the R indirect source instead of the S one.
Value *TexInstruction::getIndirectS() const
{
   return tex.sIndirectSrc >= 0 ? getSrc(tex.sIndirectSrc) : NULL;
}
// Downcast to CmpInstruction when the opcode lies in [OP_SET_AND, OP_SLCT]
// and is not OP_SELP; NULL otherwise.
CmpInstruction *Instruction::asCmp()
{
   const bool inRange = op >= OP_SET_AND && op <= OP_SLCT;

   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<CmpInstruction *>(this);
}

// Const overload of asCmp(); same opcode test.
const CmpInstruction *Instruction::asCmp() const
{
   const bool inRange = op >= OP_SET_AND && op <= OP_SLCT;

   if (!inRange || op == OP_SELP)
      return NULL;
   return static_cast<const CmpInstruction *>(this);
}
// Downcast to FlowInstruction when the opcode lies in [OP_BRA, OP_JOIN];
// NULL otherwise.
FlowInstruction *Instruction::asFlow()
{
   if (op < OP_BRA || op > OP_JOIN)
      return NULL;
   return static_cast<FlowInstruction *>(this);
}

// Const overload of asFlow(); same opcode test.
const FlowInstruction *Instruction::asFlow() const
{
   if (op < OP_BRA || op > OP_JOIN)
      return NULL;
   return static_cast<const FlowInstruction *>(this);
}
// Downcast to TexInstruction when the opcode lies in [OP_TEX, OP_SULEA];
// NULL otherwise.
TexInstruction *Instruction::asTex()
{
   if (op < OP_TEX || op > OP_SULEA)
      return NULL;
   return static_cast<TexInstruction *>(this);
}

// Const overload of asTex(); same opcode test.
const TexInstruction *Instruction::asTex() const
{
   if (op < OP_TEX || op > OP_SULEA)
      return NULL;
   return static_cast<const TexInstruction *>(this);
}
// Clones @obj into function @ctx. Every source of @obj is registered in the
// clone policy as mapping to itself before obj->clone() is invoked.
static inline Instruction *cloneForward(Function *ctx, Instruction *obj)
{
   DeepClonePolicy<Function> policy(ctx);
   int s = 0;

   while (obj->srcExists(s)) {
      policy.set(obj->getSrc(s), obj->getSrc(s));
      ++s;
   }
   return obj->clone(policy);
}
// XXX: use a virtual function so we're really really safe ? |
// Downcast to LValue when the register file lies in
// [FILE_GPR, FILE_ADDRESS]; NULL otherwise.
LValue *Value::asLValue()
{
   if (reg.file < FILE_GPR || reg.file > FILE_ADDRESS)
      return NULL;
   return static_cast<LValue *>(this);
}
// Downcast to Symbol when the register file is FILE_MEMORY_CONST or any
// later enum value; NULL otherwise.
Symbol *Value::asSym()
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<Symbol *>(this);
}

// Const overload of asSym(); same file test.
const Symbol *Value::asSym() const
{
   if (reg.file < FILE_MEMORY_CONST)
      return NULL;
   return static_cast<const Symbol *>(this);
}
void Symbol::setOffset(int32_t offset) |
{ |
reg.data.offset = offset; |
} |
// Sets the base symbol to @base and stores @offset in reg.data.offset.
void Symbol::setAddress(Symbol *base, int32_t offset)
{
   Symbol *const newBase = base;
   const int32_t newOffset = offset;

   baseSym = newBase;
   reg.data.offset = newOffset;
}
// Stores the system-value semantic @sv and its @index in reg.data.sv.
void Symbol::setSV(SVSemantic sv, uint32_t index)
{
   const SVSemantic semantic = sv;
   const uint32_t slot = index;

   reg.data.sv.sv = semantic;
   reg.data.sv.index = slot;
}
// Downcast to ImmediateValue when the register file is FILE_IMMEDIATE;
// NULL otherwise.
ImmediateValue *Value::asImm()
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<ImmediateValue *>(this);
}

// Const overload of asImm(); same file test.
const ImmediateValue *Value::asImm() const
{
   if (reg.file != FILE_IMMEDIATE)
      return NULL;
   return static_cast<const ImmediateValue *>(this);
}
// Reinterprets the iterator's current element as a Value pointer.
Value *Value::get(Iterator &it)
{
   Value *const current = reinterpret_cast<Value *>(it.get());
   return current;
}
bool BasicBlock::reachableBy(const BasicBlock *by, const BasicBlock *term) |
{ |
return cfg.reachableBy(&by->cfg, &term->cfg); |
} |
// Reinterprets the iterator's current element as a BasicBlock pointer.
BasicBlock *BasicBlock::get(Iterator &iter)
{
   BasicBlock *const current = reinterpret_cast<BasicBlock *>(iter.get());
   return current;
}

// Reinterprets the data pointer of graph node @node (must be non-NULL) as a
// BasicBlock pointer.
BasicBlock *BasicBlock::get(Graph::Node *node)
{
   assert(node);
   BasicBlock *const owner = reinterpret_cast<BasicBlock *>(node->data);
   return owner;
}
// Reinterprets the data pointer of graph node @node (must be non-NULL) as a
// Function pointer.
Function *Function::get(Graph::Node *node)
{
   assert(node);
   Function *const owner = reinterpret_cast<Function *>(node->data);
   return owner;
}
// Looks up entry @id in allLValues and reinterprets it as an LValue pointer.
// Asserts that @id, compared as unsigned, is below the container size (so
// negative ids are rejected as well).
LValue *Function::getLValue(int id)
{
   const unsigned int index = (unsigned int)id;
   const unsigned int count = (unsigned int)allLValues.getSize();

   assert(index < count);
   return reinterpret_cast<LValue *>(allLValues.get(id));
}
#endif // __NV50_IR_INLINES_H__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp |
---|
0,0 → 1,292 |
/* |
* Copyright 2011 Christoph Bumiller |
* 2014 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_build_util.h" |
#include "codegen/nv50_ir_target_nvc0.h" |
#include "codegen/nv50_ir_lowering_gm107.h" |
#include <limits> |
namespace nv50_ir { |
// 2-bit per-lane operation selectors combined by QUADOP() below. |
#define QOP_ADD 0 |
#define QOP_SUBR 1 |
#define QOP_SUB 2 |
#define QOP_MOV2 3 |
// UL UR LL LR |
// QUADOP(q, r, s, t) packs four QOP_* selectors into one byte: |
// q -> bits 7:6, r -> bits 5:4, s -> bits 3:2, t -> bits 1:0. |
#define QUADOP(q, r, s, t) \ |
((QOP_##q << 6) | (QOP_##r << 4) | \ |
(QOP_##s << 2) | (QOP_##t << 0)) |
// Replaces texture instruction @i, which carries explicit derivatives in |
// i->dPdx / i->dPdy, by four OP_TEX executions (one per lane l = 0..3) whose |
// coordinates are built with SHFL + QUADOP, and merges the per-lane results |
// with OP_UNION into the original definitions. @i is removed from its block. |
// Always returns true. |
bool |
GM107LoweringPass::handleManualTXD(TexInstruction *i) |
{ |
// qOps[l][0] is the quadop applied when adding dPdx for lane l, |
// qOps[l][1] the one applied when adding dPdy |
static const uint8_t qOps[4][2] = |
{ |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 |
}; |
Value *def[4][4]; // def[c][l]: result component c produced in pass l |
Value *crd[3]; // per-dimension scratch coordinates fed to the cloned tex |
Value *tmp; |
Instruction *tex, *add; |
Value *zero = bld.loadImm(bld.getSSA(), 0); |
int l, c; |
const int dim = i->tex.target.getDim(); |
const int array = i->tex.target.isArray(); |
i->op = OP_TEX; // no need to clone dPdx/dPdy later |
for (c = 0; c < dim; ++c) |
crd[c] = bld.getScratch(); |
tmp = bld.getScratch(); |
for (l = 0; l < 4; ++l) { |
// mov coordinates from lane l to all lanes |
bld.mkOp(OP_QUADON, TYPE_NONE, NULL); |
for (c = 0; c < dim; ++c) { |
// coordinate sources start after the array index source, if any |
bld.mkOp2(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), bld.mkImm(l)); |
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero); |
add->subOp = 0x00; |
add->lanes = 1; /* abused for .ndv */ |
} |
// add dPdx from lane l to lanes dx |
for (c = 0; c < dim; ++c) { |
bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l)); |
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); |
add->subOp = qOps[l][0]; |
add->lanes = 1; /* abused for .ndv */ |
} |
// add dPdy from lane l to lanes dy |
for (c = 0; c < dim; ++c) { |
bld.mkOp2(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l)); |
add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]); |
add->subOp = qOps[l][1]; |
add->lanes = 1; /* abused for .ndv */ |
} |
// texture |
bld.insert(tex = cloneForward(func, i)); |
for (c = 0; c < dim; ++c) |
tex->setSrc(c + array, crd[c]); |
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); |
// save results |
for (c = 0; i->defExists(c); ++c) { |
Instruction *mov; |
def[c][l] = bld.getSSA(); |
mov = bld.mkMov(def[c][l], tex->getDef(c)); |
mov->fixed = 1; |
mov->lanes = 1 << l; // lane mask with only bit l set |
} |
} |
// merge the four per-lane copies of every result component into the |
// original definition |
for (c = 0; i->defExists(c); ++c) { |
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); |
for (l = 0; l < 4; ++l) |
u->setSrc(l, def[c][l]); |
} |
i->bb->remove(i); |
return true; |
} |
bool |
GM107LoweringPass::handleDFDX(Instruction *insn) |
{ |
Instruction *shfl; |
int qop = 0, xid = 0; |
switch (insn->op) { |
case OP_DFDX: |
qop = QUADOP(SUB, SUBR, SUB, SUBR); |
xid = 1; |
break; |
case OP_DFDY: |
qop = QUADOP(SUB, SUB, SUBR, SUBR); |
xid = 2; |
break; |
default: |
assert(!"invalid dfdx opcode"); |
break; |
} |
shfl = bld.mkOp2(OP_SHFL, TYPE_F32, bld.getScratch(), |
insn->getSrc(0), bld.mkImm(xid)); |
shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; |
insn->op = OP_QUADOP; |
insn->subOp = qop; |
insn->lanes = 0; /* abused for !.ndv */ |
insn->setSrc(1, insn->getSrc(0)); |
insn->setSrc(0, shfl->getDef(0)); |
return true; |
} |
bool |
GM107LoweringPass::handlePFETCH(Instruction *i) |
{ |
Value *tmp0 = bld.getScratch(); |
Value *tmp1 = bld.getScratch(); |
Value *tmp2 = bld.getScratch(); |
bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); |
bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); |
bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); |
bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); |
if (i->getSrc(1)) |
bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1)); |
else |
bld.mkOp1(OP_MOV , TYPE_U32, tmp2, i->getSrc(0)); |
bld.mkOp3(OP_MAD , TYPE_U32, tmp0, tmp0, tmp1, tmp2); |
i->setSrc(0, tmp0); |
i->setSrc(1, NULL); |
return true; |
} |
bool |
GM107LoweringPass::handlePOPCNT(Instruction *i) |
{ |
Value *tmp = bld.mkOp2v(OP_AND, i->sType, bld.getScratch(), |
i->getSrc(0), i->getSrc(1)); |
i->setSrc(0, tmp); |
i->setSrc(1, NULL); |
return TRUE; |
} |
// |
// - add quadop dance for texturing |
// - put FP outputs in GPRs |
// - convert instruction sequences |
// |
// Per-instruction entry point of the pass: positions the builder before @i, |
// runs checkPredicate() for conditionally executed instructions, then |
// dispatches on the opcode. Cases that 'return handleX(...)' propagate the |
// handler's result; all other cases fall out of the switch and return true. |
bool |
GM107LoweringPass::visit(Instruction *i) |
{ |
bld.setPosition(i, false); |
if (i->cc != CC_ALWAYS) |
checkPredicate(i); |
switch (i->op) { |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXF: |
case OP_TXG: |
return handleTEX(i->asTex()); |
case OP_TXD: |
return handleTXD(i->asTex()); |
case OP_TXLQ: |
return handleTXLQ(i->asTex()); |
case OP_TXQ: |
return handleTXQ(i->asTex()); |
case OP_EX2: |
// insert a PREEX2 writing the destination, then let EX2 read it back |
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); |
i->setSrc(0, i->getDef(0)); |
break; |
case OP_POW: |
return handlePOW(i); |
case OP_DIV: |
return handleDIV(i); |
case OP_MOD: |
return handleMOD(i); |
case OP_SQRT: |
return handleSQRT(i); |
case OP_EXPORT: |
return handleEXPORT(i); |
case OP_PFETCH: |
return handlePFETCH(i); |
case OP_EMIT: |
case OP_RESTART: |
return handleOUT(i); |
case OP_RDSV: |
return handleRDSV(i); |
case OP_WRSV: |
return handleWRSV(i); |
case OP_LOAD: |
if (i->src(0).getFile() == FILE_SHADER_INPUT) { |
if (prog->getType() == Program::TYPE_COMPUTE) { |
// compute programs: retarget the input load to const file index 0 |
i->getSrc(0)->reg.file = FILE_MEMORY_CONST; |
i->getSrc(0)->reg.fileIndex = 0; |
} else |
if (prog->getType() == Program::TYPE_GEOMETRY && |
i->src(0).isIndirect(0)) { |
// XXX: this assumes vec4 units |
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 0), bld.mkImm(4)); |
i->setIndirect(0, 0, ptr); |
i->op = OP_VFETCH; |
} else { |
i->op = OP_VFETCH; |
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP |
} |
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) { |
if (i->src(0).isIndirect(1)) { |
// merge the dimension-1 indirect into a single dimension-0 address: |
// shifted left by 16, or combined with the existing dimension-0 |
// indirect through INSBF |
Value *ptr; |
if (i->src(0).isIndirect(0)) |
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 1), bld.mkImm(0x1010), |
i->getIndirect(0, 0)); |
else |
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 1), bld.mkImm(16)); |
i->setIndirect(0, 1, NULL); |
i->setIndirect(0, 0, ptr); |
i->subOp = NV50_IR_SUBOP_LDC_IS; |
} |
} |
break; |
case OP_ATOM: |
{ |
// sample the file before handleATOM() runs |
const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; |
handleATOM(i); |
handleCasExch(i, cctl); |
} |
break; |
case OP_SULDB: |
case OP_SULDP: |
case OP_SUSTB: |
case OP_SUSTP: |
case OP_SUREDB: |
case OP_SUREDP: |
handleSurfaceOpNVE4(i->asTex()); |
break; |
case OP_DFDX: |
case OP_DFDY: |
handleDFDX(i); |
break; |
case OP_POPCNT: |
handlePOPCNT(i); |
break; |
default: |
break; |
} |
return true; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h |
---|
0,0 → 1,18 |
#include "codegen/nv50_ir_lowering_nvc0.h" |
namespace nv50_ir { |
// Lowering pass for GM107, derived from NVC0LoweringPass. |
// It declares its own visit() and handleManualTXD() as virtual (presumably |
// overriding the base-class versions -- confirm against NVC0LoweringPass) |
// and adds GM107-specific handlers for DFDX/DFDY, PFETCH and POPCNT. |
class GM107LoweringPass : public NVC0LoweringPass |
{ |
public: |
GM107LoweringPass(Program *p) : NVC0LoweringPass(p) {} |
private: |
virtual bool visit(Instruction *); |
virtual bool handleManualTXD(TexInstruction *); |
bool handleDFDX(Instruction *); |
bool handlePFETCH(Instruction *); |
bool handlePOPCNT(Instruction *); |
}; |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp |
---|
0,0 → 1,1394 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_build_util.h" |
#include "codegen/nv50_ir_target_nv50.h" |
namespace nv50_ir { |
// nv50 doesn't support 32 bit integer multiplication |
// |
// ah al * bh bl = LO32: (al * bh + ah * bl) << 16 + (al * bl) |
// ------------------- |
// al*bh 00 HI32: (al * bh + ah * bl) >> 16 + (ah * bh) + |
// ah*bh 00 00 ( carry1) << 16 + ( carry2) |
// al*bl |
// ah*bl 00 |
// |
// fffe0001 + fffe0001 |
// |
// Note that this sort of splitting doesn't work for signed values, so we |
// compute the sign on those manually and then perform an unsigned multiply. |
// Expands the 32- or 64-bit integer multiply @mul into operations on |
// half-width operands (see the scheme in the comment above), inserted |
// after @mul; @mul itself is deleted. |
// Returns false, leaving @mul untouched, when its source type is not a |
// 32- or 64-bit integer type; returns true after a successful expansion. |
static bool |
expandIntegerMUL(BuildUtil *bld, Instruction *mul) |
{ |
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH; |
DataType fTy; // full type |
switch (mul->sType) { |
case TYPE_S32: fTy = TYPE_U32; break; |
case TYPE_S64: fTy = TYPE_U64; break; |
default: fTy = mul->sType; break; |
} |
DataType hTy; // half type |
switch (fTy) { |
case TYPE_U32: hTy = TYPE_U16; break; |
case TYPE_U64: hTy = TYPE_U32; break; |
default: |
return false; |
} |
unsigned int fullSize = typeSizeof(fTy); |
unsigned int halfSize = typeSizeof(hTy); |
// i[2..4] are always assigned below, i[5], i[6] and i[8] only for |
// highResult; the fix-up loop at the end touches i[2..4] or i[2..5] |
Instruction *i[9]; |
bld->setPosition(mul, true); |
Value *s[2]; |
Value *a[2], *b[2]; |
Value *t[4]; |
for (int j = 0; j < 4; ++j) |
t[j] = bld->getSSA(fullSize); |
s[0] = mul->getSrc(0); |
s[1] = mul->getSrc(1); |
// signed sources: multiply the absolute values, the sign is fixed up below |
if (isSignedType(mul->sType)) { |
s[0] = bld->getSSA(fullSize); |
s[1] = bld->getSSA(fullSize); |
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0)); |
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1)); |
} |
// split sources into halves |
i[0] = bld->mkSplit(a, halfSize, s[0]); |
i[1] = bld->mkSplit(b, halfSize, s[1]); |
// t[1] = a[0] * b[1] + a[1] * b[0] (sum of the cross products) |
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); |
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); |
// t[3] = a[0] * b[0] + (t[1] << half width): the low result |
i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8)); |
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]); |
if (highResult) { |
Value *c[2]; |
Value *r[5]; |
// NOTE(review): for the 64-bit case halfSize * 8 is 32, so this shifts |
// an int by its full width -- confirm whether that path is ever taken |
Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8)); |
c[0] = bld->getSSA(1, FILE_FLAGS); |
c[1] = bld->getSSA(1, FILE_FLAGS); |
for (int j = 0; j < 5; ++j) |
r[j] = bld->getSSA(fullSize); |
i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8)); |
// r[2] = r[0] + imm when carry c[0] is set, r[0] otherwise |
i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm); |
bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]); |
bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]); |
i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]); |
// set carry defs / sources |
i[3]->setFlagsDef(1, c[0]); |
// actual result required in negative case, but ignored for |
// unsigned. for some reason the compiler ends up dropping the whole |
// instruction if the destination is unused but the flags are. |
if (isSignedType(mul->sType)) |
i[4]->setFlagsDef(1, c[1]); |
else |
i[4]->setFlagsDef(0, c[1]); |
i[6]->setPredicate(CC_C, c[0]); |
i[5]->setFlagsSrc(3, c[1]); |
if (isSignedType(mul->sType)) { |
Value *cc[2]; |
Value *rr[7]; |
Value *one = bld->getSSA(fullSize); |
bld->loadImm(one, 1); |
for (int j = 0; j < 7; j++) |
rr[j] = bld->getSSA(fullSize); |
// NOTE: this logic uses predicates because splitting basic blocks is |
// ~impossible during the SSA phase. The RA relies on a correlation |
// between edge order and phi node sources. |
// Set the sign of the result based on the inputs |
bld->mkOp2(OP_XOR, fTy, NULL, mul->getSrc(0), mul->getSrc(1)) |
->setFlagsDef(0, (cc[0] = bld->getSSA(1, FILE_FLAGS))); |
// 1s complement of 64-bit value |
bld->mkOp1(OP_NOT, fTy, rr[0], r[4]) |
->setPredicate(CC_S, cc[0]); |
bld->mkOp1(OP_NOT, fTy, rr[1], t[3]) |
->setPredicate(CC_S, cc[0]); |
// add to low 32-bits, keep track of the carry |
Instruction *n = bld->mkOp2(OP_ADD, fTy, NULL, rr[1], one); |
n->setPredicate(CC_S, cc[0]); |
n->setFlagsDef(0, (cc[1] = bld->getSSA(1, FILE_FLAGS))); |
// If there was a carry, add 1 to the upper 32 bits |
// XXX: These get executed even if they shouldn't be |
bld->mkOp2(OP_ADD, fTy, rr[2], rr[0], one) |
->setPredicate(CC_C, cc[1]); |
bld->mkMov(rr[3], rr[0]) |
->setPredicate(CC_NC, cc[1]); |
bld->mkOp2(OP_UNION, fTy, rr[4], rr[2], rr[3]); |
// Merge the results from the negative and non-negative paths |
bld->mkMov(rr[5], rr[4]) |
->setPredicate(CC_S, cc[0]); |
bld->mkMov(rr[6], r[4]) |
->setPredicate(CC_NS, cc[0]); |
bld->mkOp2(OP_UNION, mul->sType, mul->getDef(0), rr[5], rr[6]); |
} else { |
bld->mkMov(mul->getDef(0), r[4]); |
} |
} else { |
bld->mkMov(mul->getDef(0), t[3]); |
} |
delete_Instruction(bld->getProgram(), mul); |
// the partial products (i[2..4], plus i[5] for the high result) read |
// half-width sources |
for (int j = 2; j <= (highResult ? 5 : 4); ++j) |
if (i[j]) |
i[j]->sType = hTy; |
return true; |
} |
// 2-bit per-lane operation selectors combined by QUADOP() below. |
#define QOP_ADD 0 |
#define QOP_SUBR 1 |
#define QOP_SUB 2 |
#define QOP_MOV2 3 |
// UL UR LL LR |
// QUADOP(q, r, s, t) packs four QOP_* selectors into one byte: |
// q -> bits 7:6, r -> bits 5:4, s -> bits 3:2, t -> bits 1:0. |
#define QUADOP(q, r, s, t) \ |
((QOP_##q << 6) | (QOP_##r << 4) | \ |
(QOP_##s << 2) | (QOP_##t << 0)) |
// Pass with per-function and per-basic-block visitors; its definitions |
// below remove no-ops, emulate PRERET on chipsets below 0xa0, split 64-bit |
// operations and replace zero immediates by register 63. |
class NV50LegalizePostRA : public Pass |
{ |
private: |
virtual bool visit(Function *); |
virtual bool visit(BasicBlock *); |
void handlePRERET(FlowInstruction *); |
void replaceZero(Instruction *); |
LValue *r63; // GPR with id 63, created in visit(Function *) |
}; |
bool |
NV50LegalizePostRA::visit(Function *fn) |
{ |
Program *prog = fn->getProgram(); |
r63 = new_LValue(fn, FILE_GPR); |
r63->reg.data.id = 63; |
// this is actually per-program, but we can do it all on visiting main() |
std::list<Instruction *> *outWrites = |
reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv); |
if (outWrites) { |
for (std::list<Instruction *>::iterator it = outWrites->begin(); |
it != outWrites->end(); ++it) |
(*it)->getSrc(1)->defs.front()->getInsn()->setDef(0, (*it)->getSrc(0)); |
// instructions will be deleted on exit |
outWrites->clear(); |
} |
return true; |
} |
void |
NV50LegalizePostRA::replaceZero(Instruction *i) |
{ |
for (int s = 0; i->srcExists(s); ++s) { |
ImmediateValue *imm = i->getSrc(s)->asImm(); |
if (imm && imm->reg.data.u64 == 0) |
i->setSrc(s, r63); |
} |
} |
// Emulate PRERET: jump to the target and call to the origin from there |
// |
// WARNING: atm only works if BBs are affected by at most a single PRERET |
// |
// BB:0 |
// preret BB:3 |
// (...) |
// BB:3 |
// (...) |
// ---> |
// BB:0 |
// bra BB:3 + n0 (directly to the call; move to beginning of BB and fixate) |
// (...) |
// BB:3 |
// bra BB:3 + n1 (skip the call) |
// call BB:0 + n2 (skip bra at beginning of BB:0) |
// (...) |
void |
NV50LegalizePostRA::handlePRERET(FlowInstruction *pre) |
{ |
BasicBlock *bbE = pre->bb; |
BasicBlock *bbT = pre->target.bb; |
pre->subOp = NV50_IR_SUBOP_EMU_PRERET + 0; |
bbE->remove(pre); |
bbE->insertHead(pre); |
Instruction *skip = new_FlowInstruction(func, OP_PRERET, bbT); |
Instruction *call = new_FlowInstruction(func, OP_PRERET, bbE); |
bbT->insertHead(call); |
bbT->insertHead(skip); |
// NOTE: maybe split blocks to prevent the instructions from moving ? |
skip->subOp = NV50_IR_SUBOP_EMU_PRERET + 1; |
call->subOp = NV50_IR_SUBOP_EMU_PRERET + 2; |
} |
bool |
NV50LegalizePostRA::visit(BasicBlock *bb) |
{ |
Instruction *i, *next; |
// remove pseudo operations and non-fixed no-ops, split 64 bit operations |
for (i = bb->getFirst(); i; i = next) { |
next = i->next; |
if (i->isNop()) { |
bb->remove(i); |
} else |
if (i->op == OP_PRERET && prog->getTarget()->getChipset() < 0xa0) { |
handlePRERET(i->asFlow()); |
} else { |
// TODO: We will want to do this before register allocation, |
// since have to use a $c register for the carry flag. |
if (typeSizeof(i->dType) == 8) { |
Instruction *hi = BuildUtil::split64BitOpPostRA(func, i, r63, NULL); |
if (hi) |
next = hi; |
} |
if (i->op != OP_MOV && i->op != OP_PFETCH && |
i->op != OP_BAR && |
(!i->defExists(0) || i->def(0).getFile() != FILE_ADDRESS)) |
replaceZero(i); |
} |
} |
if (!bb->getEntry()) |
return true; |
return true; |
} |
// Pass visiting basic blocks; its definitions below expand integer |
// DIV/MOD/MUL/MAD, fix up instructions that define address registers and |
// optionally collect EXPORTs for later propagation into their producers. |
class NV50LegalizeSSA : public Pass |
{ |
public: |
NV50LegalizeSSA(Program *); |
virtual bool visit(BasicBlock *bb); |
private: |
void propagateWriteToOutput(Instruction *); |
void handleDIV(Instruction *); |
void handleMOD(Instruction *); |
void handleMUL(Instruction *); |
void handleAddrDef(Instruction *); |
inline bool isARL(const Instruction *) const; |
BuildUtil bld; |
// list taken from prog->targetPriv by the constructor, or NULL when |
// output-write propagation is disabled |
std::list<Instruction *> *outWrites; |
}; |
// Binds the builder to @prog. Output-write propagation is enabled (outWrites
// taken from prog->targetPriv) only at optLevel >= 2 and only for geometry
// and vertex programs; otherwise outWrites is NULL.
NV50LegalizeSSA::NV50LegalizeSSA(Program *prog)
{
   bld.setProgram(prog);

   outWrites = NULL;
   if (prog->optLevel < 2)
      return;
   if (prog->getType() != Program::TYPE_GEOMETRY &&
       prog->getType() != Program::TYPE_VERTEX)
      return;

   outWrites = reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv);
}
void |
NV50LegalizeSSA::propagateWriteToOutput(Instruction *st) |
{ |
if (st->src(0).isIndirect(0) || st->getSrc(1)->refCount() != 1) |
return; |
// check def instruction can store |
Instruction *di = st->getSrc(1)->defs.front()->getInsn(); |
// TODO: move exports (if beneficial) in common opt pass |
if (di->isPseudo() || isTextureOp(di->op) || di->defCount(0xff, true) > 1) |
return; |
for (int s = 0; di->srcExists(s); ++s) |
if (di->src(s).getFile() == FILE_IMMEDIATE) |
return; |
if (prog->getType() == Program::TYPE_GEOMETRY) { |
// Only propagate output writes in geometry shaders when we can be sure |
// that we are propagating to the same output vertex. |
if (di->bb != st->bb) |
return; |
Instruction *i; |
for (i = di; i != st; i = i->next) { |
if (i->op == OP_EMIT || i->op == OP_RESTART) |
return; |
} |
assert(i); // st after di |
} |
// We cannot set defs to non-lvalues before register allocation, so |
// save & remove (to save registers) the exports and replace later. |
outWrites->push_back(st); |
st->bb->remove(st); |
} |
bool |
NV50LegalizeSSA::isARL(const Instruction *i) const |
{ |
ImmediateValue imm; |
if (i->op != OP_SHL || i->src(0).getFile() != FILE_GPR) |
return false; |
if (!i->src(1).getImmediate(imm)) |
return false; |
return imm.isInteger(0); |
} |
// Legalizes instruction @i whose first definition lives in FILE_ADDRESS: |
// unless @i already has one of the accepted forms, its address-register |
// sources are replaced by GPR values and its result is routed through a |
// fresh value followed by 'SHL dst, tmp, 0'. |
void |
NV50LegalizeSSA::handleAddrDef(Instruction *i) |
{ |
Instruction *arl; |
i->getDef(0)->reg.size = 2; // $aX are only 16 bit |
// PFETCH can always write to $a |
if (i->op == OP_PFETCH) |
return; |
// only ADDR <- SHL(GPR, IMM) and ADDR <- ADD(ADDR, IMM) are valid |
if (i->srcExists(1) && i->src(1).getFile() == FILE_IMMEDIATE) { |
if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) |
return; |
if (i->op == OP_ADD && i->src(0).getFile() == FILE_ADDRESS) |
return; |
} |
// turn $a sources into $r sources (can't operate on $a) |
for (int s = 0; i->srcExists(s); ++s) { |
Value *a = i->getSrc(s); |
Value *r; |
if (a->reg.file == FILE_ADDRESS) { |
if (a->getInsn() && isARL(a->getInsn())) { |
// the address was produced by SHL(gpr, 0): use that GPR directly |
i->setSrc(s, a->getInsn()->getSrc(0)); |
} else { |
// otherwise copy the address into a fresh value before @i |
bld.setPosition(i, false); |
r = bld.getSSA(); |
bld.mkMov(r, a); |
i->setSrc(s, r); |
} |
} |
} |
// a shift by an immediate needs no extra instruction for its result |
if (i->op == OP_SHL && i->src(1).getFile() == FILE_IMMEDIATE) |
return; |
// turn result back into $a |
bld.setPosition(i, true); |
// new SHL after @i takes over the address def; @i now defines the SHL's |
// fresh source value instead |
arl = bld.mkOp2(OP_SHL, TYPE_U32, i->getDef(0), bld.getSSA(), bld.mkImm(0)); |
i->setDef(0, arl->getSrc(0)); |
} |
// Expands an integer MUL or MAD wider than 16 bits through |
// expandIntegerMUL(). A MAD is first split into a MUL (expanded) followed by |
// an ADD. Float multiplies and types of at most 2 bytes are left alone. |
void |
NV50LegalizeSSA::handleMUL(Instruction *mul) |
{ |
if (isFloatType(mul->sType) || typeSizeof(mul->sType) <= 2) |
return; |
Value *def = mul->getDef(0); |
Value *pred = mul->getPredicate(); |
CondCode cc = mul->cc; |
// drop the predicate for the expansion; it is re-applied at the end |
if (pred) |
mul->setPredicate(CC_ALWAYS, NULL); |
if (mul->op == OP_MAD) { |
// MAD(a, b, c) becomes res = MUL(a, b) followed by ADD(res, c) |
Instruction *add = mul; |
bld.setPosition(add, false); |
Value *res = cloneShallow(func, mul->getDef(0)); |
mul = bld.mkOp2(OP_MUL, add->sType, res, add->getSrc(0), add->getSrc(1)); |
add->op = OP_ADD; |
add->setSrc(0, mul->getDef(0)); |
add->setSrc(1, add->getSrc(2)); |
for (int s = 2; add->srcExists(s); ++s) |
add->setSrc(s, NULL); |
// the subOp moves over to the new MUL |
mul->subOp = add->subOp; |
add->subOp = 0; |
} |
expandIntegerMUL(&bld, mul); |
// put the saved predicate on whichever instruction now defines the |
// original result value |
if (pred) |
def->getInsn()->setPredicate(cc, pred); |
} |
// Use f32 division: first compute an approximate result, use it to reduce |
// the dividend, which should then be representable as f32, divide the reduced |
// dividend, and add the quotients. |
// Expands a 32-bit integer DIV (U32 or S32 source type) into the f32-based |
// sequence built below; other types are left untouched. @div itself is |
// turned into the final SUB (unsigned) or UNION (signed) of the sequence. |
void |
NV50LegalizeSSA::handleDIV(Instruction *div) |
{ |
const DataType ty = div->sType; |
if (ty != TYPE_U32 && ty != TYPE_S32) |
return; |
Value *q, *q0, *qf, *aR, *aRf, *qRf, *qR, *t, *s, *m, *cond; |
bld.setPosition(div, false); |
Value *a, *af = bld.getSSA(); |
Value *b, *bf = bld.getSSA(); |
// af / bf: float conversions of dividend and divisor |
bld.mkCvt(OP_CVT, TYPE_F32, af, ty, div->getSrc(0)); |
bld.mkCvt(OP_CVT, TYPE_F32, bf, ty, div->getSrc(1)); |
if (isSignedType(ty)) { |
// signed: work on absolute values, the sign is fixed at the end |
af->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS); |
bf->getInsn()->src(0).mod = Modifier(NV50_IR_MOD_ABS); |
a = bld.getSSA(); |
b = bld.getSSA(); |
bld.mkOp1(OP_ABS, ty, a, div->getSrc(0)); |
bld.mkOp1(OP_ABS, ty, b, div->getSrc(1)); |
} else { |
a = div->getSrc(0); |
b = div->getSrc(1); |
} |
// bf = 1 / bf, then an integer add of -2 on the result |
// NOTE(review): presumably lowers the reciprocal slightly so the quotient |
// estimate does not overshoot -- confirm |
bf = bld.mkOp1v(OP_RCP, TYPE_F32, bld.getSSA(), bf); |
bf = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), bf, bld.mkImm(-2)); |
// q0 = first quotient estimate (round toward zero) |
bld.mkOp2(OP_MUL, TYPE_F32, (qf = bld.getSSA()), af, bf)->rnd = ROUND_Z; |
bld.mkCvt(OP_CVT, ty, (q0 = bld.getSSA()), TYPE_F32, qf)->rnd = ROUND_Z; |
// get error of 1st result |
expandIntegerMUL(&bld, |
bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q0, b)); |
bld.mkOp2(OP_SUB, TYPE_U32, (aRf = bld.getSSA()), a, t); |
// divide the remaining error the same way to get a second quotient qR |
bld.mkCvt(OP_CVT, TYPE_F32, (aR = bld.getSSA()), TYPE_U32, aRf); |
bld.mkOp2(OP_MUL, TYPE_F32, (qRf = bld.getSSA()), aR, bf)->rnd = ROUND_Z; |
bld.mkCvt(OP_CVT, TYPE_U32, (qR = bld.getSSA()), TYPE_F32, qRf) |
->rnd = ROUND_Z; |
bld.mkOp2(OP_ADD, ty, (q = bld.getSSA()), q0, qR); // add quotients |
// correction: if modulus >= divisor, add 1 |
// NOTE(review): the code below subtracts the SET result s from q; this |
// matches "add 1" only if SET yields -1 when true -- confirm |
expandIntegerMUL(&bld, |
bld.mkOp2(OP_MUL, TYPE_U32, (t = bld.getSSA()), q, b)); |
bld.mkOp2(OP_SUB, TYPE_U32, (m = bld.getSSA()), a, t); |
bld.mkCmp(OP_SET, CC_GE, TYPE_U32, (s = bld.getSSA()), TYPE_U32, m, b); |
if (!isSignedType(ty)) { |
// unsigned: the original instruction becomes q - s |
div->op = OP_SUB; |
div->setSrc(0, q); |
div->setSrc(1, s); |
} else { |
t = q; |
bld.mkOp2(OP_SUB, TYPE_U32, (q = bld.getSSA()), t, s); |
s = bld.getSSA(); |
t = bld.getSSA(); |
// fix the sign |
bld.mkOp2(OP_XOR, TYPE_U32, NULL, div->getSrc(0), div->getSrc(1)) |
->setFlagsDef(0, (cond = bld.getSSA(1, FILE_FLAGS))); |
// s = -q when the flags from the XOR are negative, t = q otherwise; the |
// original instruction becomes the UNION of both |
bld.mkOp1(OP_NEG, ty, s, q)->setPredicate(CC_S, cond); |
bld.mkOp1(OP_MOV, ty, t, q)->setPredicate(CC_NS, cond); |
div->op = OP_UNION; |
div->setSrc(0, s); |
div->setSrc(1, t); |
} |
} |
void |
NV50LegalizeSSA::handleMOD(Instruction *mod) |
{ |
if (mod->dType != TYPE_U32 && mod->dType != TYPE_S32) |
return; |
bld.setPosition(mod, false); |
Value *q = bld.getSSA(); |
Value *m = bld.getSSA(); |
bld.mkOp2(OP_DIV, mod->dType, q, mod->getSrc(0), mod->getSrc(1)); |
handleDIV(q->getInsn()); |
bld.setPosition(mod, false); |
expandIntegerMUL(&bld, bld.mkOp2(OP_MUL, TYPE_U32, m, q, mod->getSrc(1))); |
mod->op = OP_SUB; |
mod->setSrc(1, m); |
} |
bool |
NV50LegalizeSSA::visit(BasicBlock *bb) |
{ |
Instruction *insn, *next; |
// skipping PHIs (don't pass them to handleAddrDef) ! |
for (insn = bb->getEntry(); insn; insn = next) { |
next = insn->next; |
if (insn->defExists(0) && insn->getDef(0)->reg.file == FILE_ADDRESS) |
handleAddrDef(insn); |
switch (insn->op) { |
case OP_EXPORT: |
if (outWrites) |
propagateWriteToOutput(insn); |
break; |
case OP_DIV: |
handleDIV(insn); |
break; |
case OP_MOD: |
handleMOD(insn); |
break; |
case OP_MAD: |
case OP_MUL: |
handleMUL(insn); |
break; |
default: |
break; |
} |
} |
return true; |
} |
// Pre-SSA lowering pass for the nv50 target. It is run for |
// CG_STAGE_PRE_SSA by TargetNV50::runLegalizePass(); the per-instruction |
// visitor dispatches on the opcode to the handle*() methods declared below. |
class NV50LoweringPreSSA : public Pass |
{ |
public: |
NV50LoweringPreSSA(Program *); |
private: |
// Pass callbacks: per-instruction lowering and per-function setup. |
virtual bool visit(Instruction *); |
virtual bool visit(Function *); |
// Per-opcode lowering helpers; each returns a bool that visit(Instruction *) |
// forwards as its own result. |
bool handleRDSV(Instruction *); |
bool handleWRSV(Instruction *); |
bool handlePFETCH(Instruction *); |
bool handleEXPORT(Instruction *); |
bool handleLOAD(Instruction *); |
bool handleDIV(Instruction *); |
bool handleSQRT(Instruction *); |
bool handlePOW(Instruction *); |
bool handleSET(Instruction *); |
bool handleSLCT(CmpInstruction *); |
bool handleSELP(Instruction *); |
bool handleTEX(TexInstruction *); |
bool handleTXB(TexInstruction *); // I really |
bool handleTXL(TexInstruction *); // hate |
bool handleTXD(TexInstruction *); // these 3 |
bool handleTXLQ(TexInstruction *); |
bool handleCALL(Instruction *); |
bool handlePRECONT(Instruction *); |
bool handleCONT(Instruction *); |
// Converts a predicate that is not in a flags/predicate register into one. |
void checkPredicate(Instruction *); |
// Helpers that load multisample layout information from constant buffers. |
void loadTexMsInfo(uint32_t off, Value **ms, Value **ms_x, Value **ms_y); |
void loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy); |
private: |
// Target of the program being lowered (taken from prog->getTarget()). |
const Target *const targ; |
// Instruction builder used to emit all replacement instructions. |
BuildUtil bld; |
// Copy of the implicit thread-id argument; only set by visit(Function *) |
// for compute programs, NULL otherwise. |
Value *tid; |
}; |
// Remember the program's target, start with no thread-id value, and attach
// the instruction builder to the program.
NV50LoweringPreSSA::NV50LoweringPreSSA(Program *prog)
   : targ(prog->getTarget()),
     tid(NULL)
{
   bld.setProgram(prog);
}
bool |
NV50LoweringPreSSA::visit(Function *f) |
{ |
BasicBlock *root = BasicBlock::get(func->cfg.getRoot()); |
if (prog->getType() == Program::TYPE_COMPUTE) { |
// Add implicit "thread id" argument in $r0 to the function |
Value *arg = new_LValue(func, FILE_GPR); |
arg->reg.data.id = 0; |
f->ins.push_back(arg); |
bld.setPosition(root, false); |
tid = bld.mkMov(bld.getScratch(), arg, TYPE_U32)->getDef(0); |
} |
return true; |
} |
void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms, |
Value **ms_x, Value **ms_y) { |
// This loads the texture-indexed ms setting from the constant buffer |
Value *tmp = new_LValue(func, FILE_GPR); |
uint8_t b = prog->driver->io.resInfoCBSlot; |
off += prog->driver->io.suInfoBase; |
if (prog->getType() > Program::TYPE_VERTEX) |
off += 16 * 2 * 4; |
if (prog->getType() > Program::TYPE_GEOMETRY) |
off += 16 * 2 * 4; |
*ms_x = bld.mkLoadv(TYPE_U32, bld.mkSymbol( |
FILE_MEMORY_CONST, b, TYPE_U32, off + 0), NULL); |
*ms_y = bld.mkLoadv(TYPE_U32, bld.mkSymbol( |
FILE_MEMORY_CONST, b, TYPE_U32, off + 4), NULL); |
*ms = bld.mkOp2v(OP_ADD, TYPE_U32, tmp, *ms_x, *ms_y); |
} |
// Loads the per-sample x/y deltas for multisample level 'ms' and sample index |
// 's' from the constant buffer in slot io.msInfoCBSlot. |
//   *dx receives the U32 at io.msInfoBase,     indexed by the computed address |
//   *dy receives the U32 at io.msInfoBase + 4, indexed by the same address |
void NV50LoweringPreSSA::loadMsInfo(Value *ms, Value *s, Value **dx, Value **dy) { |
// Given a MS level, and a sample id, compute the delta x/y |
uint8_t b = prog->driver->io.msInfoCBSlot; |
// 'off' is the address-file value used to index both loads; 't' is a GPR |
// temporary reused for the intermediate shift and add. |
Value *off = new_LValue(func, FILE_ADDRESS), *t = new_LValue(func, FILE_GPR); |
// The required information is at mslevel * 16 * 4 + sample * 8 |
// = (mslevel * 8 + sample) * 8 |
// Emitted as: t = ms << 3; t = t + s; off = t << 3 |
bld.mkOp2(OP_SHL, |
TYPE_U32, |
off, |
bld.mkOp2v(OP_ADD, TYPE_U32, t, |
bld.mkOp2v(OP_SHL, TYPE_U32, t, ms, bld.mkImm(3)), |
s), |
bld.mkImm(3)); |
*dx = bld.mkLoadv(TYPE_U32, bld.mkSymbol( |
FILE_MEMORY_CONST, b, TYPE_U32, |
prog->driver->io.msInfoBase), off); |
*dy = bld.mkLoadv(TYPE_U32, bld.mkSymbol( |
FILE_MEMORY_CONST, b, TYPE_U32, |
prog->driver->io.msInfoBase + 4), off); |
} |
// Common lowering applied to every texture instruction (also called first by |
// the TXB/TXL/TXD/TXLQ handlers). In order, it: |
//  - rewrites multisample coordinates to address the requested sample, |
//  - swaps the shadow reference and bias/lod sources for TXB/TXL, |
//  - converts and clamps the array layer index (not for TXF), |
//  - turns cube-array accesses into 2D-array accesses via TEXPREP, |
//  - copies immediate texel offsets into the instruction's offset fields. |
// Always returns true. |
bool |
NV50LoweringPreSSA::handleTEX(TexInstruction *i) |
{ |
// Source slots: 'arg' is the target's argument count; the shadow reference |
// sits at index 'arg' and bias/lod follows it when the target is a shadow one. |
const int arg = i->tex.target.getArgCount(); |
const int dref = arg; |
const int lod = i->tex.target.isShadow() ? (arg + 1) : arg; |
// handle MS, which means looking up the MS params for this texture, and |
// adjusting the input coordinates to point at the right sample. |
if (i->tex.target.isMS()) { |
Value *x = i->getSrc(0); |
Value *y = i->getSrc(1); |
Value *s = i->getSrc(arg - 1); |
Value *tx = new_LValue(func, FILE_GPR), *ty = new_LValue(func, FILE_GPR), |
*ms, *ms_x, *ms_y, *dx, *dy; |
i->tex.target.clearMS(); |
loadTexMsInfo(i->tex.r * 4 * 2, &ms, &ms_x, &ms_y); |
loadMsInfo(ms, s, &dx, &dy); |
// tx = (x << ms_x) + dx, ty = (y << ms_y) + dy |
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); |
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); |
bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); |
bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); |
i->setSrc(0, tx); |
i->setSrc(1, ty); |
// The sample index is folded into the coordinates; its slot is set to 0. |
i->setSrc(arg - 1, bld.loadImm(NULL, 0)); |
} |
// dref comes before bias/lod |
if (i->tex.target.isShadow()) |
if (i->op == OP_TXB || i->op == OP_TXL) |
i->swapSources(dref, lod); |
if (i->tex.target.isArray()) { |
if (i->op != OP_TXF) { |
// array index must be converted to u32, but it's already an integer |
// for TXF |
Value *layer = i->getSrc(arg - 1); |
LValue *src = new_LValue(func, FILE_GPR); |
bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); |
// Clamp the converted layer index to at most 511. |
bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); |
i->setSrc(arg - 1, src); |
} |
if (i->tex.target.isCube() && i->srcCount() > 4) { |
// acube: the first four sources of the instruction (TEXPREP inputs). |
// a2d:   three fresh GPRs written by TEXPREP (mask 0x7); 4th is unused. |
std::vector<Value *> acube, a2d; |
int c; |
acube.resize(4); |
for (c = 0; c < 4; ++c) |
acube[c] = i->getSrc(c); |
a2d.resize(4); |
for (c = 0; c < 3; ++c) |
a2d[c] = new_LValue(func, FILE_GPR); |
a2d[3] = NULL; |
bld.mkTex(OP_TEXPREP, TEX_TARGET_CUBE_ARRAY, i->tex.r, i->tex.s, |
a2d, acube)->asTex()->tex.mask = 0x7; |
// Replace the first three sources with the TEXPREP results, then shift |
// every later source down by one slot and clear the last one. |
for (c = 0; c < 3; ++c) |
i->setSrc(c, a2d[c]); |
for (; i->srcExists(c + 1); ++c) |
i->setSrc(c, i->getSrc(c + 1)); |
i->setSrc(c, NULL); |
assert(c <= 4); |
i->tex.target = i->tex.target.isShadow() ? |
TEX_TARGET_2D_ARRAY_SHADOW : TEX_TARGET_2D_ARRAY; |
} |
} |
// texel offsets are 3 immediate fields in the instruction, |
// nv50 cannot do textureGatherOffsets |
assert(i->tex.useOffsets <= 1); |
if (i->tex.useOffsets) { |
for (int c = 0; c < 3; ++c) { |
ImmediateValue val; |
if (!i->offset[0][c].getImmediate(val)) |
assert(!"non-immediate offset"); |
// Store the immediate in the instruction and drop the offset source. |
i->tex.offset[c] = val.reg.data.u32; |
i->offset[0][c].set(NULL); |
} |
} |
return true; |
} |
// Bias must be equal for all threads of a quad or lod calculation will fail. |
// |
// The lanes of a quad are grouped by the bit in the condition register they |
// have set, which is selected by differing bias values. |
// Move the input values for TEX into a new register set for each group and |
// execute TEX only for a specific group. |
// We always need to use 4 new registers for the inputs/outputs because the |
// implicitly calculated derivatives must be correct. |
// |
// TODO: move to SSA phase so we can easily determine whether bias is constant |
// Lowers TXB (texture with bias). When the bias is not uniform, the TXB is |
// cloned four times, each clone predicated on one condition code of a flags |
// value derived from per-lane bias comparisons, and the results are merged |
// with OP_UNION. The original instruction is deleted in that case. |
// Always returns true (or the result of handleTEX for cube shadow targets). |
bool |
NV50LoweringPreSSA::handleTXB(TexInstruction *i) |
{ |
// Condition code used to predicate the clone of each group l = 0..3. |
const CondCode cc[4] = { CC_EQU, CC_S, CC_C, CC_O }; |
int l, d; |
// We can't actually apply bias *and* do a compare for a cube |
// texture. Since the compare has to be done before the filtering, just |
// drop the bias on the floor. |
if (i->tex.target == TEX_TARGET_CUBE_SHADOW) { |
i->op = OP_TEX; |
i->setSrc(3, i->getSrc(4)); |
i->setSrc(4, NULL); |
return handleTEX(i); |
} |
handleTEX(i); |
Value *bias = i->getSrc(i->tex.target.getArgCount()); |
// A uniform bias needs no per-group handling. |
if (bias->isUniform()) |
return true; |
// 'cond' collects one bit per group: source 0 is the immediate 1, sources |
// 1..3 are set below to (1 << l) movs predicated on the quadop result. |
Instruction *cond = bld.mkOp1(OP_UNION, TYPE_U32, bld.getScratch(), |
bld.loadImm(NULL, 1)); |
bld.setPosition(cond, false); |
for (l = 1; l < 4; ++l) { |
const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR); |
Value *bit = bld.getSSA(); |
Value *pred = bld.getScratch(1, FILE_FLAGS); |
Value *imm = bld.loadImm(NULL, (1 << l)); |
// Quadop on bias with lane l writes flags into 'pred'; the mov of |
// (1 << l) into 'bit' only executes where those flags signal CC_EQ. |
bld.mkQuadop(qop, pred, l, bias, bias)->flagsDef = 0; |
bld.mkMov(bit, imm)->setPredicate(CC_EQ, pred); |
cond->setSrc(l, bit); |
} |
// Convert the collected bits into a flags value, emitted after 'cond'. |
Value *flags = bld.getScratch(1, FILE_FLAGS); |
bld.setPosition(cond, true); |
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0)); |
// One predicated clone of the texture instruction per group. |
Instruction *tex[4]; |
for (l = 0; l < 4; ++l) { |
(tex[l] = cloneForward(func, i))->setPredicate(cc[l], flags); |
bld.insert(tex[l]); |
} |
// res[l][d]: result d of group l. Group 0 uses the clone's defs directly; |
// groups 1..3 get predicated movs into shallow clones of group 0's values. |
Value *res[4][4]; |
for (d = 0; i->defExists(d); ++d) |
res[0][d] = tex[0]->getDef(d); |
for (l = 1; l < 4; ++l) { |
for (d = 0; tex[l]->defExists(d); ++d) { |
res[l][d] = cloneShallow(func, res[0][d]); |
bld.mkMov(res[l][d], tex[l]->getDef(d))->setPredicate(cc[l], flags); |
} |
} |
// Merge the four per-group results into each original destination. |
for (d = 0; i->defExists(d); ++d) { |
Instruction *dst = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(d)); |
for (l = 0; l < 4; ++l) |
dst->setSrc(l, res[l][d]); |
} |
delete_Instruction(prog, i); |
return true; |
} |
// LOD must be equal for all threads of a quad. |
// Unlike with TXB, here we can just diverge since there's no LOD calculation |
// that would require all 4 threads' sources to be set up properly. |
// Lowers TXL (texture with explicit LOD). When the LOD is not uniform, the |
// instruction is isolated in its own basic block and reached through a chain |
// of per-lane conditional branches, rejoining at the block that follows it. |
// Always returns true. |
bool |
NV50LoweringPreSSA::handleTXL(TexInstruction *i) |
{ |
handleTEX(i); |
Value *lod = i->getSrc(i->tex.target.getArgCount()); |
if (lod->isUniform()) |
return true; |
// Split the block around the TXL: currBB keeps the code before it, texiBB |
// holds the TXL, joinBB holds everything after it. |
BasicBlock *currBB = i->bb; |
BasicBlock *texiBB = i->bb->splitBefore(i, false); |
BasicBlock *joinBB = i->bb->splitAfter(i); |
bld.setPosition(currBB, true); |
currBB->joinAt = bld.mkFlow(OP_JOINAT, joinBB, CC_ALWAYS, NULL); |
// For each lane l: a quadop on lod with lane l writes flags into 'pred', |
// followed by a fixed branch to texiBB taken on CC_EQ. For the first three |
// lanes a new block is attached as the tree successor and becomes currBB. |
for (int l = 0; l <= 3; ++l) { |
const uint8_t qop = QUADOP(SUBR, SUBR, SUBR, SUBR); |
Value *pred = bld.getScratch(1, FILE_FLAGS); |
bld.setPosition(currBB, true); |
bld.mkQuadop(qop, pred, l, lod, lod)->flagsDef = 0; |
bld.mkFlow(OP_BRA, texiBB, CC_EQ, pred)->fixed = 1; |
currBB->cfg.attach(&texiBB->cfg, Graph::Edge::FORWARD); |
if (l <= 2) { |
BasicBlock *laneBB = new BasicBlock(func); |
currBB->cfg.attach(&laneBB->cfg, Graph::Edge::TREE); |
currBB = laneBB; |
} |
} |
// A JOIN is placed at the start of the block following the TXL. |
bld.setPosition(joinBB, false); |
bld.mkOp(OP_JOIN, TYPE_NONE, NULL); |
return true; |
} |
// Lowers TXD (texture with explicit derivatives) into four plain TEX |
// instructions, one per quad lane, bracketed by QUADON / QUADPOP. For each |
// lane the coordinates are rebuilt with quadops from that lane's coordinates |
// and its dPdx/dPdy, and only that lane's result is kept. The per-lane results |
// are merged with OP_UNION and the original instruction is removed. |
// Always returns true. |
bool |
NV50LoweringPreSSA::handleTXD(TexInstruction *i) |
{ |
// qOps[l][0] is the quadop used to apply dPdx for lane l, qOps[l][1] the |
// one used to apply dPdy. |
static const uint8_t qOps[4][2] = |
{ |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 |
}; |
// def[c][l]: result component c as computed for lane l. |
Value *def[4][4]; |
// crd[c]: scratch register for coordinate c, reused for every lane. |
Value *crd[3]; |
Instruction *tex; |
Value *zero = bld.loadImm(bld.getSSA(), 0); |
int l, c; |
const int dim = i->tex.target.getDim(); |
handleTEX(i); |
i->op = OP_TEX; // no need to clone dPdx/dPdy later |
for (c = 0; c < dim; ++c) |
crd[c] = bld.getScratch(); |
bld.mkOp(OP_QUADON, TYPE_NONE, NULL); |
for (l = 0; l < 4; ++l) { |
// mov coordinates from lane l to all lanes |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); |
// add dPdx from lane l to lanes dx |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); |
// add dPdy from lane l to lanes dy |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); |
// texture |
bld.insert(tex = cloneForward(func, i)); |
for (c = 0; c < dim; ++c) |
tex->setSrc(c, crd[c]); |
// save results |
for (c = 0; i->defExists(c); ++c) { |
Instruction *mov; |
def[c][l] = bld.getSSA(); |
mov = bld.mkMov(def[c][l], tex->getDef(c)); |
// The mov is marked fixed and restricted to lane l only. |
mov->fixed = 1; |
mov->lanes = 1 << l; |
} |
} |
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); |
// Merge the four per-lane values of each component into the original def. |
for (c = 0; i->defExists(c); ++c) { |
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); |
for (l = 0; l < 4; ++l) |
u->setSrc(l, def[c][l]); |
} |
i->bb->remove(i); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleTXLQ(TexInstruction *i) |
{ |
handleTEX(i); |
bld.setPosition(i, true); |
/* The returned values are not quite what we want: |
* (a) convert from s32 to f32 |
* (b) multiply by 1/256 |
*/ |
for (int def = 0; def < 2; ++def) { |
if (!i->defExists(def)) |
continue; |
bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(def), TYPE_S32, i->getDef(def)); |
bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(def), |
i->getDef(def), bld.loadImm(NULL, 1.0f / 256)); |
} |
return true; |
} |
bool |
NV50LoweringPreSSA::handleSET(Instruction *i) |
{ |
if (i->dType == TYPE_F32) { |
bld.setPosition(i, true); |
i->dType = TYPE_U32; |
bld.mkOp1(OP_ABS, TYPE_S32, i->getDef(0), i->getDef(0)); |
bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_S32, i->getDef(0)); |
} |
return true; |
} |
// Lowers SLCT into a SET that writes a flags value plus two predicated movs |
// merged by OP_UNION: |
//   SET pred = compare(old src 2, 0)        (the rewritten instruction) |
//   mov src0 = old src 0   if CC_NE on pred |
//   mov src1 = old src 1   if CC_EQ on pred |
//   union def = src0, src1 |
// Always returns true. |
bool |
NV50LoweringPreSSA::handleSLCT(CmpInstruction *i) |
{ |
Value *src0 = bld.getSSA(); |
Value *src1 = bld.getSSA(); |
Value *pred = bld.getScratch(1, FILE_FLAGS); |
Value *v0 = i->getSrc(0); |
Value *v1 = i->getSrc(1); |
// XXX: these probably shouldn't be immediates in the first place ... |
// Immediate operands are first copied into fresh values at the current |
// builder position. |
if (v0->asImm()) |
v0 = bld.mkMov(bld.getSSA(), v0)->getDef(0); |
if (v1->asImm()) |
v1 = bld.mkMov(bld.getSSA(), v1)->getDef(0); |
// The predicated movs and the union are emitted after the instruction. |
bld.setPosition(i, true); |
bld.mkMov(src0, v0)->setPredicate(CC_NE, pred); |
bld.mkMov(src1, v1)->setPredicate(CC_EQ, pred); |
bld.mkOp2(OP_UNION, i->dType, i->getDef(0), src0, src1); |
// Rewrite the instruction itself into the SET; the immediate 0 it compares |
// against is loaded in front of it. |
bld.setPosition(i, false); |
i->op = OP_SET; |
i->setFlagsDef(0, pred); |
i->dType = TYPE_U8; |
i->setSrc(0, i->getSrc(2)); |
i->setSrc(2, NULL); |
i->setSrc(1, bld.loadImm(NULL, 0)); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleSELP(Instruction *i) |
{ |
Value *src0 = bld.getSSA(); |
Value *src1 = bld.getSSA(); |
Value *v0 = i->getSrc(0); |
Value *v1 = i->getSrc(1); |
if (v0->asImm()) |
v0 = bld.mkMov(bld.getSSA(), v0)->getDef(0); |
if (v1->asImm()) |
v1 = bld.mkMov(bld.getSSA(), v1)->getDef(0); |
bld.mkMov(src0, v0)->setPredicate(CC_NE, i->getSrc(2)); |
bld.mkMov(src1, v1)->setPredicate(CC_EQ, i->getSrc(2)); |
bld.mkOp2(OP_UNION, i->dType, i->getDef(0), src0, src1); |
delete_Instruction(prog, i); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleWRSV(Instruction *i) |
{ |
Symbol *sym = i->getSrc(0)->asSym(); |
// these are all shader outputs, $sreg are not writeable |
uint32_t addr = targ->getSVAddress(FILE_SHADER_OUTPUT, sym); |
if (addr >= 0x400) |
return false; |
sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); |
bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), i->getSrc(1)); |
bld.getBB()->remove(i); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleCALL(Instruction *i) |
{ |
if (prog->getType() == Program::TYPE_COMPUTE) { |
// Add implicit "thread id" argument in $r0 to the function |
i->setSrc(i->srcCount(), tid); |
} |
return true; |
} |
bool |
NV50LoweringPreSSA::handlePRECONT(Instruction *i) |
{ |
delete_Instruction(prog, i); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleCONT(Instruction *i) |
{ |
i->op = OP_BRA; |
return true; |
} |
// Lowers a system-value read. System values whose target address is 0x400 or |
// above are left as they are; all others are replaced by an explicit |
// interpolation, load, constant or bit extraction, and the original RDSV is |
// removed from its block. Always returns true. |
bool |
NV50LoweringPreSSA::handleRDSV(Instruction *i) |
{ |
Symbol *sym = i->getSrc(0)->asSym(); |
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); |
Value *def = i->getDef(0); |
SVSemantic sv = sym->reg.data.sv.sv; |
int idx = sym->reg.data.sv.index; |
if (addr >= 0x400) // mov $sreg |
return true; |
switch (sv) { |
case SV_POSITION: |
assert(prog->getType() == Program::TYPE_FRAGMENT); |
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); |
break; |
case SV_FACE: |
bld.mkInterp(NV50_IR_INTERP_FLAT, def, addr, NULL); |
// For a float result the flat-interpolated value is post-processed as |
// cvt_f32(-(s32)(value | 1)). |
if (i->dType == TYPE_F32) { |
bld.mkOp2(OP_OR, TYPE_U32, def, def, bld.mkImm(0x00000001)); |
bld.mkOp1(OP_NEG, TYPE_S32, def, def); |
bld.mkCvt(OP_CVT, TYPE_F32, def, TYPE_S32, def); |
} |
break; |
case SV_NCTAID: |
case SV_CTAID: |
case SV_NTID: |
// Components beyond the ones handled below read as constants: 1 for |
// NCTAID (idx >= 2) and NTID (idx >= 3), 0 for CTAID (idx >= 2). |
if ((sv == SV_NCTAID && idx >= 2) || |
(sv == SV_NTID && idx >= 3)) { |
bld.mkMov(def, bld.mkImm(1)); |
} else if (sv == SV_CTAID && idx >= 2) { |
bld.mkMov(def, bld.mkImm(0)); |
} else { |
// Otherwise a U16 is loaded from shared memory at 'addr' and widened. |
Value *x = bld.getSSA(2); |
bld.mkOp1(OP_LOAD, TYPE_U16, x, |
bld.mkSymbol(FILE_MEMORY_SHARED, 0, TYPE_U16, addr)); |
bld.mkCvt(OP_CVT, TYPE_U32, def, TYPE_U16, x); |
} |
break; |
case SV_TID: |
// The three components are extracted from the saved thread-id value: |
// idx 0 = bits 0..15, idx 1 = bits 16..25, idx 2 = bits 26 and up; |
// any other index reads as 0. |
if (idx == 0) { |
bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x0000ffff)); |
} else if (idx == 1) { |
bld.mkOp2(OP_AND, TYPE_U32, def, tid, bld.mkImm(0x03ff0000)); |
bld.mkOp2(OP_SHR, TYPE_U32, def, def, bld.mkImm(16)); |
} else if (idx == 2) { |
bld.mkOp2(OP_SHR, TYPE_U32, def, tid, bld.mkImm(26)); |
} else { |
bld.mkMov(def, bld.mkImm(0)); |
} |
break; |
case SV_SAMPLE_POS: { |
// Read the sample index, scale it by 8 into an address value, and load |
// component 'idx' from the constant buffer at io.sampleInfoBase. |
Value *off = new_LValue(func, FILE_ADDRESS); |
bld.mkOp1(OP_RDSV, TYPE_U32, def, bld.mkSysVal(SV_SAMPLE_INDEX, 0)); |
bld.mkOp2(OP_SHL, TYPE_U32, off, def, bld.mkImm(3)); |
bld.mkLoad(TYPE_F32, |
def, |
bld.mkSymbol( |
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, |
TYPE_U32, prog->driver->io.sampleInfoBase + 4 * idx), |
off); |
break; |
} |
default: |
// Everything else is fetched from the shader input file at 'addr'. |
bld.mkFetch(i->getDef(0), i->dType, |
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), NULL); |
break; |
} |
bld.getBB()->remove(i); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleDIV(Instruction *i) |
{ |
if (!isFloatType(i->dType)) |
return true; |
bld.setPosition(i, false); |
Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); |
i->op = OP_MUL; |
i->setSrc(1, rcp->getDef(0)); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleSQRT(Instruction *i) |
{ |
Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, |
bld.getSSA(), i->getSrc(0)); |
i->op = OP_MUL; |
i->setSrc(1, rsq->getDef(0)); |
return true; |
} |
bool |
NV50LoweringPreSSA::handlePOW(Instruction *i) |
{ |
LValue *val = bld.getScratch(); |
bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); |
bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; |
bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); |
i->op = OP_EX2; |
i->setSrc(0, val); |
i->setSrc(1, NULL); |
return true; |
} |
bool |
NV50LoweringPreSSA::handleEXPORT(Instruction *i) |
{ |
if (prog->getType() == Program::TYPE_FRAGMENT) { |
if (i->getIndirect(0, 0)) { |
// TODO: redirect to l[] here, load to GPRs at exit |
return false; |
} else { |
int id = i->getSrc(0)->reg.data.offset / 4; // in 32 bit reg units |
i->op = OP_MOV; |
i->subOp = NV50_IR_SUBOP_MOV_FINAL; |
i->src(0).set(i->src(1)); |
i->setSrc(1, NULL); |
i->setDef(0, new_LValue(func, FILE_GPR)); |
i->getDef(0)->reg.data.id = id; |
prog->maxGPR = MAX2(prog->maxGPR, id); |
} |
} |
return true; |
} |
// Handle indirect addressing in geometry shaders: |
// |
// ld $r0 a[$a1][$a2+k] -> |
// ld $r0 a[($a1 + $a2 * $vstride) + k], where k *= $vstride is implicit |
// |
// Rewrites loads that use a second (dimension 1) indirect address, which is |
// only expected in geometry programs. The two indirections are combined into |
// a single address register placed in indirect slot 0. Loads without a |
// dimension-1 indirection are left unchanged. Always returns true. |
bool |
NV50LoweringPreSSA::handleLOAD(Instruction *i) |
{ |
ValueRef src = i->src(0); |
if (src.isIndirect(1)) { |
assert(prog->getType() == Program::TYPE_GEOMETRY); |
Value *addr = i->getIndirect(0, 1); |
if (src.isIndirect(0)) { |
// base address is in an address register, so move to a GPR |
Value *base = bld.getScratch(); |
bld.mkMov(base, addr); |
// vstride is read from the SV_VERTEX_STRIDE system value; attrib is the |
// dimension-0 index shifted left by 2. |
Symbol *sv = bld.mkSysVal(SV_VERTEX_STRIDE, 0); |
Value *vstride = bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getSSA(), sv); |
Value *attrib = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 0), bld.mkImm(2)); |
// Calculate final address: addr = base + attr*vstride; use 16-bit |
// multiplication since 32-bit would be lowered to multiple |
// instructions, and we only need the low 16 bits of the result |
Value *a[2], *b[2]; |
bld.mkSplit(a, 2, attrib); |
bld.mkSplit(b, 2, vstride); |
Value *sum = bld.mkOp3v(OP_MAD, TYPE_U16, bld.getSSA(), a[0], b[0], |
base); |
// move address from GPR into an address register |
addr = bld.getSSA(2, FILE_ADDRESS); |
bld.mkMov(addr, sum); |
} |
// The dimension-1 indirection is dropped; 'addr' (either the original |
// dimension-1 address or the combined one) becomes the only indirection. |
i->setIndirect(0, 1, NULL); |
i->setIndirect(0, 0, addr); |
} |
return true; |
} |
// Lowers PFETCH in geometry programs. Source 0 must be an immediate no larger |
// than 127. When a second source is present, it is shifted left by 2 into an |
// address value, a new PFETCH using that address writes a scratch register, |
// and the original instruction is turned into a shift-by-zero of that |
// register. Without a second source nothing is changed. Always returns true. |
bool |
NV50LoweringPreSSA::handlePFETCH(Instruction *i) |
{ |
assert(prog->getType() == Program::TYPE_GEOMETRY); |
// NOTE: cannot use getImmediate here, not in SSA form yet, move to |
// later phase if that assertion ever triggers: |
ImmediateValue *imm = i->getSrc(0)->asImm(); |
assert(imm); |
assert(imm->reg.data.u32 <= 127); // TODO: use address reg if that happens |
if (i->srcExists(1)) { |
// indirect addressing of vertex in primitive space |
LValue *val = bld.getScratch(); |
Value *ptr = bld.getSSA(2, FILE_ADDRESS); |
bld.mkOp2v(OP_SHL, TYPE_U32, ptr, i->getSrc(1), bld.mkImm(2)); |
bld.mkOp2v(OP_PFETCH, TYPE_U32, val, imm, ptr); |
// NOTE: PFETCH directly to an $aX only works with direct addressing |
// The original instruction now only copies 'val' (val << 0) to its def. |
i->op = OP_SHL; |
i->setSrc(0, val); |
i->setSrc(1, bld.mkImm(0)); |
} |
return true; |
} |
// Set flags according to predicate and make the instruction read $cX. |
void |
NV50LoweringPreSSA::checkPredicate(Instruction *insn) |
{ |
Value *pred = insn->getPredicate(); |
Value *cdst; |
// FILE_PREDICATE will simply be changed to FLAGS on conversion to SSA |
if (!pred || |
pred->reg.file == FILE_FLAGS || pred->reg.file == FILE_PREDICATE) |
return; |
cdst = bld.getSSA(1, FILE_FLAGS); |
bld.mkCmp(OP_SET, CC_NEU, insn->dType, cdst, insn->dType, bld.loadImm(NULL, 0), pred); |
insn->setPredicate(insn->cc, cdst); |
} |
// |
// - add quadop dance for texturing |
// - put FP outputs in GPRs |
// - convert instruction sequences |
// |
// Per-instruction entry point of the pass. Positions the builder in front of |
// the instruction, fixes up its predicate when it is conditional, and then |
// dispatches on the opcode to the matching handler. The handler's result is |
// returned; opcodes without a handler return true. |
bool |
NV50LoweringPreSSA::visit(Instruction *i) |
{ |
bld.setPosition(i, false); |
// Only conditionally executed instructions can need a predicate fix-up. |
if (i->cc != CC_ALWAYS) |
checkPredicate(i); |
switch (i->op) { |
case OP_TEX: |
case OP_TXF: |
case OP_TXG: |
return handleTEX(i->asTex()); |
case OP_TXB: |
return handleTXB(i->asTex()); |
case OP_TXL: |
return handleTXL(i->asTex()); |
case OP_TXD: |
return handleTXD(i->asTex()); |
case OP_TXLQ: |
return handleTXLQ(i->asTex()); |
case OP_EX2: |
// EX2 gets a PREEX2 inserted in front of it; the PREEX2 writes the EX2's |
// own destination, which the EX2 then reads as its source. |
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); |
i->setSrc(0, i->getDef(0)); |
break; |
case OP_SET: |
return handleSET(i); |
case OP_SLCT: |
return handleSLCT(i->asCmp()); |
case OP_SELP: |
return handleSELP(i); |
case OP_POW: |
return handlePOW(i); |
case OP_DIV: |
return handleDIV(i); |
case OP_SQRT: |
return handleSQRT(i); |
case OP_EXPORT: |
return handleEXPORT(i); |
case OP_LOAD: |
return handleLOAD(i); |
case OP_RDSV: |
return handleRDSV(i); |
case OP_WRSV: |
return handleWRSV(i); |
case OP_CALL: |
return handleCALL(i); |
case OP_PRECONT: |
return handlePRECONT(i); |
case OP_CONT: |
return handleCONT(i); |
case OP_PFETCH: |
return handlePFETCH(i); |
default: |
break; |
} |
return true; |
} |
bool |
TargetNV50::runLegalizePass(Program *prog, CGStage stage) const |
{ |
bool ret = false; |
if (stage == CG_STAGE_PRE_SSA) { |
NV50LoweringPreSSA pass(prog); |
ret = pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_SSA) { |
if (!prog->targetPriv) |
prog->targetPriv = new std::list<Instruction *>(); |
NV50LegalizeSSA pass(prog); |
ret = pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_POST_RA) { |
NV50LegalizePostRA pass; |
ret = pass.run(prog, false, true); |
if (prog->targetPriv) |
delete reinterpret_cast<std::list<Instruction *> *>(prog->targetPriv); |
} |
return ret; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |
---|
0,0 → 1,1814 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_build_util.h" |
#include "codegen/nv50_ir_target_nvc0.h" |
#include "codegen/nv50_ir_lowering_nvc0.h" |
#include <limits> |
namespace nv50_ir { |
// 2-bit operation selectors used to build a quad operation byte. |
#define QOP_ADD 0 |
#define QOP_SUBR 1 |
#define QOP_SUB 2 |
#define QOP_MOV2 3 |
// QUADOP(q, r, s, t) packs four QOP_* selectors (given by suffix, e.g. ADD) |
// into one byte: q in bits 7..6, r in bits 5..4, s in bits 3..2, t in bits |
// 1..0. The column labels below name the argument positions in that order. |
// UL UR LL LR |
#define QUADOP(q, r, s, t) \ |
((QOP_##q << 6) | (QOP_##r << 4) | \ |
(QOP_##s << 2) | (QOP_##t << 0)) |
void |
NVC0LegalizeSSA::handleDIV(Instruction *i) |
{ |
FlowInstruction *call; |
int builtin; |
Value *def[2]; |
bld.setPosition(i, false); |
def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); |
def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); |
switch (i->dType) { |
case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; |
case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; |
default: |
return; |
} |
call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); |
bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); |
bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); |
bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); |
call->fixed = 1; |
call->absolute = call->builtin = 1; |
call->target.builtin = builtin; |
delete_Instruction(prog, i); |
} |
// Lowers a 64-bit float RCP/RSQ into the 32-bit form that operates on the |
// high word only (subOp NV50_IR_SUBOP_RCPRSQ_64H), then rebuilds the 64-bit |
// result from a zero low word and the computed high word. |
void |
NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) |
{ |
assert(i->dType == TYPE_F64); |
// There are instructions that will compute the high 32 bits of the 64-bit |
// float. We will just stick 0 in the bottom 32 bits. |
bld.setPosition(i, false); |
// 1. Take the source and split it up into 4-byte halves. |
Value *src[2], *dst[2], *def = i->getDef(0); |
bld.mkSplit(src, 4, i->getSrc(0)); |
// 2. We don't care about the low 32 bits of the destination. Stick a 0 in. |
dst[0] = bld.loadImm(NULL, 0); |
dst[1] = bld.getSSA(); |
// 3. The new version of the instruction takes the high 32 bits of the |
// source and outputs the high 32 bits of the destination. |
i->setSrc(0, src[1]); |
i->setDef(0, dst[1]); |
i->setType(TYPE_F32); |
i->subOp = NV50_IR_SUBOP_RCPRSQ_64H; |
// 4. Recombine the two dst pieces back into the original destination. |
// The merge is emitted after the rewritten instruction. |
bld.setPosition(i, true); |
bld.mkOp2(OP_MERGE, TYPE_U64, def, dst[0], dst[1]); |
} |
void |
NVC0LegalizeSSA::handleFTZ(Instruction *i) |
{ |
// Only want to flush float inputs |
assert(i->sType == TYPE_F32); |
// If we're already flushing denorms (and NaN's) to zero, no need for this. |
if (i->dnz) |
return; |
// Only certain classes of operations can flush |
OpClass cls = prog->getTarget()->getOpClass(i->op); |
if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE && |
cls != OPCLASS_CONVERT) |
return; |
i->ftz = true; |
} |
bool |
NVC0LegalizeSSA::visit(Function *fn) |
{ |
bld.setProgram(fn->getProgram()); |
return true; |
} |
bool |
NVC0LegalizeSSA::visit(BasicBlock *bb) |
{ |
Instruction *next; |
for (Instruction *i = bb->getEntry(); i; i = next) { |
next = i->next; |
if (i->sType == TYPE_F32) { |
if (prog->getType() != Program::TYPE_COMPUTE) |
handleFTZ(i); |
continue; |
} |
switch (i->op) { |
case OP_DIV: |
case OP_MOD: |
handleDIV(i); |
break; |
case OP_RCP: |
case OP_RSQ: |
if (i->dType == TYPE_F64) |
handleRCPRSQ(i); |
break; |
default: |
break; |
} |
} |
return true; |
} |
// Starts with no zero register and no carry value; texture barriers are
// requested when the target's chipset id is 0xe0 or higher.
NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
   : rZero(NULL),
     carry(NULL),
     needTexBar(prog->getTarget()->getChipset() >= 0xe0)
{
   // nothing else to initialise
}
bool |
NVC0LegalizePostRA::insnDominatedBy(const Instruction *later, |
const Instruction *early) const |
{ |
if (early->bb == later->bb) |
return early->serial < later->serial; |
return later->bb->dominatedBy(early->bb); |
} |
void |
NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, |
Instruction *usei, const Instruction *insn) |
{ |
bool add = true; |
for (std::list<TexUse>::iterator it = uses.begin(); |
it != uses.end();) { |
if (insnDominatedBy(usei, it->insn)) { |
add = false; |
break; |
} |
if (insnDominatedBy(it->insn, usei)) |
it = uses.erase(it); |
else |
++it; |
} |
if (add) |
uses.push_back(TexUse(usei, insn)); |
} |
// Starting from 'insn', finds the instructions that actually write a value |
// and records them in 'uses' as uses of the texture instruction 'texi'. |
// No-op moves are skipped, and SPLIT/MERGE/PHI/UNION are followed through |
// their sources recursively. Instructions whose block is not reachable from |
// texi's block (with 'term' passed as the limit to reachableBy) are ignored. |
void |
NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, |
Instruction *insn, |
const BasicBlock *term, |
std::list<TexUse> &uses) |
{ |
// Walk back through moves whose destination equals their source. |
while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) |
insn = insn->getSrc(0)->getUniqueInsn(); |
if (!insn->bb->reachableBy(texi->bb, term)) |
return; |
switch (insn->op) { |
/* Values not connected to the tex's definition through any of these should |
* not be conflicting. |
*/ |
case OP_SPLIT: |
case OP_MERGE: |
case OP_PHI: |
case OP_UNION: |
/* recurse again */ |
for (int s = 0; insn->srcExists(s); ++s) |
findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, |
uses); |
break; |
default: |
// if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? |
addTexUse(uses, insn, texi); |
break; |
} |
} |
// Collects into 'uses' the first real uses of the values defined by 'insn', |
// on behalf of the texture instruction 'texi'. Uses through SPLIT, MERGE, |
// PHI, UNION and no-op moves are followed recursively instead of being |
// recorded; for PHI and UNION the other incoming definitions are also |
// recorded via findOverwritingDefs(). 'visited' holds the instructions |
// already processed so that cycles terminate. |
void |
NVC0LegalizePostRA::findFirstUses( |
const Instruction *texi, |
const Instruction *insn, |
std::list<TexUse> &uses, |
std::tr1::unordered_set<const Instruction *>& visited) |
{ |
for (int d = 0; insn->defExists(d); ++d) { |
Value *v = insn->getDef(d); |
for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { |
Instruction *usei = (*u)->getInsn(); |
// NOTE: In case of a loop that overwrites a value but never uses |
// it, it can happen that we have a cycle of uses that consists only |
// of phis and no-op moves and will thus cause an infinite loop here |
// since these are not considered actual uses. |
// The most obvious (and perhaps the only) way to prevent this is to |
// remember which instructions we've already visited. |
if (visited.find(usei) != visited.end()) |
continue; |
visited.insert(usei); |
if (usei->op == OP_PHI || usei->op == OP_UNION) { |
// need a barrier before WAW cases |
// Every source other than the one being followed is examined. |
for (int s = 0; usei->srcExists(s); ++s) { |
Instruction *defi = usei->getSrc(s)->getUniqueInsn(); |
if (defi && &usei->src(s) != *u) |
findOverwritingDefs(texi, defi, usei->bb, uses); |
} |
} |
if (usei->op == OP_SPLIT || |
usei->op == OP_MERGE || |
usei->op == OP_PHI || |
usei->op == OP_UNION) { |
// these uses don't manifest in the machine code |
findFirstUses(texi, usei, uses, visited); |
} else |
// Moves whose destination equals their source are followed as well, |
// unless they are marked as final moves. |
if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && |
usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { |
findFirstUses(texi, usei, uses, visited); |
} else { |
addTexUse(uses, usei, insn); |
} |
} |
} |
} |
// Texture barriers: |
// This pass is a bit long and ugly and can probably be optimized. |
// |
// 1. obtain a list of TEXes and their outputs' first use(s) |
// 2. calculate the barrier level of each first use (minimal number of TEXes, |
// over all paths, between the TEX and the use in question) |
// 3. for each barrier, if all paths from the source TEX to that barrier |
// contain a barrier of lesser level, it can be culled |
// |
// Returns false when the function contains no texture ops (or the use-list |
// array pointer is null); returns true otherwise. Step 3 is only executed |
// when the program's optLevel is at least 3. |
bool |
NVC0LegalizePostRA::insertTextureBarriers(Function *fn) |
{ |
std::list<TexUse> *uses; |
std::vector<Instruction *> texes; |
std::vector<int> bbFirstTex; |
std::vector<int> bbFirstUse; |
std::vector<int> texCounts; |
std::vector<TexUse> useVec; |
ArrayList insns; |
fn->orderInstructions(insns); |
// Per-block tables, indexed by BasicBlock id. bbFirstTex/bbFirstUse start |
// out as insns.getSize(); bbFirstUse is not read again in this function. |
texCounts.resize(fn->allBBlocks.getSize(), 0); |
bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); |
bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); |
// tag BB CFG nodes by their id for later |
for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { |
BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); |
if (bb) |
bb->cfg.tag = bb->getId(); |
} |
// gather the first uses for each TEX |
for (int i = 0; i < insns.getSize(); ++i) { |
Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); |
if (isTextureOp(tex->op)) { |
texes.push_back(tex); |
// first TEX seen for this block: remember its index into texes |
if (!texCounts.at(tex->bb->getId())) |
bbFirstTex[tex->bb->getId()] = texes.size() - 1; |
texCounts[tex->bb->getId()]++; |
} |
} |
insns.clear(); |
if (texes.empty()) |
return false; |
// one list of first uses per TEX, parallel to texes |
uses = new std::list<TexUse>[texes.size()]; |
if (!uses) |
return false; |
for (size_t i = 0; i < texes.size(); ++i) { |
std::tr1::unordered_set<const Instruction *> visited; |
findFirstUses(texes[i], texes[i], uses[i], visited); |
} |
// determine the barrier level at each use |
for (size_t i = 0; i < texes.size(); ++i) { |
for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); |
++u) { |
BasicBlock *tb = texes[i]->bb; |
BasicBlock *ub = u->insn->bb; |
if (tb == ub) { |
// TEX and use share a block: count the TEXes that follow this one in |
// texes, are in the same block and have a serial below the use's |
u->level = 0; |
for (size_t j = i + 1; j < texes.size() && |
texes[j]->bb == tb && texes[j]->serial < u->insn->serial; |
++j) |
u->level++; |
} else { |
// different blocks: start from the lightest path weight through the |
// CFG, using the per-block TEX counts as weights |
u->level = fn->cfg.findLightestPathWeight(&tb->cfg, |
&ub->cfg, texCounts); |
if (u->level < 0) { |
WARN("Failed to find path TEX -> TEXBAR\n"); |
u->level = 0; |
// note: the continue skips the push_back below, so no barrier is |
// inserted for this use |
continue; |
} |
// this counted all TEXes in the origin block, correct that |
u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; |
// and did not count the TEXes in the destination block, add those |
for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && |
texes[j]->bb == ub && texes[j]->serial < u->insn->serial; |
++j) |
u->level++; |
} |
assert(u->level >= 0); |
useVec.push_back(*u); |
} |
} |
delete[] uses; |
// insert the barriers |
for (size_t i = 0; i < useVec.size(); ++i) { |
Instruction *prev = useVec[i].insn->prev; |
// defensive: skip entries with a negative level |
if (useVec[i].level < 0) |
continue; |
if (prev && prev->op == OP_TEXBAR) { |
// a barrier already sits directly before this use: keep the smaller |
// level and append this TEX's first def as an extra source |
if (prev->subOp > useVec[i].level) |
prev->subOp = useVec[i].level; |
prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); |
} else { |
Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); |
bar->fixed = 1; |
bar->subOp = useVec[i].level; |
// make use explicit to ease latency calculation |
bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); |
useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); |
} |
} |
// culling below is only done at optimization level 3 and above |
if (fn->getProgram()->optLevel < 3) |
return true; |
std::vector<Limits> limitT, limitB, limitS; // entry, exit, single |
limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); |
limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); |
limitS.resize(fn->allBBlocks.getSize()); |
// cull unneeded barriers (should do that earlier, but for simplicity) |
IteratorRef bi = fn->cfg.iteratorCFG(); |
// first calculate min/max outstanding TEXes for each BB |
for (bi->reset(); !bi->end(); bi->next()) { |
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); |
BasicBlock *bb = BasicBlock::get(n); |
int min = 0; |
// max stays at INT_MAX for as long as no TEXBAR has been seen in the block |
int max = std::numeric_limits<int>::max(); |
for (Instruction *i = bb->getFirst(); i; i = i->next) { |
if (isTextureOp(i->op)) { |
min++; |
if (max < std::numeric_limits<int>::max()) |
max++; |
} else |
if (i->op == OP_TEXBAR) { |
// a barrier caps both bounds at its level |
min = MIN2(min, i->subOp); |
max = MIN2(max, i->subOp); |
} |
} |
// limits when looking at an isolated block |
limitS[bb->getId()].min = min; |
limitS[bb->getId()].max = max; |
} |
// propagate the min/max values |
// (repeated loopNestingBound + 1 times over the whole CFG) |
for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { |
for (bi->reset(); !bi->end(); bi->next()) { |
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); |
BasicBlock *bb = BasicBlock::get(n); |
const int bbId = bb->getId(); |
// entry limits: maximum over the exit limits of all incident blocks |
for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { |
BasicBlock *in = BasicBlock::get(ei.getNode()); |
const int inId = in->getId(); |
limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); |
limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); |
} |
// I just hope this is correct ... |
if (limitS[bbId].max == std::numeric_limits<int>::max()) { |
// no barrier |
limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; |
limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; |
} else { |
// block contained a barrier |
limitB[bbId].min = MIN2(limitS[bbId].max, |
limitT[bbId].min + limitS[bbId].min); |
limitB[bbId].max = MIN2(limitS[bbId].max, |
limitT[bbId].max + limitS[bbId].min); |
} |
} |
} |
// finally delete unnecessary barriers |
for (bi->reset(); !bi->end(); bi->next()) { |
Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); |
BasicBlock *bb = BasicBlock::get(n); |
Instruction *prev = NULL; |
Instruction *next; |
// running upper bound, seeded with the block's entry limit |
int max = limitT[bb->getId()].max; |
for (Instruction *i = bb->getFirst(); i; i = next) { |
// fetch the successor first, i may be deleted below |
next = i->next; |
if (i->op == OP_TEXBAR) { |
if (i->subOp >= max) { |
// barrier level is not below the current bound: delete it |
delete_Instruction(prog, i); |
i = NULL; |
} else { |
max = i->subOp; |
// the previous non-nop instruction is a barrier that is not stricter |
// than this one: delete the previous one |
if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { |
delete_Instruction(prog, prev); |
prev = NULL; |
} |
} |
} else |
if (isTextureOp(i->op)) { |
max++; |
} |
if (i && !i->isNop()) |
prev = i; |
} |
} |
return true; |
} |
// Per-function setup for the post-RA legalization pass: optionally insert |
// texture barriers, then create the two helper values used by the |
// per-block visitor (rZero and carry). |
bool |
NVC0LegalizePostRA::visit(Function *fn) |
{ |
   if (needTexBar) { |
      insertTextureBarriers(fn); |
   } |
 |
   // GPR whose id equals the size of the GPR file; replaceZero() substitutes |
   // it for immediate zero sources. |
   LValue *zeroReg = new_LValue(fn, FILE_GPR); |
   // flags register 0, handed to the 64-bit split helper as carry |
   LValue *carryReg = new_LValue(fn, FILE_FLAGS); |
 |
   zeroReg->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR); |
   carryReg->reg.data.id = 0; |
 |
   rZero = zeroReg; |
   carry = carryReg; |
   return true; |
} |
// Replace every immediate source of @i whose 64-bit value is 0 with rZero. |
// Source 2 of OP_SUCLAMP is left untouched. |
void |
NVC0LegalizePostRA::replaceZero(Instruction *i) |
{ |
   int idx = 0; |
   while (i->srcExists(idx)) { |
      const bool skip = (i->op == OP_SUCLAMP) && (idx == 2); |
      if (!skip) { |
         ImmediateValue *immediate = i->getSrc(idx)->asImm(); |
         const bool isZero = immediate && immediate->reg.data.u64 == 0; |
         if (isZero) |
            i->setSrc(idx, rZero); |
      } |
      ++idx; |
   } |
} |
// Replace CONT with BRA for a single unconditional continue. |
// |
// Applies when @bb has exactly two incident edges, starts with PRECONT, and |
// one of the first two incident edges is a back edge coming from a block |
// that ends in an unpredicated CONT. On success the CONT becomes a BRA, the |
// PRECONT is removed and true is returned; otherwise nothing is changed. |
bool |
NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) |
{ |
   if (bb->cfg.incidentCount() != 2) |
      return false; |
   if (bb->getEntry()->op != OP_PRECONT) |
      return false; |
 |
   // look for the back edge among the first two incident edges |
   Graph::EdgeIterator edge = bb->cfg.incident(); |
   if (edge.getType() != Graph::Edge::BACK) |
      edge.next(); |
   if (edge.getType() != Graph::Edge::BACK) |
      return false; |
 |
   BasicBlock *contBB = BasicBlock::get(edge.getNode()); |
   const bool plainCont = contBB->getExit() && |
                          contBB->getExit()->op == OP_CONT && |
                          !contBB->getExit()->getPredicate(); |
   if (!plainCont) |
      return false; |
 |
   contBB->getExit()->op = OP_BRA; |
   bb->remove(bb->getEntry()); // delete PRECONT |
 |
   // there must not be a second back edge |
   edge.next(); |
   assert(edge.end() || edge.getType() != Graph::Edge::BACK); |
   return true; |
} |
// Replace branches to join blocks with join ops. |
// |
// If @bb starts with a JOIN whose limit field is not set, every incident |
// block that ends in BRA gets its exit turned into a JOIN (marked with |
// limit = 1), incident blocks without any exit instruction get a new JOIN |
// appended, and the JOIN at the start of @bb is removed. |
void |
NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) |
{ |
   const bool startsWithJoin = bb->getEntry()->op == OP_JOIN; |
   if (!startsWithJoin || bb->getEntry()->asFlow()->limit) |
      return; |
 |
   Graph::EdgeIterator edge = bb->cfg.incident(); |
   while (!edge.end()) { |
      BasicBlock *pred = BasicBlock::get(edge.getNode()); |
      Instruction *term = pred->getExit(); |
      if (term) { |
         if (term->op == OP_BRA) { |
            term->op = OP_JOIN; |
            term->asFlow()->limit = 1; // must-not-propagate marker |
         } |
      } else { |
         pred->insertTail(new FlowInstruction(func, OP_JOIN, bb)); |
         // there should always be a terminator instruction |
         WARN("inserted missing terminator in BB:%i\n", pred->getId()); |
      } |
      edge.next(); |
   } |
   bb->remove(bb->getEntry()); |
} |
// Per-block post-RA legalization: clean up EMIT/RESTART, drop no-ops, split |
// 64-bit operations, replace zero immediates by rZero, and finally try the |
// CONT->BRA replacement or, failing that, JOIN propagation. Always returns |
// true. |
bool |
NVC0LegalizePostRA::visit(BasicBlock *bb) |
{ |
Instruction *i, *next; |
// remove pseudo operations and non-fixed no-ops, split 64 bit operations |
for (i = bb->getFirst(); i; i = next) { |
// fetch the successor up front, i may be removed from the block below |
next = i->next; |
if (i->op == OP_EMIT || i->op == OP_RESTART) { |
// drop the def if nothing references it |
if (!i->getDef(0)->refCount()) |
i->setDef(0, NULL); |
if (i->src(0).getFile() == FILE_IMMEDIATE) |
i->setSrc(0, rZero); // initial value must be 0 |
replaceZero(i); |
} else |
if (i->isNop()) { |
bb->remove(i); |
} else { |
// TODO: Move this to before register allocation for operations that |
// need the $c register ! |
if (typeSizeof(i->dType) == 8) { |
Instruction *hi; |
hi = BuildUtil::split64BitOpPostRA(func, i, rZero, carry); |
// if the split returned a second instruction, visit it next |
if (hi) |
next = hi; |
} |
// MOV and PFETCH keep their immediate zero sources |
if (i->op != OP_MOV && i->op != OP_PFETCH) |
replaceZero(i); |
} |
} |
// nothing left in the block: the flow fix-ups below need an entry insn |
if (!bb->getEntry()) |
return true; |
if (!tryReplaceContWithBra(bb)) |
propagateJoin(bb); |
return true; |
} |
// Construct the lowering pass for @prog: cache the program's target and |
// bind the instruction builder to the program. |
NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) |
{ |
   // no global memory base symbol yet; handleSurfaceOpNVE4() creates it on |
   // first use |
   gMemBase = NULL; |
 |
   bld.setProgram(prog); |
} |
// Per-function setup. For geometry programs this loads 0 into |
// gpEmitAddress at the start of the root block and, if the function has an |
// exit block, emits a move of gpEmitAddress to register 0 at that block's |
// exit instruction. Other program types need no setup. Always returns true. |
bool |
NVC0LoweringPass::visit(Function *fn) |
{ |
   if (prog->getType() != Program::TYPE_GEOMETRY) |
      return true; |
 |
   assert(!strncmp(fn->getName(), "MAIN", 4)); |
   // TODO: when we generate actual functions pass this value along somehow |
   BasicBlock *rootBB = BasicBlock::get(fn->cfg.getRoot()); |
   bld.setPosition(rootBB, false); |
   gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); |
 |
   if (!fn->cfgExit) |
      return true; |
 |
   Instruction *exitInsn = BasicBlock::get(fn->cfgExit)->getExit(); |
   bld.setPosition(exitInsn, false); |
   bld.mkMovToReg(0, gpEmitAddress); |
   return true; |
} |
// No per-block work is done by this pass; returns true unconditionally. |
bool |
NVC0LoweringPass::visit(BasicBlock *bb) |
{ |
return true; |
} |
// Emit a 32-bit load of the texture handle for @slot from the driver's |
// resource-info constant buffer. The address is texBindBase + slot * 4, |
// optionally indexed by @ptr. Returns the loaded value. |
inline Value * |
NVC0LoweringPass::loadTexHandle(Value *ptr, unsigned int slot) |
{ |
   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot; |
   const uint32_t byteOffset = prog->driver->io.texBindBase + slot * 4; |
 |
   Symbol *handleSym = |
      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, byteOffset); |
   return bld.mkLoadv(TYPE_U32, handleSym, ptr); |
} |
// move array source to first slot, convert to u16, add indirections |
// |
// Rearranges the sources of texture instruction @i into the order the |
// hardware expects for the current chipset (see the table in the body), |
// packs texture offsets into a source, and on GK104+ pads the source list |
// so that 5 or 6 sources become 7. Always returns true. |
bool |
NVC0LoweringPass::handleTEX(TexInstruction *i) |
{ |
// dim: number of coordinate components (+1 for cube targets) |
const int dim = i->tex.target.getDim() + i->tex.target.isCube(); |
const int arg = i->tex.target.getArgCount(); |
// lyr: source index of the array layer (the last argument, or the one |
// before the sample index for multisample targets) |
const int lyr = arg - (i->tex.target.isMS() ? 2 : 1); |
const int chipset = prog->getTarget()->getChipset(); |
// Arguments to the TEX instruction are a little insane. Even though the |
// encoding is identical between SM20 and SM30, the arguments mean |
// different things between Fermi and Kepler+. A lot of arguments are |
// optional based on flags passed to the instruction. This summarizes the |
// order of things. |
// |
// Fermi: |
// array/indirect |
// coords |
// sample |
// lod bias |
// depth compare |
// offsets: |
// - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg) |
// - other: 4 bits each, single reg |
// |
// Kepler+: |
// indirect handle |
// array (+ offsets for txd in upper 16 bits) |
// coords |
// sample |
// lod bias |
// depth compare |
// offsets (same as fermi, except txd which takes it with array) |
// |
// Maxwell (tex): |
// array |
// coords |
// indirect handle |
// sample |
// lod bias |
// depth compare |
// offsets |
// |
// Maxwell (txd): |
// indirect handle |
// coords |
// array + offsets |
// derivatives |
if (chipset >= NVISA_GK104_CHIPSET) { |
if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { |
// XXX this ignores tsc, and assumes a 1:1 mapping |
assert(i->tex.rIndirectSrc >= 0); |
// load the handle from the bind table, indexed by (indirect << 2) |
Value *hnd = loadTexHandle( |
bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirectR(), bld.mkImm(2)), |
i->tex.r); |
i->tex.r = 0xff; |
i->tex.s = 0x1f; |
i->setIndirectR(hnd); |
i->setIndirectS(NULL); |
} else if (i->tex.r == i->tex.s) { |
i->tex.r += prog->driver->io.texBindBase / 4; |
i->tex.s = 0; // only a single cX[] value possible here |
} else { |
// texture and sampler indices differ: combine the two loaded handles |
// into one value with INSBF and pass it as indirect handle |
Value *hnd = bld.getScratch(); |
Value *rHnd = loadTexHandle(NULL, i->tex.r); |
Value *sHnd = loadTexHandle(NULL, i->tex.s); |
bld.mkOp3(OP_INSBF, TYPE_U32, hnd, rHnd, bld.mkImm(0x1400), sHnd); |
i->tex.r = 0; // not used for indirect tex |
i->tex.s = 0; |
i->setIndirectR(hnd); |
} |
if (i->tex.target.isArray()) { |
// convert the layer to u16 (saturating for TXF, whose layer is u32) |
LValue *layer = new_LValue(func, FILE_GPR); |
Value *src = i->getSrc(lyr); |
const int sat = (i->op == OP_TXF) ? 1 : 0; |
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; |
bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; |
if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) { |
// shift the coordinates up by one and put the layer in front |
for (int s = dim; s >= 1; --s) |
i->setSrc(s, i->getSrc(s - 1)); |
i->setSrc(0, layer); |
} else { |
// TXD on GM107+: the layer goes after the coordinates |
i->setSrc(dim, layer); |
} |
} |
// Move the indirect reference to the first place |
if (i->tex.rIndirectSrc >= 0 && ( |
i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) { |
Value *hnd = i->getIndirectR(); |
i->setIndirectR(NULL); |
i->moveSources(0, 1); |
i->setSrc(0, hnd); |
i->tex.rIndirectSrc = 0; |
i->tex.sIndirectSrc = -1; |
} |
} else |
// (nvc0) generate and move the tsc/tic/array source to the front |
if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { |
LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa |
Value *ticRel = i->getIndirectR(); |
Value *tscRel = i->getIndirectS(); |
// detach the indirect sources and fold the static index into them |
if (ticRel) { |
i->setSrc(i->tex.rIndirectSrc, NULL); |
if (i->tex.r) |
ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), |
ticRel, bld.mkImm(i->tex.r)); |
} |
if (tscRel) { |
i->setSrc(i->tex.sIndirectSrc, NULL); |
if (i->tex.s) |
tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), |
tscRel, bld.mkImm(i->tex.s)); |
} |
Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL; |
// shift the coordinates up by one to free source 0 |
for (int s = dim; s >= 1; --s) |
i->setSrc(s, i->getSrc(s - 1)); |
i->setSrc(0, arrayIndex); |
if (arrayIndex) { |
int sat = (i->op == OP_TXF) ? 1 : 0; |
DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; |
bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; |
} else { |
bld.loadImm(src, 0); |
} |
// insert the relative tic/tsc indices into the combined source |
if (ticRel) |
bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); |
if (tscRel) |
bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); |
i->setSrc(0, src); |
} |
// For nvc0, the sample id has to be in the second operand, as the offset |
// does. Right now we don't know how to pass both in, and this case can't |
// happen with OpenGL. On nve0, the sample id is part of the texture |
// coordinate argument. |
assert(chipset >= NVISA_GK104_CHIPSET || |
!i->tex.useOffsets || !i->tex.target.isMS()); |
// offset is between lod and dc |
if (i->tex.useOffsets) { |
int n, c; |
// s: index at which the offset source(s) will be placed |
int s = i->srcCount(0xff, true); |
if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) { |
// keep the depth compare value after the offsets |
if (i->tex.target.isShadow()) |
s--; |
if (i->srcExists(s)) // move potential predicate out of the way |
i->moveSources(s, 1); |
if (i->tex.useOffsets == 4 && i->srcExists(s + 1)) |
i->moveSources(s + 1, 1); |
} |
if (i->op == OP_TXG) { |
// Either there is 1 offset, which goes into the 2 low bytes of the |
// first source, or there are 4 offsets, which go into 2 sources (8 |
// values, 1 byte each). |
Value *offs[2] = {NULL, NULL}; |
for (n = 0; n < i->tex.useOffsets; n++) { |
for (c = 0; c < 2; ++c) { |
// the first component of every even offset starts a new register, |
// all other components are inserted as 8-bit fields |
if ((n % 2) == 0 && c == 0) |
offs[n / 2] = i->offset[n][c].get(); |
else |
bld.mkOp3(OP_INSBF, TYPE_U32, |
offs[n / 2], |
i->offset[n][c].get(), |
bld.mkImm(0x800 | ((n * 16 + c * 8) % 32)), |
offs[n / 2]); |
} |
} |
i->setSrc(s, offs[0]); |
if (offs[1]) |
i->setSrc(s + 1, offs[1]); |
} else { |
// non-TXG: pack three immediate 4-bit offsets into one value |
unsigned imm = 0; |
assert(i->tex.useOffsets == 1); |
for (c = 0; c < 3; ++c) { |
ImmediateValue val; |
if (!i->offset[0][c].getImmediate(val)) |
assert(!"non-immediate offset passed to non-TXG"); |
imm |= (val.reg.data.u32 & 0xf) << (c * 4); |
} |
if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) { |
// The offset goes into the upper 16 bits of the array index. So |
// create it if it's not already there, and INSBF it if it already |
// is. |
s = (i->tex.rIndirectSrc >= 0) ? 1 : 0; |
if (chipset >= NVISA_GM107_CHIPSET) |
s += dim; |
if (i->tex.target.isArray()) { |
bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s), |
bld.loadImm(NULL, imm), bld.mkImm(0xc10), |
i->getSrc(s)); |
} else { |
i->moveSources(s, 1); |
i->setSrc(s, bld.loadImm(NULL, imm << 16)); |
} |
} else { |
i->setSrc(s, bld.loadImm(NULL, imm)); |
} |
} |
} |
if (chipset >= NVISA_GK104_CHIPSET) { |
// |
// If TEX requires more than 4 sources, the 2nd register tuple must be |
// aligned to 4, even if it consists of just a single 4-byte register. |
// |
// XXX HACK: We insert 0 sources to avoid the 5 or 6 regs case. |
// |
int s = i->srcCount(0xff, true); |
if (s > 4 && s < 7) { |
if (i->srcExists(s)) // move potential predicate out of the way |
i->moveSources(s, 7 - s); |
while (s < 7) |
i->setSrc(s++, bld.loadImm(NULL, 0)); |
} |
} |
return true; |
} |
// Emulate TXD with four plain TEX operations, one per quad lane. |
// |
// For each lane l the coordinates of lane l are broadcast to all lanes, |
// dPdx/dPdy of lane l are applied through quad operations, a clone of the |
// instruction is issued, and the result is moved out with only lane l |
// enabled. The four per-lane results are then combined with OP_UNION into |
// the original defs and @i is removed from its block. Always returns true. |
bool |
NVC0LoweringPass::handleManualTXD(TexInstruction *i) |
{ |
// quad operation selectors, indexed by [lane][0 = dPdx pass, 1 = dPdy pass] |
static const uint8_t qOps[4][2] = |
{ |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 |
{ QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 |
{ QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 |
}; |
// def[c][l]: result component c as produced for lane l |
Value *def[4][4]; |
Value *crd[3]; |
Instruction *tex; |
Value *zero = bld.loadImm(bld.getSSA(), 0); |
int l, c; |
const int dim = i->tex.target.getDim(); |
// 1 if the target is an array: the coordinates then start at source 1 |
const int array = i->tex.target.isArray(); |
i->op = OP_TEX; // no need to clone dPdx/dPdy later |
for (c = 0; c < dim; ++c) |
crd[c] = bld.getScratch(); |
bld.mkOp(OP_QUADON, TYPE_NONE, NULL); |
for (l = 0; l < 4; ++l) { |
// mov coordinates from lane l to all lanes |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero); |
// add dPdx from lane l to lanes dx |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); |
// add dPdy from lane l to lanes dy |
for (c = 0; c < dim; ++c) |
bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); |
// texture |
bld.insert(tex = cloneForward(func, i)); |
for (c = 0; c < dim; ++c) |
tex->setSrc(c + array, crd[c]); |
// save results |
for (c = 0; i->defExists(c); ++c) { |
Instruction *mov; |
def[c][l] = bld.getSSA(); |
mov = bld.mkMov(def[c][l], tex->getDef(c)); |
mov->fixed = 1; |
// only lane l is enabled for this move |
mov->lanes = 1 << l; |
} |
} |
bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); |
// merge the four per-lane values of each component into the original def |
for (c = 0; i->defExists(c); ++c) { |
Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); |
for (l = 0; l < 4; ++l) |
u->setSrc(l, def[c][l]); |
} |
i->bb->remove(i); |
return true; |
} |
// Lower a TXD (texture fetch with explicit derivatives). |
// |
// When more than 4 arguments would be needed, or the target has more than |
// 2 dimensions, is a shadow target or a cube target, the operation is |
// turned into OP_TEX and emulated by handleManualTXD(). Otherwise the |
// dPdx/dPdy values are appended as regular sources after the arguments. |
bool |
NVC0LoweringPass::handleTXD(TexInstruction *txd) |
{ |
int dim = txd->tex.target.getDim(); |
unsigned arg = txd->tex.target.getArgCount(); |
// number of sources expected to be present once handleTEX() has run |
unsigned expected_args = arg; |
const int chipset = prog->getTarget()->getChipset(); |
if (chipset >= NVISA_GK104_CHIPSET) { |
if (!txd->tex.target.isArray() && txd->tex.useOffsets) |
expected_args++; |
if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0) |
expected_args++; |
} else { |
if (txd->tex.useOffsets) |
expected_args++; |
if (!txd->tex.target.isArray() && ( |
txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)) |
expected_args++; |
} |
// cases that are handled manually: switch the opcode before handleTEX() |
if (expected_args > 4 || |
dim > 2 || |
txd->tex.target.isShadow() || |
txd->tex.target.isCube()) |
txd->op = OP_TEX; |
handleTEX(txd); |
// advance arg to the first free source slot |
while (txd->srcExists(arg)) |
++arg; |
txd->tex.derivAll = true; |
if (txd->op == OP_TEX) |
return handleManualTXD(txd); |
assert(arg == expected_args); |
// append the derivatives as interleaved (dPdx, dPdy) pairs and clear the |
// dedicated derivative references |
for (int c = 0; c < dim; ++c) { |
txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); |
txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); |
txd->dPdx[c].set(NULL); |
txd->dPdy[c].set(NULL); |
} |
return true; |
} |
// Texture query: currently left unchanged. Returns true unconditionally. |
bool |
NVC0LoweringPass::handleTXQ(TexInstruction *txq) |
{ |
// TODO: indirect resource/sampler index |
return true; |
} |
// Lower a LOD query: fix up the output mask, run the generic TEX lowering |
// and convert the raw results to floating point. Always returns true. |
bool |
NVC0LoweringPass::handleTXLQ(TexInstruction *i) |
{ |
   /* The hardware produces the two outputs in the opposite order from what |
    * the TGSI instruction expects, so a single-component mask selects the |
    * other component. |
    */ |
   assert((i->tex.mask & ~3) == 0); |
   switch (i->tex.mask) { |
   case 1: |
      i->tex.mask = 2; |
      break; |
   case 2: |
      i->tex.mask = 1; |
      break; |
   default: |
      break; |
   } |
 |
   handleTEX(i); |
   bld.setPosition(i, true); |
 |
   /* Post-process each existing result in place: |
    * (a) convert from s16/u16 to f32 |
    * (b) multiply by 1/256 |
    */ |
   for (int d = 0; d < 2; ++d) { |
      if (i->defExists(d)) { |
         const bool isUnsigned = (i->tex.mask == 2) || (d > 0); |
         const DataType rawTy = isUnsigned ? TYPE_U16 : TYPE_S16; |
         bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(d), rawTy, i->getDef(d)); |
         bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(d), |
                   i->getDef(d), bld.loadImm(NULL, 1.0f / 256)); |
      } |
   } |
 |
   /* Both components requested: exchange them through a temporary. */ |
   if (i->tex.mask == 3) { |
      LValue *tmp = new_LValue(func, FILE_GPR); |
      bld.mkMov(tmp, i->getDef(0)); |
      bld.mkMov(i->getDef(0), i->getDef(1)); |
      bld.mkMov(i->getDef(1), tmp); |
   } |
   return true; |
} |
// Rewrite an atomic on local or shared memory into one on global memory. |
// The window base is read from the matching system value (SV_LBASE or |
// SV_SBASE), added to any existing indirect address, and installed as the |
// indirect address. Atomics already on global memory are left alone. |
// Always returns true. |
bool |
NVC0LoweringPass::handleATOM(Instruction *atom) |
{ |
   const DataFile file = atom->src(0).getFile(); |
   SVSemantic sv; |
 |
   if (file == FILE_MEMORY_LOCAL) { |
      sv = SV_LBASE; |
   } else if (file == FILE_MEMORY_SHARED) { |
      sv = SV_SBASE; |
   } else { |
      assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL); |
      return true; |
   } |
 |
   Value *windowBase = |
      bld.mkOp1v(OP_RDSV, TYPE_U32, bld.getScratch(), bld.mkSysVal(sv, 0)); |
   Value *oldPtr = atom->getIndirect(0, 0); |
 |
   // work on a private copy of the symbol before changing its file |
   atom->setSrc(0, cloneShallow(func, atom->getSrc(0))); |
   atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL; |
 |
   if (oldPtr) |
      windowBase = bld.mkOp2v(OP_ADD, TYPE_U32, windowBase, windowBase, oldPtr); |
   atom->setIndirect(0, 0, windowBase); |
   return true; |
} |
// Extra lowering for atomic CAS and EXCH operations. |
// |
// Returns false without touching @cas if its subOp is neither CAS nor EXCH. |
// Otherwise, when @needCctl is set, a CCTL with subOp CCTL_IV on the same |
// address (and the same predicate, if any) is emitted after @cas; for a CAS |
// with a def, sources 1 and 2 are merged into one 64-bit value that |
// replaces source 1. Returns true in that case. |
bool |
NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) |
{ |
if (cas->subOp != NV50_IR_SUBOP_ATOM_CAS && |
cas->subOp != NV50_IR_SUBOP_ATOM_EXCH) |
return false; |
// the CCTL is inserted after the atomic |
bld.setPosition(cas, true); |
if (needCctl) { |
Instruction *cctl = bld.mkOp1(OP_CCTL, TYPE_NONE, NULL, cas->getSrc(0)); |
cctl->setIndirect(0, 0, cas->getIndirect(0, 0)); |
cctl->fixed = 1; |
cctl->subOp = NV50_IR_SUBOP_CCTL_IV; |
if (cas->isPredicated()) |
cctl->setPredicate(cas->cc, cas->getPredicate()); |
} |
if (cas->defExists(0) && cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { |
// CAS is crazy. Its 2nd source is a double reg, and the 3rd source |
// should be set to the high part of the double reg or bad things will |
// happen elsewhere in the universe. |
// Also, it sometimes returns the new value instead of the old one |
// under mysterious circumstances. |
Value *dreg = bld.getSSA(8); |
// the MERGE has to be placed before the atomic |
bld.setPosition(cas, false); |
bld.mkOp2(OP_MERGE, TYPE_U64, dreg, cas->getSrc(1), cas->getSrc(2)); |
cas->setSrc(1, dreg); |
} |
return true; |
} |
// Emit a 32-bit load from the driver's resource-info constant buffer at |
// suInfoBase + @off, optionally indexed by @ptr. Returns the loaded value. |
inline Value * |
NVC0LoweringPass::loadResInfo32(Value *ptr, uint32_t off) |
{ |
   const uint8_t cbSlot = prog->driver->io.resInfoCBSlot; |
   const uint32_t byteOffset = off + prog->driver->io.suInfoBase; |
 |
   Symbol *infoSym = |
      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, byteOffset); |
   return bld.mkLoadv(TYPE_U32, infoSym, ptr); |
} |
// Emit a 32-bit load from the driver's multisample-info constant buffer at |
// msInfoBase + @off, optionally indexed by @ptr. Returns the loaded value. |
inline Value * |
NVC0LoweringPass::loadMsInfo32(Value *ptr, uint32_t off) |
{ |
   const uint8_t cbSlot = prog->driver->io.msInfoCBSlot; |
   const uint32_t byteOffset = off + prog->driver->io.msInfoBase; |
 |
   Symbol *infoSym = |
      bld.mkSymbol(FILE_MEMORY_CONST, cbSlot, TYPE_U32, byteOffset); |
   return bld.mkLoadv(TYPE_U32, infoSym, ptr); |
} |
/* On nvc0, surface info is obtained via the surface binding points passed |
* to the SULD/SUST instructions. |
* On nve4, surface info is stored in c[] and is used by various special |
* instructions, e.g. for clamping coordinates or generating an address. |
* They couldn't just have added an equivalent to TIC now, couldn't they ? |
*/ |
// Byte offsets of the fields inside one per-surface info record. Records |
// are NVE4_SU_INFO__STRIDE (0x40) bytes apart; the record of surface n |
// starts at n * NVE4_SU_INFO__STRIDE. |
#define NVE4_SU_INFO_ADDR 0x00 |
#define NVE4_SU_INFO_FMT 0x04 |
#define NVE4_SU_INFO_DIM_X 0x08 |
#define NVE4_SU_INFO_PITCH 0x0c |
#define NVE4_SU_INFO_DIM_Y 0x10 |
#define NVE4_SU_INFO_ARRAY 0x14 |
#define NVE4_SU_INFO_DIM_Z 0x18 |
#define NVE4_SU_INFO_UNK1C 0x1c |
#define NVE4_SU_INFO_WIDTH 0x20 |
#define NVE4_SU_INFO_HEIGHT 0x24 |
#define NVE4_SU_INFO_DEPTH 0x28 |
#define NVE4_SU_INFO_TARGET 0x2c |
#define NVE4_SU_INFO_CALL 0x30 |
#define NVE4_SU_INFO_RAW_X 0x34 |
#define NVE4_SU_INFO_MS_X 0x38 |
#define NVE4_SU_INFO_MS_Y 0x3c |
#define NVE4_SU_INFO__STRIDE 0x40 |
// Indexed accessors: DIM(0..2) yields DIM_X/DIM_Y/DIM_Z (8 bytes apart), |
// SIZE(0..2) yields WIDTH/HEIGHT/DEPTH, MS(0..1) yields MS_X/MS_Y. |
#define NVE4_SU_INFO_DIM(i) (0x08 + (i) * 8) |
#define NVE4_SU_INFO_SIZE(i) (0x20 + (i) * 4) |
#define NVE4_SU_INFO_MS(i) (0x38 + (i) * 4) |
// Select the SUCLAMP sub-operation for coordinate @c of surface op @su, |
// based on the texture target. Unknown targets assert and yield 0. |
static inline uint16_t getSuClampSubOp(const TexInstruction *su, int c) |
{ |
   switch (su->tex.target.getEnum()) { |
   case TEX_TARGET_BUFFER: |
      return NV50_IR_SUBOP_SUCLAMP_PL(0, 1); |
 |
   case TEX_TARGET_1D_ARRAY: |
      // coordinate 1 is the layer and gets a different clamp mode |
      if (c == 1) |
         return NV50_IR_SUBOP_SUCLAMP_PL(0, 2); |
      return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); |
 |
   case TEX_TARGET_2D: |
   case TEX_TARGET_2D_MS: |
      return NV50_IR_SUBOP_SUCLAMP_BL(0, 2); |
 |
   case TEX_TARGET_RECT: |
   case TEX_TARGET_1D: |
   case TEX_TARGET_2D_ARRAY: |
   case TEX_TARGET_2D_MS_ARRAY: |
   case TEX_TARGET_3D: |
   case TEX_TARGET_CUBE: |
   case TEX_TARGET_CUBE_ARRAY: |
      return NV50_IR_SUBOP_SUCLAMP_SD(0, 2); |
 |
   default: |
      assert(0); |
      return 0; |
   } |
} |
// Turn a multisample surface access into an access to the corresponding |
// non-multisample target. |
// |
// 2D_MS becomes 2D and 2D_MS_ARRAY becomes 2D_ARRAY; any other target is |
// left untouched. The x and y coordinates are shifted left by the |
// per-surface MS_X / MS_Y values and a per-sample (dx, dy) pair loaded from |
// the multisample info buffer is added. The sample index source (the last |
// argument) is then dropped from the instruction. |
void |
NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex) |
{ |
const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE; |
// taken before the target is changed, so it still counts the sample index |
const int arg = tex->tex.target.getArgCount(); |
if (tex->tex.target == TEX_TARGET_2D_MS) |
tex->tex.target = TEX_TARGET_2D; |
else |
if (tex->tex.target == TEX_TARGET_2D_MS_ARRAY) |
tex->tex.target = TEX_TARGET_2D_ARRAY; |
else |
return; |
Value *x = tex->getSrc(0); |
Value *y = tex->getSrc(1); |
Value *s = tex->getSrc(arg - 1); |
Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA(); |
Value *ms_x = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(0)); |
Value *ms_y = loadResInfo32(NULL, base + NVE4_SU_INFO_MS(1)); |
bld.mkOp2(OP_SHL, TYPE_U32, tx, x, ms_x); |
bld.mkOp2(OP_SHL, TYPE_U32, ty, y, ms_y); |
// ts = (sample & 7) << 3: byte offset of the sample's 8-byte entry |
s = bld.mkOp2v(OP_AND, TYPE_U32, ts, s, bld.loadImm(NULL, 0x7)); |
s = bld.mkOp2v(OP_SHL, TYPE_U32, ts, ts, bld.mkImm(3)); |
// entry layout as used here: dx at +0, dy at +4 |
Value *dx = loadMsInfo32(ts, 0x0); |
Value *dy = loadMsInfo32(ts, 0x4); |
bld.mkOp2(OP_ADD, TYPE_U32, tx, tx, dx); |
bld.mkOp2(OP_ADD, TYPE_U32, ty, ty, dy); |
tex->setSrc(0, tx); |
tex->setSrc(1, ty); |
// remove the sample index source |
tex->moveSources(arg, -1); |
} |
// Sets 64-bit "generic address", predicate and format sources for SULD/SUST. |
// They're computed from the coordinates using the surface info in c[] space. |
// |
// On return the coordinate sources of @su have been replaced: source 0 is |
// the 64-bit address, source 1 the format value (immediate 0 for raw |
// access) and source 2 the predicate. |
void |
NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su) |
{ |
Instruction *insn; |
// atom: reduction/atomic surface ops; raw: the "B" variants of the ops |
const bool atom = su->op == OP_SUREDB || su->op == OP_SUREDP; |
const bool raw = |
su->op == OP_SULDB || su->op == OP_SUSTB || su->op == OP_SUREDB; |
const int idx = su->tex.r; |
const int dim = su->tex.target.getDim(); |
// number of coordinate sources: dimensions plus the layer for arrays |
const int arg = dim + (su->tex.target.isArray() ? 1 : 0); |
// offset of this surface's record in the surface info area |
const uint16_t base = idx * NVE4_SU_INFO__STRIDE; |
int c; |
Value *zero = bld.mkImm(0); |
Value *p1 = NULL; |
Value *v; |
Value *src[3]; |
Value *bf, *eau, *off; |
Value *addr, *pred; |
off = bld.getScratch(4); |
bf = bld.getScratch(4); |
addr = bld.getSSA(8); |
pred = bld.getScratch(1, FILE_PREDICATE); |
// everything below is inserted before the surface instruction |
bld.setPosition(su, false); |
adjustCoordinatesMS(su); |
// calculate clamped coordinates |
for (c = 0; c < arg; ++c) { |
src[c] = bld.getScratch(); |
if (c == 0 && raw) |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_RAW_X); |
else |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_DIM(c)); |
bld.mkOp3(OP_SUCLAMP, TYPE_S32, src[c], su->getSrc(c), v, zero) |
->subOp = getSuClampSubOp(su, c); |
} |
// unused coordinates read as zero |
for (; c < 3; ++c) |
src[c] = zero; |
// set predicate output |
if (su->tex.target == TEX_TARGET_BUFFER) { |
src[0]->getInsn()->setFlagsDef(1, pred); |
} else |
if (su->tex.target.isArray()) { |
// the layer's SUCLAMP writes a separate predicate, merged into pred later |
p1 = bld.getSSA(1, FILE_PREDICATE); |
src[dim]->getInsn()->setFlagsDef(1, p1); |
} |
// calculate pixel offset |
if (dim == 1) { |
if (su->tex.target != TEX_TARGET_BUFFER) |
bld.mkOp2(OP_AND, TYPE_U32, off, src[0], bld.loadImm(NULL, 0xffff)); |
} else |
if (dim == 3) { |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); |
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1]) |
->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); |
bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0]) |
->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l |
} else { |
assert(dim == 2); |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_PITCH); |
bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0]) |
->subOp = su->tex.target.isArray() ? |
NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l |
} |
// calculate effective address part 1 |
if (su->tex.target == TEX_TARGET_BUFFER) { |
if (raw) { |
bf = src[0]; |
} else { |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); |
bld.mkOp3(OP_VSHL, TYPE_U32, bf, src[0], v, zero) |
->subOp = NV50_IR_SUBOP_V1(7,6,8|2); |
} |
} else { |
// pick the y/z operands and sub-operation of SUBFM by dimensionality |
Value *y = src[1]; |
Value *z = src[2]; |
uint16_t subOp = 0; |
switch (dim) { |
case 1: |
y = zero; |
z = zero; |
break; |
case 2: |
z = off; |
if (!su->tex.target.isArray()) { |
z = loadResInfo32(NULL, base + NVE4_SU_INFO_UNK1C); |
subOp = NV50_IR_SUBOP_SUBFM_3D; |
} |
break; |
default: |
subOp = NV50_IR_SUBOP_SUBFM_3D; |
assert(dim == 3); |
break; |
} |
insn = bld.mkOp3(OP_SUBFM, TYPE_U32, bf, src[0], y, z); |
insn->subOp = subOp; |
insn->setFlagsDef(1, pred); |
} |
// part 2 |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ADDR); |
if (su->tex.target == TEX_TARGET_BUFFER) { |
eau = v; |
} else { |
eau = bld.mkOp3v(OP_SUEAU, TYPE_U32, bld.getScratch(4), off, bf, v); |
} |
// add array layer offset |
if (su->tex.target.isArray()) { |
v = loadResInfo32(NULL, base + NVE4_SU_INFO_ARRAY); |
if (dim == 1) |
bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau) |
->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32 |
else |
bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau) |
->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32 |
// combine predicates |
assert(p1); |
bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1); |
} |
if (atom) { |
Value *lo = bf; |
if (su->tex.target == TEX_TARGET_BUFFER) { |
// for buffers the offset is kept in off and added to the merged |
// 64-bit address further down |
lo = zero; |
bld.mkMov(off, bf); |
} |
// bf == g[] address & 0xff |
// eau == g[] address >> 8 |
bld.mkOp3(OP_PERMT, TYPE_U32, bf, lo, bld.loadImm(NULL, 0x6540), eau); |
bld.mkOp3(OP_PERMT, TYPE_U32, eau, zero, bld.loadImm(NULL, 0x0007), eau); |
} else |
if (su->op == OP_SULDP && su->tex.target == TEX_TARGET_BUFFER) { |
// Convert from u32 to u8 address format, which is what the library code |
// doing SULDP currently uses. |
// XXX: can SUEAU do this ? |
// XXX: does it matter that we don't mask high bytes in bf ? |
// Grrr. |
bld.mkOp2(OP_SHR, TYPE_U32, off, bf, bld.mkImm(8)); |
bld.mkOp2(OP_ADD, TYPE_U32, eau, eau, off); |
} |
// low word bf, high word eau |
bld.mkOp2(OP_MERGE, TYPE_U64, addr, bf, eau); |
if (atom && su->tex.target == TEX_TARGET_BUFFER) |
bld.mkOp2(OP_ADD, TYPE_U64, addr, addr, off); |
// let's just set it 0 for raw access and hope it works |
v = raw ? |
bld.mkImm(0) : loadResInfo32(NULL, base + NVE4_SU_INFO_FMT); |
// get rid of old coordinate sources, make space for fmt info and predicate |
su->moveSources(arg, 3 - arg); |
// set 64 bit address and 32-bit format sources |
su->setSrc(0, addr); |
su->setSrc(1, v); |
su->setSrc(2, pred); |
} |
// Lower a surface operation for NVE4. |
// |
// First replaces the coordinate sources by address / format / predicate |
// (processSurfaceCoordsNVE4). Then: |
//  - SULDP is replaced by an indirect call into the library code, with |
//    arguments and results passed in fixed registers; the original |
//    instruction is deleted. |
//  - SUREDB/SUREDP are replaced by a predicated OP_ATOM on global memory; |
//    the original instruction is deleted. |
//  - every other operation only gets its source type adjusted. |
void |
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su) |
{ |
   processSurfaceCoordsNVE4(su); |
   // Who do we hate more ? The person who decided that nvc0's SULD doesn't |
   // have to support conversion or the person who decided that, in OpenCL, |
   // you don't have to specify the format here like you do in OpenGL ? |
   if (su->op == OP_SULDP) { |
      // We don't patch shaders. Ever. |
      // You get an indirect call to our library blob here. |
      // But at least it's uniform. |
      FlowInstruction *call; |
      LValue *p[3]; |
      LValue *r[5]; |
      // c[] offset of this surface's NVE4_SU_INFO_CALL entry |
      uint16_t base = su->tex.r * NVE4_SU_INFO__STRIDE + NVE4_SU_INFO_CALL; |
      // pin the call arguments/results to fixed register ids: |
      // r[0..3] -> GPR ids 0..3, r[4] -> 64-bit GPR id 4, p[0..2] -> pred 0..2 |
      for (int i = 0; i < 4; ++i) |
         (r[i] = bld.getScratch(4, FILE_GPR))->reg.data.id = i; |
      for (int i = 0; i < 3; ++i) |
         (p[i] = bld.getScratch(1, FILE_PREDICATE))->reg.data.id = i; |
      (r[4] = bld.getScratch(8, FILE_GPR))->reg.data.id = 4; |
      bld.mkMov(p[1], bld.mkImm((su->cache == CACHE_CA) ? 1 : 0), TYPE_U8); |
      bld.mkMov(p[2], bld.mkImm((su->cache == CACHE_CG) ? 1 : 0), TYPE_U8); |
      bld.mkMov(p[0], su->getSrc(2), TYPE_U8); |
      bld.mkMov(r[4], su->getSrc(0), TYPE_U64); |
      bld.mkMov(r[2], su->getSrc(1), TYPE_U32); |
      call = bld.mkFlow(OP_CALL, NULL, su->cc, su->getPredicate()); |
      call->indirect = 1; |
      call->absolute = 1; |
      call->setSrc(0, bld.mkSymbol(FILE_MEMORY_CONST, |
                                   prog->driver->io.resInfoCBSlot, TYPE_U32, |
                                   prog->driver->io.suInfoBase + base)); |
      call->setSrc(1, r[2]); |
      call->setSrc(2, r[4]); |
      for (int i = 0; i < 3; ++i) |
         call->setSrc(3 + i, p[i]); |
      for (int i = 0; i < 4; ++i) { |
         call->setDef(i, r[i]); |
         bld.mkMov(su->getDef(i), r[i]); |
      } |
      call->setDef(4, p[1]); |
      delete_Instruction(bld.getProgram(), su); |
      // su has been released and must not be touched any more. Without this |
      // return the code below would read su->op and write su->sType on the |
      // deleted instruction. |
      return; |
   } |
   if (su->op == OP_SUREDB || su->op == OP_SUREDP) { |
      // FIXME: for out of bounds access, destination value will be undefined ! |
      Value *pred = su->getSrc(2); |
      CondCode cc = CC_NOT_P; |
      if (su->getPredicate()) { |
         // combine the instruction's own predicate with the bounds predicate |
         pred = bld.getScratch(1, FILE_PREDICATE); |
         cc = su->cc; |
         if (cc == CC_NOT_P) { |
            bld.mkOp2(OP_OR, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); |
         } else { |
            bld.mkOp2(OP_AND, TYPE_U8, pred, su->getPredicate(), su->getSrc(2)); |
            pred->getInsn()->src(1).mod = Modifier(NV50_IR_MOD_NOT); |
         } |
      } |
      Instruction *red = bld.mkOp(OP_ATOM, su->dType, su->getDef(0)); |
      red->subOp = su->subOp; |
      // the global memory base symbol is created once and then reused |
      if (!gMemBase) |
         gMemBase = bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, TYPE_U32, 0); |
      red->setSrc(0, gMemBase); |
      red->setSrc(1, su->getSrc(3)); |
      if (su->subOp == NV50_IR_SUBOP_ATOM_CAS) |
         red->setSrc(2, su->getSrc(4)); |
      red->setIndirect(0, 0, su->getSrc(0)); |
      red->setPredicate(cc, pred); |
      delete_Instruction(bld.getProgram(), su); |
      handleCasExch(red, true); |
   } else { |
      su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8; |
   } |
} |
// Replace a write to a system value by an EXPORT store to the shader output |
// address the target assigns to that system value. Returns false (leaving |
// @i in place) if that address is 0x400 or above; otherwise removes @i and |
// returns true. |
bool |
NVC0LoweringPass::handleWRSV(Instruction *i) |
{ |
   // must replace, $sreg are not writeable |
   const uint32_t outAddr = |
      targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); |
   if (outAddr >= 0x400) |
      return false; |
 |
   Symbol *outSym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, outAddr); |
   Instruction *store = bld.mkStore(OP_EXPORT, i->dType, outSym, |
                                    i->getIndirect(0, 0), i->getSrc(1)); |
   store->perPatch = i->perPatch; |
 |
   bld.getBB()->remove(i); |
   return true; |
} |
void |
NVC0LoweringPass::readTessCoord(LValue *dst, int c) |
{ |
Value *laneid = bld.getSSA(); |
Value *x, *y; |
bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); |
if (c == 0) { |
x = dst; |
y = NULL; |
} else |
if (c == 1) { |
x = NULL; |
y = dst; |
} else { |
assert(c == 2); |
x = bld.getSSA(); |
y = bld.getSSA(); |
} |
if (x) |
bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); |
if (y) |
bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); |
if (c == 2) { |
bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); |
bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); |
} |
} |
// Lower a system-value read (RDSV). |
// |
// System values for which the target reports an address >= 0x400 are kept as |
// they are (the original comment calls this "mov $sreg"), except that |
// component 3 is replaced by an immediate. Every other system value is |
// re-emitted as an interpolation, fetch, constant-buffer load or PIXLD |
// sequence writing the same destination, and the original RDSV is removed |
// from its basic block. Always returns true. |
bool |
NVC0LoweringPass::handleRDSV(Instruction *i) |
{ |
Symbol *sym = i->getSrc(0)->asSym(); |
const SVSemantic sv = sym->reg.data.sv.sv; |
Value *vtx = NULL; |
Instruction *ld; |
uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); |
if (addr >= 0x400) { |
// mov $sreg |
if (sym->reg.data.sv.index == 3) { |
// TGSI backend may use 4th component of TID,NTID,CTAID,NCTAID |
// The 4th component becomes the constant 1 for NTID/NCTAID, 0 otherwise. |
i->op = OP_MOV; |
i->setSrc(0, bld.mkImm((sv == SV_NTID || sv == SV_NCTAID) ? 1 : 0)); |
} |
return true; |
} |
switch (sv) { |
case SV_POSITION: |
assert(prog->getType() == Program::TYPE_FRAGMENT); |
if (i->srcExists(1)) { |
// Pass offset through to the interpolation logic |
ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET, |
i->getDef(0), addr, NULL); |
ld->setSrc(1, i->getSrc(1)); |
} else { |
bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); |
} |
break; |
case SV_FACE: |
{ |
Value *face = i->getDef(0); |
bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); |
// For a float destination the flat-interpolated value is turned into |
// a float via (face | 1) -> integer negate -> s32-to-f32 conversion. |
// NOTE(review): presumably this yields +/-1.0 - confirm the raw value range. |
if (i->dType == TYPE_F32) { |
bld.mkOp2(OP_OR, TYPE_U32, face, face, bld.mkImm(0x00000001)); |
bld.mkOp1(OP_NEG, TYPE_S32, face, face); |
bld.mkCvt(OP_CVT, TYPE_F32, face, TYPE_S32, face); |
} |
} |
break; |
case SV_TESS_COORD: |
assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); |
readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); |
break; |
case SV_NTID: |
case SV_NCTAID: |
case SV_GRIDID: |
assert(targ->getChipset() >= NVISA_GK104_CHIPSET); // mov $sreg otherwise |
// Component 3 is a constant (0 for GRIDID, 1 for NTID/NCTAID); the RDSV is |
// rewritten in place and kept, hence the early return. |
if (sym->reg.data.sv.index == 3) { |
i->op = OP_MOV; |
i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1)); |
return true; |
} |
// Other components are loaded from constant buffer 0 at the driver-provided |
// grid info base plus the system value's address. |
addr += prog->driver->prop.cp.gridInfoBase; |
bld.mkLoad(TYPE_U32, i->getDef(0), |
bld.mkSymbol(FILE_MEMORY_CONST, 0, TYPE_U32, addr), NULL); |
break; |
case SV_SAMPLE_INDEX: |
// TODO: Properly pass source as an address in the PIX address space |
// (which can be of the form [r0+offset]). But this is currently |
// unnecessary. |
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); |
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; |
break; |
case SV_SAMPLE_POS: { |
// Read the sample id, scale it by 8 (shift left by 3) and use it as an |
// indirect offset into the driver's sample info table; the component index |
// selects a 4-byte slot within each entry. |
Value *off = new_LValue(func, FILE_GPR); |
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); |
ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID; |
bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3)); |
bld.mkLoad(TYPE_F32, |
i->getDef(0), |
bld.mkSymbol( |
FILE_MEMORY_CONST, prog->driver->io.resInfoCBSlot, |
TYPE_U32, prog->driver->io.sampleInfoBase + |
4 * sym->reg.data.sv.index), |
off); |
break; |
} |
case SV_SAMPLE_MASK: |
ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0)); |
ld->subOp = NV50_IR_SUBOP_PIXLD_COVMASK; |
break; |
default: |
// Generic case: fetch from the shader input file. Tessellation evaluation |
// programs additionally pass the result of a PFETCH of immediate 0 as the |
// vertex operand. |
if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) |
vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); |
ld = bld.mkFetch(i->getDef(0), i->dType, |
FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); |
ld->perPatch = i->perPatch; |
break; |
} |
// The replacement sequence writes i's destination; drop the original RDSV. |
bld.getBB()->remove(i); |
return true; |
} |
bool |
NVC0LoweringPass::handleDIV(Instruction *i) |
{ |
if (!isFloatType(i->dType)) |
return true; |
bld.setPosition(i, false); |
Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(typeSizeof(i->dType)), i->getSrc(1)); |
i->op = OP_MUL; |
i->setSrc(1, rcp->getDef(0)); |
return true; |
} |
bool |
NVC0LoweringPass::handleMOD(Instruction *i) |
{ |
if (!isFloatType(i->dType)) |
return true; |
LValue *value = bld.getScratch(typeSizeof(i->dType)); |
bld.mkOp1(OP_RCP, i->dType, value, i->getSrc(1)); |
bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(0), value); |
bld.mkOp1(OP_TRUNC, i->dType, value, value); |
bld.mkOp2(OP_MUL, i->dType, value, i->getSrc(1), value); |
i->op = OP_SUB; |
i->setSrc(1, value); |
return true; |
} |
bool |
NVC0LoweringPass::handleSQRT(Instruction *i) |
{ |
Value *pred = bld.getSSA(1, FILE_PREDICATE); |
Value *zero = bld.getSSA(); |
Instruction *rsq; |
bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0)); |
if (i->dType == TYPE_F64) |
zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero); |
bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero); |
bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred); |
rsq = bld.mkOp1(OP_RSQ, i->dType, |
bld.getSSA(typeSizeof(i->dType)), i->getSrc(0)); |
rsq->setPredicate(CC_NOT_P, pred); |
i->op = OP_MUL; |
i->setSrc(1, rsq->getDef(0)); |
i->setPredicate(CC_NOT_P, pred); |
return true; |
} |
bool |
NVC0LoweringPass::handlePOW(Instruction *i) |
{ |
LValue *val = bld.getScratch(); |
bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); |
bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; |
bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); |
i->op = OP_EX2; |
i->setSrc(0, val); |
i->setSrc(1, NULL); |
return true; |
} |
bool |
NVC0LoweringPass::handleEXPORT(Instruction *i) |
{ |
if (prog->getType() == Program::TYPE_FRAGMENT) { |
int id = i->getSrc(0)->reg.data.offset / 4; |
if (i->src(0).isIndirect(0)) // TODO, ugly |
return false; |
i->op = OP_MOV; |
i->subOp = NV50_IR_SUBOP_MOV_FINAL; |
i->src(0).set(i->src(1)); |
i->setSrc(1, NULL); |
i->setDef(0, new_LValue(func, FILE_GPR)); |
i->getDef(0)->reg.data.id = id; |
prog->maxGPR = MAX2(prog->maxGPR, id); |
} else |
if (prog->getType() == Program::TYPE_GEOMETRY) { |
i->setIndirect(0, 1, gpEmitAddress); |
} |
return true; |
} |
bool |
NVC0LoweringPass::handleOUT(Instruction *i) |
{ |
Instruction *prev = i->prev; |
ImmediateValue stream, prevStream; |
// Only merge if the stream ids match. Also, note that the previous |
// instruction would have already been lowered, so we take arg1 from it. |
if (i->op == OP_RESTART && prev && prev->op == OP_EMIT && |
i->src(0).getImmediate(stream) && |
prev->src(1).getImmediate(prevStream) && |
stream.reg.data.u32 == prevStream.reg.data.u32) { |
i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; |
delete_Instruction(prog, i); |
} else { |
assert(gpEmitAddress); |
i->setDef(0, gpEmitAddress); |
i->setSrc(1, i->getSrc(0)); |
i->setSrc(0, gpEmitAddress); |
} |
return true; |
} |
// Generate a binary predicate if an instruction is predicated by |
// e.g. an f32 value. |
void |
NVC0LoweringPass::checkPredicate(Instruction *insn) |
{ |
Value *pred = insn->getPredicate(); |
Value *pdst; |
if (!pred || pred->reg.file == FILE_PREDICATE) |
return; |
pdst = new_LValue(func, FILE_PREDICATE); |
// CAUTION: don't use pdst->getInsn, the definition might not be unique, |
// delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass |
bld.mkCmp(OP_SET, CC_NEU, insn->dType, pdst, insn->dType, bld.mkImm(0), pred); |
insn->setPredicate(insn->cc, pdst); |
} |
// |
// Per-instruction entry point of the lowering pass. |
// - add quadop dance for texturing |
// - put FP outputs in GPRs |
// - convert instruction sequences |
// |
// Positions the builder in front of the instruction, converts a non-predicate |
// guard value into a real predicate (checkPredicate), then dispatches on the |
// opcode. Opcodes without special handling are left unchanged. Returns the |
// handler's result where one is called, true otherwise. |
// |
bool |
NVC0LoweringPass::visit(Instruction *i) |
{ |
bld.setPosition(i, false); |
if (i->cc != CC_ALWAYS) |
checkPredicate(i); |
switch (i->op) { |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXF: |
case OP_TXG: |
return handleTEX(i->asTex()); |
case OP_TXD: |
return handleTXD(i->asTex()); |
case OP_TXLQ: |
return handleTXLQ(i->asTex()); |
case OP_TXQ: |
return handleTXQ(i->asTex()); |
case OP_EX2: |
// Insert a PREEX2 that writes the instruction's own destination, then feed |
// that value back in as the EX2 source. |
bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); |
i->setSrc(0, i->getDef(0)); |
break; |
case OP_POW: |
return handlePOW(i); |
case OP_DIV: |
return handleDIV(i); |
case OP_MOD: |
return handleMOD(i); |
case OP_SQRT: |
return handleSQRT(i); |
case OP_EXPORT: |
return handleEXPORT(i); |
case OP_EMIT: |
case OP_RESTART: |
return handleOUT(i); |
case OP_RDSV: |
return handleRDSV(i); |
case OP_WRSV: |
return handleWRSV(i); |
case OP_LOAD: |
if (i->src(0).getFile() == FILE_SHADER_INPUT) { |
if (prog->getType() == Program::TYPE_COMPUTE) { |
// Compute programs: shader inputs are redirected to constant memory, |
// file index 0. |
i->getSrc(0)->reg.file = FILE_MEMORY_CONST; |
i->getSrc(0)->reg.fileIndex = 0; |
} else |
if (prog->getType() == Program::TYPE_GEOMETRY && |
i->src(0).isIndirect(0)) { |
// XXX: this assumes vec4 units |
// The indirect index is scaled by 16 (shift left by 4) before the fetch. |
Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 0), bld.mkImm(4)); |
i->setIndirect(0, 0, ptr); |
i->op = OP_VFETCH; |
} else { |
i->op = OP_VFETCH; |
assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP |
} |
} else if (i->src(0).getFile() == FILE_MEMORY_CONST) { |
if (i->src(0).isIndirect(1)) { |
// Indirect on dimension 1: fold that index (and the dimension-0 indirect, |
// if any) into a single pointer value. With both present an INSBF with |
// control word 0x1010 combines them; otherwise the index is shifted left |
// by 16. The load is then marked with the LDC_IS subop. |
Value *ptr; |
if (i->src(0).isIndirect(0)) |
ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 1), bld.mkImm(0x1010), |
i->getIndirect(0, 0)); |
else |
ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(), |
i->getIndirect(0, 1), bld.mkImm(16)); |
i->setIndirect(0, 1, NULL); |
i->setIndirect(0, 0, ptr); |
i->subOp = NV50_IR_SUBOP_LDC_IS; |
} |
} |
break; |
case OP_ATOM: |
{ |
// Whether the atomic targets global memory is sampled before handleATOM |
// runs and then passed to handleCasExch as its needCctl argument. |
const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL; |
handleATOM(i); |
handleCasExch(i, cctl); |
} |
break; |
case OP_SULDB: |
case OP_SULDP: |
case OP_SUSTB: |
case OP_SUSTP: |
case OP_SUREDB: |
case OP_SUREDP: |
// Surface operations are only rewritten on GK104 and newer chipsets. |
if (targ->getChipset() >= NVISA_GK104_CHIPSET) |
handleSurfaceOpNVE4(i->asTex()); |
break; |
default: |
break; |
} |
return true; |
} |
bool |
TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const |
{ |
if (stage == CG_STAGE_PRE_SSA) { |
NVC0LoweringPass pass(prog); |
return pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_POST_RA) { |
NVC0LegalizePostRA pass(prog); |
return pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_SSA) { |
NVC0LegalizeSSA pass; |
return pass.run(prog, false, true); |
} |
return false; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h |
---|
0,0 → 1,138 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include <tr1/unordered_set> |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_build_util.h" |
namespace nv50_ir { |
// Legalization pass operating on SSA form; TargetNVC0::runLegalizePass runs |
// it for CG_STAGE_SSA. |
class NVC0LegalizeSSA : public Pass |
{ |
private: |
// Pass callbacks, overridden per basic block and per function. |
virtual bool visit(BasicBlock *); |
virtual bool visit(Function *); |
// we want to insert calls to the builtin library only after optimization |
void handleDIV(Instruction *); // integer division, modulus |
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt |
void handleFTZ(Instruction *); |
private: |
// Builder used to emit replacement instructions. |
BuildUtil bld; |
}; |
// Legalization pass run after register allocation; TargetNVC0::runLegalizePass |
// runs it for CG_STAGE_POST_RA. |
class NVC0LegalizePostRA : public Pass |
{ |
public: |
NVC0LegalizePostRA(const Program *); |
private: |
// Pass callbacks, overridden per function and per basic block. |
virtual bool visit(Function *); |
virtual bool visit(BasicBlock *); |
void replaceZero(Instruction *); |
bool tryReplaceContWithBra(BasicBlock *); |
void propagateJoin(BasicBlock *); |
// Pairs an instruction that uses a texture result (insn) with the |
// instruction that produced it (tex); level starts out as -1. |
struct TexUse |
{ |
TexUse(Instruction *use, const Instruction *tex) |
: insn(use), tex(tex), level(-1) { } |
Instruction *insn; |
const Instruction *tex; // or split / mov |
int level; |
}; |
// Simple [min, max] integer pair; the default constructor leaves both |
// members uninitialized. |
struct Limits |
{ |
Limits() { } |
Limits(int min, int max) : min(min), max(max) { } |
int min, max; |
}; |
// Helpers for texture barrier insertion. |
bool insertTextureBarriers(Function *); |
inline bool insnDominatedBy(const Instruction *, const Instruction *) const; |
void findFirstUses(const Instruction *tex, const Instruction *def, |
std::list<TexUse>&, |
std::tr1::unordered_set<const Instruction *>&); |
void findOverwritingDefs(const Instruction *tex, Instruction *insn, |
const BasicBlock *term, |
std::list<TexUse>&); |
void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *); |
const Instruction *recurseDef(const Instruction *); |
private: |
// NOTE(review): presumably the zero register and carry flag values used by |
// the pass - confirm against the constructor in the .cpp file. |
LValue *rZero; |
LValue *carry; |
// NOTE(review): presumably selects whether texture barriers are inserted - |
// confirm where it is initialized. |
const bool needTexBar; |
}; |
// Lowering pass run before SSA construction; TargetNVC0::runLegalizePass runs |
// it for CG_STAGE_PRE_SSA. Rewrites generic IR operations (system value |
// access, exports, float DIV/MOD/SQRT/POW, texturing, atomics, surface ops) |
// into sequences for this target. |
class NVC0LoweringPass : public Pass |
{ |
public: |
NVC0LoweringPass(Program *); |
protected: |
// Per-opcode lowering handlers, dispatched from visit(Instruction *). |
bool handleRDSV(Instruction *); |
bool handleWRSV(Instruction *); |
bool handleEXPORT(Instruction *); |
bool handleOUT(Instruction *); |
bool handleDIV(Instruction *); |
bool handleMOD(Instruction *); |
bool handleSQRT(Instruction *); |
bool handlePOW(Instruction *); |
bool handleTEX(TexInstruction *); |
bool handleTXD(TexInstruction *); |
bool handleTXQ(TexInstruction *); |
// Virtual, so a derived pass can supply its own implementation. |
virtual bool handleManualTXD(TexInstruction *); |
bool handleTXLQ(TexInstruction *); |
bool handleATOM(Instruction *); |
bool handleCasExch(Instruction *, bool needCctl); |
void handleSurfaceOpNVE4(TexInstruction *); |
// Converts a non-predicate guard value into a real predicate. |
void checkPredicate(Instruction *); |
private: |
// Pass callbacks. |
virtual bool visit(Function *); |
virtual bool visit(BasicBlock *); |
virtual bool visit(Instruction *); |
// Emits the read of tessellation coordinate component c into dst. |
void readTessCoord(LValue *dst, int c); |
Value *loadResInfo32(Value *ptr, uint32_t off); |
Value *loadMsInfo32(Value *ptr, uint32_t off); |
Value *loadTexHandle(Value *ptr, unsigned int slot); |
void adjustCoordinatesMS(TexInstruction *); |
void processSurfaceCoordsNVE4(TexInstruction *); |
protected: |
// Builder used to emit replacement instructions. |
BuildUtil bld; |
private: |
const Target *const targ; |
// Global memory base symbol, created on first use by the surface lowering. |
Symbol *gMemBase; |
// Address value attached to geometry program exports and EMIT/RESTART. |
LValue *gpEmitAddress; |
}; |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp |
---|
0,0 → 1,2740 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
#include "codegen/nv50_ir_build_util.h" |
extern "C" { |
#include "util/u_math.h" |
} |
namespace nv50_ir { |
bool |
Instruction::isNop() const |
{ |
if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT) |
return true; |
if (terminator || join) // XXX: should terminator imply flow ? |
return false; |
if (op == OP_ATOM) |
return false; |
if (!fixed && op == OP_NOP) |
return true; |
if (defExists(0) && def(0).rep()->reg.data.id < 0) { |
for (int d = 1; defExists(d); ++d) |
if (def(d).rep()->reg.data.id >= 0) |
WARN("part of vector result is unused !\n"); |
return true; |
} |
if (op == OP_MOV || op == OP_UNION) { |
if (!getDef(0)->equals(getSrc(0))) |
return false; |
if (op == OP_UNION) |
if (!def(0).rep()->equals(getSrc(1))) |
return false; |
return true; |
} |
return false; |
} |
bool Instruction::isDead() const |
{ |
if (op == OP_STORE || |
op == OP_EXPORT || |
op == OP_ATOM || |
op == OP_SUSTB || op == OP_SUSTP || op == OP_SUREDP || op == OP_SUREDB || |
op == OP_WRSV) |
return false; |
for (int d = 0; defExists(d); ++d) |
if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0) |
return false; |
if (terminator || asFlow()) |
return false; |
if (fixed) |
return false; |
return true; |
}; |
// ============================================================================= |
// Pass that forwards the sources of plain MOVs to the users of their results; |
// all the work happens in the per-basic-block callback. |
class CopyPropagation : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
}; |
// Propagate all MOVs forward to make subsequent optimization easier, except if |
// the sources stem from a phi, in which case we don't want to mess up potential |
// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def. |
bool |
CopyPropagation::visit(BasicBlock *bb) |
{ |
Instruction *mov, *si, *next; |
for (mov = bb->getEntry(); mov; mov = next) { |
next = mov->next; |
if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue()) |
continue; |
if (mov->getPredicate()) |
continue; |
if (mov->def(0).getFile() != mov->src(0).getFile()) |
continue; |
si = mov->getSrc(0)->getInsn(); |
if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) { |
// propagate |
mov->def(0).replace(mov->getSrc(0), false); |
delete_Instruction(prog, mov); |
} |
} |
return true; |
} |
// ============================================================================= |
// Pass that removes MERGE instructions which merely reassemble the results |
// of a SPLIT; all the work happens in the per-basic-block callback. |
class MergeSplits : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
}; |
// For SPLIT / MERGE pairs that operate on the same registers, replace the |
// post-merge def with the SPLIT's source. |
bool |
MergeSplits::visit(BasicBlock *bb) |
{ |
Instruction *i, *next, *si; |
for (i = bb->getEntry(); i; i = next) { |
next = i->next; |
if (i->op != OP_MERGE || typeSizeof(i->dType) != 8) |
continue; |
si = i->getSrc(0)->getInsn(); |
if (si->op != OP_SPLIT || si != i->getSrc(1)->getInsn()) |
continue; |
i->def(0).replace(si->getSrc(0), false); |
delete_Instruction(prog, i); |
} |
return true; |
} |
// ============================================================================= |
// Pass that folds LOAD / MOV results directly into the source slots of the |
// instructions using them, where the target allows it. |
class LoadPropagation : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
// Reorders sources 0 and 1 so that a propagatable load ends up in a slot |
// that can take it. |
void checkSwapSrc01(Instruction *); |
// Classifiers for the instruction defining a source (NULL-safe). |
bool isCSpaceLoad(Instruction *); |
bool isImmd32Load(Instruction *); |
bool isAttribOrSharedLoad(Instruction *); |
}; |
bool |
LoadPropagation::isCSpaceLoad(Instruction *ld) |
{ |
return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST; |
} |
bool |
LoadPropagation::isImmd32Load(Instruction *ld) |
{ |
if (!ld || (ld->op != OP_MOV) || (typeSizeof(ld->dType) != 4)) |
return false; |
return ld->src(0).getFile() == FILE_IMMEDIATE; |
} |
bool |
LoadPropagation::isAttribOrSharedLoad(Instruction *ld) |
{ |
return ld && |
(ld->op == OP_VFETCH || |
(ld->op == OP_LOAD && |
(ld->src(0).getFile() == FILE_SHADER_INPUT || |
ld->src(0).getFile() == FILE_MEMORY_SHARED))); |
} |
// Swap sources 0 and 1 of insn when that puts a propagatable load into the |
// slot where it is wanted. Only commutative operations, SET and SLCT are |
// considered, and only if source 1 is a GPR. For SET-style comparisons and |
// SLCT the condition code is adjusted to keep the result unchanged. |
void |
LoadPropagation::checkSwapSrc01(Instruction *insn) |
{ |
if (!prog->getTarget()->getOpInfo(insn).commutative) |
if (insn->op != OP_SET && insn->op != OP_SLCT) |
return; |
if (insn->src(1).getFile() != FILE_GPR) |
return; |
// Defining instructions of both operands (may be NULL). |
Instruction *i0 = insn->getSrc(0)->getInsn(); |
Instruction *i1 = insn->getSrc(1)->getInsn(); |
// Constant-space load in slot 0: move it to slot 1 unless slot 1 already |
// holds one. |
if (isCSpaceLoad(i0)) { |
if (!isCSpaceLoad(i1)) |
insn->swapSources(0, 1); |
else |
return; |
} else |
// 32-bit immediate in slot 0: move it to slot 1 unless slot 1 already |
// holds a constant-space load or an immediate. |
if (isImmd32Load(i0)) { |
if (!isCSpaceLoad(i1) && !isImmd32Load(i1)) |
insn->swapSources(0, 1); |
else |
return; |
} else |
// Attribute / shared memory load in slot 1: move it to slot 0 unless |
// slot 0 already holds one. |
if (isAttribOrSharedLoad(i1)) { |
if (!isAttribOrSharedLoad(i0)) |
insn->swapSources(0, 1); |
else |
return; |
} else { |
return; |
} |
// Only reached after a swap: fix up the condition so the swapped operands |
// still produce the same result. |
if (insn->op == OP_SET || insn->op == OP_SET_AND || |
insn->op == OP_SET_OR || insn->op == OP_SET_XOR) |
insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond); |
else |
if (insn->op == OP_SLCT) |
insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond); |
} |
bool |
LoadPropagation::visit(BasicBlock *bb) |
{ |
const Target *targ = prog->getTarget(); |
Instruction *next; |
for (Instruction *i = bb->getEntry(); i; i = next) { |
next = i->next; |
if (i->op == OP_CALL) // calls have args as sources, they must be in regs |
continue; |
if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg |
continue; |
if (i->srcExists(1)) |
checkSwapSrc01(i); |
for (int s = 0; i->srcExists(s); ++s) { |
Instruction *ld = i->getSrc(s)->getInsn(); |
if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV)) |
continue; |
if (!targ->insnCanLoad(i, s, ld)) |
continue; |
// propagate ! |
i->setSrc(s, ld->getSrc(0)); |
if (ld->src(0).isIndirect(0)) |
i->setIndirect(s, 0, ld->getIndirect(0, 0)); |
if (ld->getDef(0)->refCount() == 0) |
delete_Instruction(prog, ld); |
} |
} |
return true; |
} |
// ============================================================================= |
// Evaluate constant expressions. |
// Instructions whose sources are all immediates are replaced by their result |
// (expr); instructions with a single immediate source are handed to opnd(). |
class ConstantFolding : public Pass |
{ |
public: |
// Runs the pass over the program, repeating it once if anything was folded. |
bool foldAll(Program *); |
private: |
virtual bool visit(BasicBlock *); |
// Fold an instruction with two / three immediate sources. |
void expr(Instruction *, ImmediateValue&, ImmediateValue&); |
void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); |
// Handle an instruction where only source s is an immediate. |
void opnd(Instruction *, ImmediateValue&, int s); |
// Fold a single-source f32 operation on an immediate. |
void unary(Instruction *, const ImmediateValue&); |
void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); |
// TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET |
CmpInstruction *findOriginForTestWithZero(Value *); |
// Number of folds performed during the current run; reset by foldAll(). |
unsigned int foldCount; |
BuildUtil bld; |
}; |
// TODO: remember generated immediates and only revisit these |
bool |
ConstantFolding::foldAll(Program *prog) |
{ |
unsigned int iterCount = 0; |
do { |
foldCount = 0; |
if (!run(prog)) |
return false; |
} while (foldCount && ++iterCount < 2); |
return true; |
} |
bool |
ConstantFolding::visit(BasicBlock *bb) |
{ |
Instruction *i, *next; |
for (i = bb->getEntry(); i; i = next) { |
next = i->next; |
if (i->op == OP_MOV || i->op == OP_CALL) |
continue; |
ImmediateValue src0, src1, src2; |
if (i->srcExists(2) && |
i->src(0).getImmediate(src0) && |
i->src(1).getImmediate(src1) && |
i->src(2).getImmediate(src2)) |
expr(i, src0, src1, src2); |
else |
if (i->srcExists(1) && |
i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) |
expr(i, src0, src1); |
else |
if (i->srcExists(0) && i->src(0).getImmediate(src0)) |
opnd(i, src0, 0); |
else |
if (i->srcExists(1) && i->src(1).getImmediate(src1)) |
opnd(i, src1, 1); |
} |
return true; |
} |
// Walk backwards from `value` through NEG / ABS / CVT / MOV instructions to
// the SET comparison it ultimately derives from.
//
// NEG / ABS / CVT are only followed if their source type matches the result
// type of the instruction producing their operand. Returns the comparison,
// or NULL if `value` is NULL, the chain contains any other operation, a type
// mismatch occurs, or an operand has no defining instruction.
CmpInstruction *
ConstantFolding::findOriginForTestWithZero(Value *value)
{
   if (!value)
      return NULL;
   Instruction *insn = value->getInsn();

   while (insn && insn->op != OP_SET) {
      Instruction *next = NULL;
      switch (insn->op) {
      case OP_NEG:
      case OP_ABS:
      case OP_CVT:
         next = insn->getSrc(0)->getInsn();
         // getInsn() yields NULL for operands without a definition; such a
         // chain cannot lead to a SET, so give up rather than dereference.
         if (!next || insn->sType != next->dType)
            return NULL;
         break;
      case OP_MOV:
         next = insn->getSrc(0)->getInsn();
         break;
      default:
         return NULL;
      }
      insn = next;
   }
   return insn ? insn->asCmp() : NULL;
}
// Apply this modifier's bits (ABS, NEG, SAT, NOT) to an immediate in place, |
// interpreting the immediate according to its register type. ABS is applied |
// before NEG; SAT clamps floats to [0, 1]; NOT is only valid for integer |
// types. An empty modifier leaves the immediate untouched. |
void |
Modifier::applyTo(ImmediateValue& imm) const |
{ |
if (!bits) // avoid failure if imm.reg.type is unhandled (e.g. b128) |
return; |
switch (imm.reg.type) { |
case TYPE_F32: |
if (bits & NV50_IR_MOD_ABS) |
imm.reg.data.f32 = fabsf(imm.reg.data.f32); |
if (bits & NV50_IR_MOD_NEG) |
imm.reg.data.f32 = -imm.reg.data.f32; |
if (bits & NV50_IR_MOD_SAT) { |
if (imm.reg.data.f32 < 0.0f) |
imm.reg.data.f32 = 0.0f; |
else |
if (imm.reg.data.f32 > 1.0f) |
imm.reg.data.f32 = 1.0f; |
} |
assert(!(bits & NV50_IR_MOD_NOT)); |
break; |
// All integer types are handled through the 32-bit signed view of the data. |
// SAT is not applied to integers. |
case TYPE_S8: // NOTE: will be extended |
case TYPE_S16: |
case TYPE_S32: |
case TYPE_U8: // NOTE: treated as signed |
case TYPE_U16: |
case TYPE_U32: |
if (bits & NV50_IR_MOD_ABS) |
imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? |
imm.reg.data.s32 : -imm.reg.data.s32; |
if (bits & NV50_IR_MOD_NEG) |
imm.reg.data.s32 = -imm.reg.data.s32; |
if (bits & NV50_IR_MOD_NOT) |
imm.reg.data.s32 = ~imm.reg.data.s32; |
break; |
case TYPE_F64: |
if (bits & NV50_IR_MOD_ABS) |
imm.reg.data.f64 = fabs(imm.reg.data.f64); |
if (bits & NV50_IR_MOD_NEG) |
imm.reg.data.f64 = -imm.reg.data.f64; |
if (bits & NV50_IR_MOD_SAT) { |
if (imm.reg.data.f64 < 0.0) |
imm.reg.data.f64 = 0.0; |
else |
if (imm.reg.data.f64 > 1.0) |
imm.reg.data.f64 = 1.0; |
} |
assert(!(bits & NV50_IR_MOD_NOT)); |
break; |
default: |
// Unsupported type: assert in debug builds, zero the value otherwise. |
assert(!"invalid/unhandled type"); |
imm.reg.data.u64 = 0; |
break; |
} |
} |
// Map this modifier to the single operation that implements it: no bits
// correspond to a plain MOV, exactly one of ABS / NEG / SAT / NOT to the
// matching unary op, and any combination of bits to CVT.
operation
Modifier::getOp() const
{
   if (bits == 0)
      return OP_MOV;
   if (bits == NV50_IR_MOD_ABS)
      return OP_ABS;
   if (bits == NV50_IR_MOD_NEG)
      return OP_NEG;
   if (bits == NV50_IR_MOD_SAT)
      return OP_SAT;
   if (bits == NV50_IR_MOD_NOT)
      return OP_NOT;
   return OP_CVT;
}
void |
ConstantFolding::expr(Instruction *i, |
ImmediateValue &imm0, ImmediateValue &imm1) |
{ |
struct Storage *const a = &imm0.reg, *const b = &imm1.reg; |
struct Storage res; |
memset(&res.data, 0, sizeof(res.data)); |
switch (i->op) { |
case OP_MAD: |
case OP_FMA: |
case OP_MUL: |
if (i->dnz && i->dType == TYPE_F32) { |
if (!isfinite(a->data.f32)) |
a->data.f32 = 0.0f; |
if (!isfinite(b->data.f32)) |
b->data.f32 = 0.0f; |
} |
switch (i->dType) { |
case TYPE_F32: |
res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); |
break; |
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; |
case TYPE_S32: |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { |
res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32; |
break; |
} |
/* fallthrough */ |
case TYPE_U32: |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { |
res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32; |
break; |
} |
res.data.u32 = a->data.u32 * b->data.u32; break; |
default: |
return; |
} |
break; |
case OP_DIV: |
if (b->data.u32 == 0) |
break; |
switch (i->dType) { |
case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; |
case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; |
case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; |
case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; |
default: |
return; |
} |
break; |
case OP_ADD: |
switch (i->dType) { |
case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; |
case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; |
case TYPE_S32: |
case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; |
default: |
return; |
} |
break; |
case OP_POW: |
switch (i->dType) { |
case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; |
case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; |
default: |
return; |
} |
break; |
case OP_MAX: |
switch (i->dType) { |
case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; |
case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; |
case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; |
case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; |
default: |
return; |
} |
break; |
case OP_MIN: |
switch (i->dType) { |
case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; |
case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; |
case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; |
case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; |
default: |
return; |
} |
break; |
case OP_AND: |
res.data.u64 = a->data.u64 & b->data.u64; |
break; |
case OP_OR: |
res.data.u64 = a->data.u64 | b->data.u64; |
break; |
case OP_XOR: |
res.data.u64 = a->data.u64 ^ b->data.u64; |
break; |
case OP_SHL: |
res.data.u32 = a->data.u32 << b->data.u32; |
break; |
case OP_SHR: |
switch (i->dType) { |
case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; |
case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; |
default: |
return; |
} |
break; |
case OP_SLCT: |
if (a->data.u32 != b->data.u32) |
return; |
res.data.u32 = a->data.u32; |
break; |
case OP_EXTBF: { |
int offset = b->data.u32 & 0xff; |
int width = (b->data.u32 >> 8) & 0xff; |
int rshift = offset; |
int lshift = 0; |
if (width == 0) { |
res.data.u32 = 0; |
break; |
} |
if (width + offset < 32) { |
rshift = 32 - width; |
lshift = 32 - width - offset; |
} |
if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) |
res.data.u32 = util_bitreverse(a->data.u32); |
else |
res.data.u32 = a->data.u32; |
switch (i->dType) { |
case TYPE_S32: res.data.s32 = (res.data.s32 << lshift) >> rshift; break; |
case TYPE_U32: res.data.u32 = (res.data.u32 << lshift) >> rshift; break; |
default: |
return; |
} |
break; |
} |
case OP_POPCNT: |
res.data.u32 = util_bitcount(a->data.u32 & b->data.u32); |
break; |
case OP_PFETCH: |
// The two arguments to pfetch are logically added together. Normally |
// the second argument will not be constant, but that can happen. |
res.data.u32 = a->data.u32 + b->data.u32; |
break; |
default: |
return; |
} |
++foldCount; |
i->src(0).mod = Modifier(0); |
i->src(1).mod = Modifier(0); |
i->postFactor = 0; |
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); |
i->setSrc(1, NULL); |
i->getSrc(0)->reg.data = res.data; |
switch (i->op) { |
case OP_MAD: |
case OP_FMA: { |
i->op = OP_ADD; |
i->setSrc(1, i->getSrc(0)); |
i->src(1).mod = i->src(2).mod; |
i->setSrc(0, i->getSrc(2)); |
i->setSrc(2, NULL); |
ImmediateValue src0; |
if (i->src(0).getImmediate(src0)) |
expr(i, src0, *i->getSrc(1)->asImm()); |
if (i->saturate && !prog->getTarget()->isSatSupported(i)) { |
bld.setPosition(i, false); |
i->setSrc(1, bld.loadImm(NULL, res.data.u32)); |
} |
break; |
} |
case OP_PFETCH: |
// Leave PFETCH alone... we just folded its 2 args into 1. |
break; |
default: |
i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */ |
break; |
} |
i->subOp = 0; |
} |
void |
ConstantFolding::expr(Instruction *i, |
ImmediateValue &imm0, |
ImmediateValue &imm1, |
ImmediateValue &imm2) |
{ |
struct Storage *const a = &imm0.reg, *const b = &imm1.reg, *const c = &imm2.reg; |
struct Storage res; |
memset(&res.data, 0, sizeof(res.data)); |
switch (i->op) { |
case OP_INSBF: { |
int offset = b->data.u32 & 0xff; |
int width = (b->data.u32 >> 8) & 0xff; |
unsigned bitmask = ((1 << width) - 1) << offset; |
res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask); |
break; |
} |
default: |
return; |
} |
++foldCount; |
i->src(0).mod = Modifier(0); |
i->src(1).mod = Modifier(0); |
i->src(2).mod = Modifier(0); |
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); |
i->setSrc(1, NULL); |
i->setSrc(2, NULL); |
i->getSrc(0)->reg.data = res.data; |
i->op = OP_MOV; |
} |
void |
ConstantFolding::unary(Instruction *i, const ImmediateValue &imm) |
{ |
   // Evaluate a single-operand f32 operation on an immediate and turn the |
   // instruction into a MOV of the result. Non-f32 results are not folded. |
   if (i->dType != TYPE_F32) |
      return; |
   const float x = imm.reg.data.f32; |
   float folded; |
   switch (i->op) { |
   case OP_NEG: |
      folded = -x; |
      break; |
   case OP_ABS: |
      folded = fabsf(x); |
      break; |
   case OP_SAT: |
      folded = CLAMP(x, 0.0f, 1.0f); |
      break; |
   case OP_RCP: |
      folded = 1.0f / x; |
      break; |
   case OP_RSQ: |
      folded = 1.0f / sqrtf(x); |
      break; |
   case OP_LG2: |
      folded = log2f(x); |
      break; |
   case OP_EX2: |
      folded = exp2f(x); |
      break; |
   case OP_SIN: |
      folded = sinf(x); |
      break; |
   case OP_COS: |
      folded = cosf(x); |
      break; |
   case OP_SQRT: |
      folded = sqrtf(x); |
      break; |
   case OP_PRESIN: |
   case OP_PREEX2: |
      // pass the value through; the following OP_SIN/COS/EX2 does the math |
      folded = x; |
      break; |
   default: |
      return; |
   } |
   i->op = OP_MOV; |
   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), folded)); |
   i->src(0).mod = Modifier(0); |
} |
// Try to merge an f32 MUL-by-immediate (@mul2, immediate @imm2 in source @s) |
// with an adjacent f32 MUL, either the one producing its other operand or |
// the single one consuming its result. |
void |
ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, |
                                        const int s, ImmediateValue& imm2) |
{ |
   const int t = s ? 0 : 1; |
   Instruction *insn; |
   Instruction *mul1 = NULL; // mul1 before mul2 |
   int e = 0; |
   // effective factor of mul2: its immediate scaled by its own post-factor |
   float f = imm2.reg.data.f32 * exp2f(mul2->postFactor); |
   ImmediateValue imm1; |
   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); |
   if (mul2->getSrc(t)->refCount() == 1) { |
      insn = mul2->getSrc(t)->getInsn(); |
      // The operand may have no defining instruction; check before |
      // dereferencing, as the consumer path below already does. |
      if (insn && |
          !mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) |
         mul1 = insn; |
      if (mul1 && !mul1->saturate) { |
         int s1; |
         if (mul1->src(s1 = 0).getImmediate(imm1) || |
             mul1->src(s1 = 1).getImmediate(imm1)) { |
            bld.setPosition(mul1, false); |
            // a = mul r, imm1 |
            // d = mul a, imm2 -> d = mul r, (imm1 * imm2) |
            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); |
            mul1->src(s1).mod = Modifier(0); |
            mul2->def(0).replace(mul1->getDef(0), false); |
            mul1->saturate = mul2->saturate; |
         } else |
         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { |
            // c = mul a, b |
            // d = mul c, imm   -> d = mul_x_imm a, b |
            mul1->postFactor = e; |
            mul2->def(0).replace(mul1->getDef(0), false); |
            if (f < 0) |
               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); |
            mul1->saturate = mul2->saturate; |
         } |
         return; |
      } |
   } |
   if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { |
      // b = mul a, imm |
      // d = mul b, c   -> d = mul_x_imm a, c |
      int s2, t2; |
      insn = (*mul2->getDef(0)->uses.begin())->getInsn(); |
      if (!insn) |
         return; |
      // from here on mul1 is the immediate MUL and mul2 its consumer |
      mul1 = mul2; |
      mul2 = NULL; |
      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; |
      t2 = s2 ? 0 : 1; |
      if (insn->op == OP_MUL && insn->dType == TYPE_F32) |
         if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) |
            mul2 = insn; |
      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { |
         mul2->postFactor = e; |
         mul2->setSrc(s2, mul1->src(t)); |
         if (f < 0) |
            mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); |
      } |
   } |
} |
// Simplify @i given that its source @s holds the immediate @imm0. |
// The instruction is rewritten in place or replaced by a new sequence (in |
// which case @i is deleted). foldCount is bumped when the resulting opcode |
// differs from the original one. |
void |
ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) |
{ |
   const int t = !s; // index of the other operand of a binary op |
   const operation op = i->op; |
   Instruction *newi = i; // instruction whose opcode is compared at the end |
   switch (i->op) { |
   case OP_MUL: |
      if (i->dType == TYPE_F32) |
         tryCollapseChainedMULs(i, s, imm0); |
      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { |
         assert(!isFloatType(i->sType)); |
         if (imm0.isInteger(1) && i->dType == TYPE_S32) { |
            bld.setPosition(i, false); |
            // Need to set to the sign value, which is a compare. |
            newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0), |
                             TYPE_S32, i->getSrc(t), bld.mkImm(0)); |
            delete_Instruction(prog, i); |
         } else if (imm0.isInteger(0) || imm0.isInteger(1)) { |
            // The high bits can't be set in this case (either mul by 0 or |
            // unsigned by 1) |
            i->op = OP_MOV; |
            i->subOp = 0; |
            i->setSrc(0, new_ImmediateValue(prog, 0u)); |
            i->src(0).mod = Modifier(0); |
            i->setSrc(1, NULL); |
         } else if (!imm0.isNegative() && imm0.isPow2()) { |
            // Translate into a shift |
            imm0.applyLog2(); |
            i->op = OP_SHR; |
            i->subOp = 0; |
            imm0.reg.data.u32 = 32 - imm0.reg.data.u32; |
            i->setSrc(0, i->getSrc(t)); |
            i->src(0).mod = i->src(t).mod; |
            i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); |
            i->src(1).mod = 0; |
         } |
      } else |
      if (imm0.isInteger(0)) { |
         // x * 0 -> 0 |
         i->op = OP_MOV; |
         i->setSrc(0, new_ImmediateValue(prog, 0u)); |
         i->src(0).mod = Modifier(0); |
         i->postFactor = 0; |
         i->setSrc(1, NULL); |
      } else |
      if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) { |
         // x * +-1 -> the op implied by the operand's modifier |
         if (imm0.isNegative()) |
            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); |
         i->op = i->src(t).mod.getOp(); |
         if (s == 0) { |
            i->setSrc(0, i->getSrc(1)); |
            i->src(0).mod = i->src(1).mod; |
            i->src(1).mod = 0; |
         } |
         if (i->op != OP_CVT) |
            i->src(0).mod = 0; |
         i->setSrc(1, NULL); |
      } else |
      if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) { |
         // x * +-2 -> x + x, with the sign folded into the modifier |
         if (imm0.isNegative()) |
            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); |
         i->op = OP_ADD; |
         i->setSrc(s, i->getSrc(t)); |
         i->src(s).mod = i->src(t).mod; |
      } else |
      if (!isFloatType(i->sType) && !imm0.isNegative() && imm0.isPow2()) { |
         // integer x * 2^k -> x << k |
         i->op = OP_SHL; |
         imm0.applyLog2(); |
         i->setSrc(0, i->getSrc(t)); |
         i->src(0).mod = i->src(t).mod; |
         i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); |
         i->src(1).mod = 0; |
      } |
      break; |
   case OP_MAD: |
      if (imm0.isInteger(0)) { |
         // a * 0 + c -> c |
         i->setSrc(0, i->getSrc(2)); |
         i->src(0).mod = i->src(2).mod; |
         i->setSrc(1, NULL); |
         i->setSrc(2, NULL); |
         i->op = i->src(0).mod.getOp(); |
         if (i->op != OP_CVT) |
            i->src(0).mod = 0; |
      } else |
      // The high half of (a * +-1) is not a itself, so a MUL_HIGH MAD must |
      // not be rewritten as a plain ADD. |
      if (i->subOp != NV50_IR_SUBOP_MUL_HIGH && |
          (imm0.isInteger(1) || imm0.isInteger(-1))) { |
         // a * +-1 + c -> +-a + c |
         if (imm0.isNegative()) |
            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); |
         if (s == 0) { |
            i->setSrc(0, i->getSrc(1)); |
            i->src(0).mod = i->src(1).mod; |
         } |
         i->setSrc(1, i->getSrc(2)); |
         i->src(1).mod = i->src(2).mod; |
         i->setSrc(2, NULL); |
         i->op = OP_ADD; |
      } |
      break; |
   case OP_ADD: |
      if (i->usesFlags()) |
         break; |
      if (imm0.isInteger(0)) { |
         // x + 0 -> x |
         if (s == 0) { |
            i->setSrc(0, i->getSrc(1)); |
            i->src(0).mod = i->src(1).mod; |
         } |
         i->setSrc(1, NULL); |
         i->op = i->src(0).mod.getOp(); |
         if (i->op != OP_CVT) |
            i->src(0).mod = Modifier(0); |
      } |
      break; |
   case OP_DIV: |
      // only integer division by an immediate divisor is handled |
      if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) |
         break; |
      bld.setPosition(i, false); |
      if (imm0.reg.data.u32 == 0) { |
         break; |
      } else |
      if (imm0.reg.data.u32 == 1) { |
         i->op = OP_MOV; |
         i->setSrc(1, NULL); |
      } else |
      if (i->dType == TYPE_U32 && imm0.isPow2()) { |
         i->op = OP_SHR; |
         i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); |
      } else |
      if (i->dType == TYPE_U32) { |
         // unsigned division by a constant via multiply-high and shifts |
         Instruction *mul; |
         Value *tA, *tB; |
         const uint32_t d = imm0.reg.data.u32; |
         uint32_t m; |
         int r, s; |
         uint32_t l = util_logbase2(d); |
         if (((uint32_t)1 << l) < d) |
            ++l; |
         m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; |
         r = l ? 1 : 0; |
         s = l ? (l - 1) : 0; |
         tA = bld.getSSA(); |
         tB = bld.getSSA(); |
         mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), |
                         bld.loadImm(NULL, m)); |
         mul->subOp = NV50_IR_SUBOP_MUL_HIGH; |
         bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); |
         tA = bld.getSSA(); |
         if (r) |
            bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); |
         else |
            tA = tB; |
         tB = s ? bld.getSSA() : i->getDef(0); |
         newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); |
         if (s) |
            bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); |
         delete_Instruction(prog, i); |
      } else |
      if (imm0.reg.data.s32 == -1) { |
         i->op = OP_NEG; |
         i->setSrc(1, NULL); |
      } else { |
         // signed division by a constant via multiply-high-add and shift |
         LValue *tA, *tB; |
         LValue *tD; |
         const int32_t d = imm0.reg.data.s32; |
         int32_t m; |
         int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); |
         if ((1 << l) < abs(d)) |
            ++l; |
         if (!l) |
            l = 1; |
         m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); |
         tA = bld.getSSA(); |
         tB = bld.getSSA(); |
         bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), |
                   i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; |
         if (l > 1) |
            bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); |
         else |
            tB = tA; |
         tA = bld.getSSA(); |
         // tA = (dividend < 0); subtracting it corrects negative dividends |
         bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0)); |
         tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); |
         newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); |
         // For a negative divisor, negate the corrected quotient tD. Negating |
         // tB would drop the correction and leave the SUB above dead. |
         if (d < 0) |
            bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tD); |
         delete_Instruction(prog, i); |
      } |
      break; |
   case OP_MOD: |
      // x % 2^k -> x & (2^k - 1); only valid when the immediate is the |
      // divisor (source 1), never when it is the dividend. |
      if (s == 1 && i->sType == TYPE_U32 && imm0.isPow2()) { |
         bld.setPosition(i, false); |
         i->op = OP_AND; |
         i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); |
      } |
      break; |
   case OP_SET: // TODO: SET_AND,OR,XOR |
   { |
      // SET(SET(a, b), 0): fold the test against zero into the inner compare |
      CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); |
      CondCode cc, ccZ; |
      if (i->src(t).mod != Modifier(0)) |
         return; |
      if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET) |
         return; |
      cc = si->setCond; |
      ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); |
      if (s == 0) |
         ccZ = reverseCondCode(ccZ); |
      switch (ccZ) { |
      case CC_LT: cc = CC_FL; break; |
      case CC_GE: cc = CC_TR; break; |
      case CC_EQ: cc = inverseCondCode(cc); break; |
      case CC_LE: cc = inverseCondCode(cc); break; |
      case CC_GT: break; |
      case CC_NE: break; |
      default: |
         return; |
      } |
      i->asCmp()->setCond = cc; |
      i->setSrc(0, si->src(0)); |
      i->setSrc(1, si->src(1)); |
      i->sType = si->sType; |
   } |
      break; |
   case OP_SHL: |
   { |
      if (s != 1 || i->src(0).mod != Modifier(0)) |
         break; |
      // try to concatenate shifts |
      Instruction *si = i->getSrc(0)->getInsn(); |
      if (!si || si->op != OP_SHL) |
         break; |
      ImmediateValue imm1; |
      if (si->src(1).getImmediate(imm1)) { |
         bld.setPosition(i, false); |
         i->setSrc(0, si->getSrc(0)); |
         i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); |
      } |
   } |
      break; |
   case OP_ABS: |
   case OP_NEG: |
   case OP_SAT: |
   case OP_LG2: |
   case OP_RCP: |
   case OP_SQRT: |
   case OP_RSQ: |
   case OP_PRESIN: |
   case OP_SIN: |
   case OP_COS: |
   case OP_PREEX2: |
   case OP_EX2: |
      unary(i, imm0); |
      break; |
   case OP_BFIND: { |
      int32_t res; |
      switch (i->dType) { |
      case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break; |
      case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break; |
      default: |
         return; |
      } |
      if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0) |
         res = 31 - res; |
      bld.setPosition(i, false); /* make sure bld is init'ed */ |
      i->setSrc(0, bld.mkImm(res)); |
      i->setSrc(1, NULL); |
      i->op = OP_MOV; |
      i->subOp = 0; |
      break; |
   } |
   case OP_POPCNT: { |
      // Only deal with 1-arg POPCNT here |
      if (i->srcExists(1)) |
         break; |
      uint32_t res = util_bitcount(imm0.reg.data.u32); |
      i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); |
      i->setSrc(1, NULL); |
      i->op = OP_MOV; |
      break; |
   } |
   default: |
      return; |
   } |
   // an opcode change is what counts as a fold |
   if (newi->op != op) |
      foldCount++; |
} |
// ============================================================================= |
// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. |
// Pass that works one basic block at a time through visit(). |
class ModifierFolding : public Pass |
{ |
private: |
// Processes every instruction of the given block; always returns true. |
virtual bool visit(BasicBlock *); |
}; |
// For each instruction in @bb, try to absorb the instruction defining each |
// source into a modifier on that source, and merge an OP_SAT into the |
// saturate flag of the instruction producing its operand. |
bool |
ModifierFolding::visit(BasicBlock *bb) |
{ |
const Target *target = prog->getTarget(); |
Instruction *i, *next, *mi; |
Modifier mod; |
// next is fetched up front because i may be deleted by the SAT merge below |
for (i = bb->getEntry(); i; i = next) { |
next = i->next; |
// disabled: the condition is constant false |
if (0 && i->op == OP_SUB) { |
// turn "sub" into "add neg" (do we really want this ?) |
i->op = OP_ADD; |
i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); |
} |
for (int s = 0; s < 3 && i->srcExists(s); ++s) { |
mi = i->getSrc(s)->getInsn(); |
// skip sources without a defining insn, predicated producers, and |
// values referenced more than 8 times |
if (!mi || |
mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8) |
continue; |
// a u32 consumer of an s32 producer is only accepted for ADD/MUL |
// consuming ABS/NEG; any other type mismatch is skipped |
if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) { |
if ((i->op != OP_ADD && |
i->op != OP_MUL) || |
(mi->op != OP_ABS && |
mi->op != OP_NEG)) |
continue; |
} else |
if (i->sType != mi->dType) { |
continue; |
} |
// producer opcode has no modifier equivalent |
if ((mod = Modifier(mi->op)) == Modifier(0)) |
continue; |
// combine with whatever modifier the producer has on its own source |
mod *= mi->src(0).mod; |
if ((i->op == OP_ABS) || i->src(s).mod.abs()) { |
// abs neg [abs] = abs |
mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)); |
} else |
if ((i->op == OP_NEG) && mod.neg()) { |
assert(s == 0); |
// neg as both opcode and modifier on same insn is prohibited |
// neg neg abs = abs, neg neg = identity |
mod = mod & Modifier(~NV50_IR_MOD_NEG); |
i->op = mod.getOp(); |
mod = mod & Modifier(~NV50_IR_MOD_ABS); |
if (mod == Modifier(0)) |
i->op = OP_MOV; |
} |
// only rewire the source if the target accepts the modifier there |
if (target->isModSupported(i, s, mod)) { |
i->setSrc(s, mi->getSrc(0)); |
i->src(s).mod *= mod; |
} |
} |
if (i->op == OP_SAT) { |
mi = i->getSrc(0)->getInsn(); |
// fold SAT into its producer when that value has at most one |
// reference and the target can saturate the producer |
if (mi && |
mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) { |
mi->saturate = 1; |
mi->setDef(0, i->getDef(0)); |
delete_Instruction(prog, i); |
} |
} |
} |
return true; |
} |
// ============================================================================= |
// MUL + ADD -> MAD/FMA |
// MIN/MAX(a, a) -> a, etc. |
// SLCT(a, b, const) -> cc(const) ? a : b |
// RCP(RCP(a)) -> a |
// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) |
// Pass that dispatches each instruction of a block to an opcode-specific |
// handler (see visit()). |
class AlgebraicOpt : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
// ABS of an ADD/SUB -> SAD, when the target supports SAD |
void handleABS(Instruction *); |
// ADD -> MAD or SAD; returns whether the instruction was changed |
bool handleADD(Instruction *); |
// worker for handleADD; toOp is OP_MAD or OP_SAD |
bool tryADDToMADOrSAD(Instruction *, operation toOp); |
void handleMINMAX(Instruction *); |
void handleRCP(Instruction *); |
void handleSLCT(Instruction *); |
void handleLOGOP(Instruction *); |
void handleCVT(Instruction *); |
void handleSUCLAMP(Instruction *); |
// instruction builder used by handlers that need new immediates/values |
BuildUtil bld; |
}; |
// ABS(SUB(a, b)) or ABS(ADD(a, NEG(b))) -> SAD(a, b, 0) |
// Does nothing unless the target supports OP_SAD for the ABS's type and the |
// producer is an unmodified GPR ADD/SUB of matching signed integer type. |
void |
AlgebraicOpt::handleABS(Instruction *abs) |
{ |
   Instruction *sub = abs->getSrc(0)->getInsn(); |
   DataType ty; |
   if (!sub || |
       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) |
      return; |
   // Check the opcode before looking at operands: only ADD/SUB are known |
   // to have both source slots that the tests below read. |
   if (sub->op != OP_ADD && sub->op != OP_SUB) |
      return; |
   // expect not to have mods yet, if we do, bail |
   if (sub->src(0).mod || sub->src(1).mod) |
      return; |
   // hidden conversion ? |
   ty = intTypeToSigned(sub->dType); |
   if (abs->dType != abs->sType || ty != abs->sType) |
      return; |
   if (sub->src(0).getFile() != FILE_GPR || |
       sub->src(1).getFile() != FILE_GPR) |
      return; |
   Value *src0 = sub->getSrc(0); |
   Value *src1 = sub->getSrc(1); |
   if (sub->op == OP_ADD) { |
      // an ADD only qualifies if one operand is produced by a NEG |
      Instruction *neg = sub->getSrc(1)->getInsn(); |
      if (neg && neg->op != OP_NEG) { |
         neg = sub->getSrc(0)->getInsn(); |
         src0 = sub->getSrc(1); |
      } |
      if (!neg || neg->op != OP_NEG || |
          neg->dType != neg->sType || neg->sType != ty) |
         return; |
      // a modifier on the NEG's own source would change the negated value |
      if (neg->src(0).mod) |
         return; |
      src1 = neg->getSrc(0); |
   } |
   // found ABS(SUB)) |
   abs->moveSources(1, 2); // move sources >=1 up by 2 |
   abs->op = OP_SAD; |
   abs->setType(sub->dType); |
   abs->setSrc(0, src0); |
   abs->setSrc(1, src1); |
   bld.setPosition(abs, false); |
   // the accumulator operand of the SAD is zero |
   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); |
} |
// Try to fuse an ADD of two GPR operands into a MAD, then into a SAD. |
// Returns true when the instruction was rewritten. |
bool |
AlgebraicOpt::handleADD(Instruction *add) |
{ |
   Value *lhs = add->getSrc(0); |
   Value *rhs = add->getSrc(1); |
   if (lhs->reg.file != FILE_GPR || rhs->reg.file != FILE_GPR) |
      return false; |
   // MAD is attempted first; SAD only if MAD did not apply |
   if (prog->getTarget()->isOpSupported(OP_MAD, add->dType) && |
       tryADDToMADOrSAD(add, OP_MAD)) |
      return true; |
   if (prog->getTarget()->isOpSupported(OP_SAD, add->dType) && |
       tryADDToMADOrSAD(add, OP_SAD)) |
      return true; |
   return false; |
} |
// ADD(SAD(a,b,0), c) -> SAD(a,b,c) |
// ADD(MUL(a,b), c) -> MAD(a,b,c) |
// Fuse @add with the single-use MUL (toOp == OP_MAD) or SAD-with-zero |
// (toOp == OP_SAD) that produces one of its operands. |
// Returns true if @add was rewritten into @toOp, false if left untouched. |
bool |
AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) |
{ |
   Value *src0 = add->getSrc(0); |
   Value *src1 = add->getSrc(1); |
   Value *src; |
   int s; |
   const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; |
   // MAD tolerates NEG on the operands; SAD tolerates no modifier at all |
   const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0)); |
   Modifier mod[4]; |
   // pick the operand (s) that is the sole use of a srcOp result |
   if (src0->refCount() == 1 && |
       src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) |
      s = 0; |
   else |
   if (src1->refCount() == 1 && |
       src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) |
      s = 1; |
   else |
      return false; |
   // both producers must live in the same block as the ADD |
   if ((src0->getUniqueInsn() && src0->getUniqueInsn()->bb != add->bb) || |
       (src1->getUniqueInsn() && src1->getUniqueInsn()->bb != add->bb)) |
      return false; |
   src = add->getSrc(s); |
   // The fused instruction cannot clamp or scale the intermediate product, |
   // so a saturating or post-scaled producer must be left alone. |
   if (src->getInsn()->saturate || src->getInsn()->postFactor) |
      return false; |
   if (toOp == OP_SAD) { |
      // only a SAD whose accumulator is the constant 0 can take over the addend |
      ImmediateValue imm; |
      if (!src->getInsn()->src(2).getImmediate(imm)) |
         return false; |
      if (!imm.isInteger(0)) |
         return false; |
   } |
   mod[0] = add->src(0).mod; |
   mod[1] = add->src(1).mod; |
   mod[2] = src->getUniqueInsn()->src(0).mod; |
   mod[3] = src->getUniqueInsn()->src(1).mod; |
   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) |
      return false; |
   add->op = toOp; |
   add->subOp = src->getInsn()->subOp; // potentially mul-high |
   // the other ADD operand becomes the addend |
   add->setSrc(2, add->src(s ? 0 : 1)); |
   // a NEG on the fused operand is moved onto the first factor |
   add->setSrc(0, src->getInsn()->getSrc(0)); |
   add->src(0).mod = mod[2] ^ mod[s]; |
   add->setSrc(1, src->getInsn()->getSrc(1)); |
   add->src(1).mod = mod[3]; |
   return true; |
} |
// MIN/MAX(a, a) -> a, when both operands are the same GPR value with the |
// same modifier. |
void |
AlgebraicOpt::handleMINMAX(Instruction *minmax) |
{ |
   Value *lhs = minmax->getSrc(0); |
   Value *rhs = minmax->getSrc(1); |
   if (lhs != rhs || lhs->reg.file != FILE_GPR) |
      return; |
   if (!(minmax->src(0).mod == minmax->src(1).mod)) { |
      // TODO: |
      // min(x, -x) = -abs(x) |
      // min(x, -abs(x)) = -abs(x) |
      // min(x, abs(x)) = x |
      // max(x, -abs(x)) = x |
      // max(x, abs(x)) = abs(x) |
      // max(x, -x) = abs(x) |
      return; |
   } |
   if (!minmax->def(0).mayReplace(minmax->src(0))) { |
      // the result can't simply alias the source: keep a one-operand CVT |
      minmax->op = OP_CVT; |
      minmax->setSrc(1, NULL); |
      return; |
   } |
   minmax->def(0).replace(minmax->src(0), false); |
   minmax->bb->remove(minmax); |
} |
// RCP(RCP(a)) -> a: the outer RCP takes the inner one's operand and becomes |
// the op implied by the combined source modifiers. |
void |
AlgebraicOpt::handleRCP(Instruction *rcp) |
{ |
   Instruction *inner = rcp->getSrc(0)->getUniqueInsn(); |
   if (!inner || inner->op != OP_RCP) |
      return; |
   Modifier combined = rcp->src(0).mod * inner->src(0).mod; |
   rcp->op = combined.getOp(); |
   rcp->setSrc(0, inner->getSrc(0)); |
} |
// Turn an SLCT into a MOV when its selector is an immediate, or when both |
// choices are the same value. |
void |
AlgebraicOpt::handleSLCT(Instruction *slct) |
{ |
   Value *selector = slct->getSrc(2); |
   const bool immSelector = selector->reg.file == FILE_IMMEDIATE; |
   // a non-constant selector is only foldable if both choices are identical |
   if (!immSelector && slct->getSrc(0) != slct->getSrc(1)) |
      return; |
   // constant selector: pick source 1 when the comparison against 0 holds |
   if (immSelector && |
       selector->asImm()->compare(slct->asCmp()->setCond, 0.0f)) |
      slct->setSrc(0, slct->getSrc(1)); |
   slct->op = OP_MOV; |
   slct->setSrc(1, NULL); |
   slct->setSrc(2, NULL); |
} |
// Simplify AND/OR/XOR of two GPR operands: |
//  - AND/OR of a value with itself is replaced by that value |
//  - a logic op on the results of two SETs is turned into a combined |
//    SET_AND/SET_OR/SET_XOR fed by a predicate-producing copy of the first |
void |
AlgebraicOpt::handleLOGOP(Instruction *logop) |
{ |
Value *src0 = logop->getSrc(0); |
Value *src1 = logop->getSrc(1); |
if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) |
return; |
if (src0 == src1) { |
// XOR of a value with itself is not handled here |
if ((logop->op == OP_AND || logop->op == OP_OR) && |
logop->def(0).mayReplace(logop->src(0))) { |
logop->def(0).replace(logop->src(0), false); |
delete_Instruction(prog, logop); |
} |
} else { |
// try AND(SET, SET) -> SET_AND(SET) |
Instruction *set0 = src0->getInsn(); |
Instruction *set1 = src1->getInsn(); |
if (!set0 || set0->fixed || !set1 || set1->fixed) |
return; |
// make sure set1 is a plain SET, swapping the two if necessary |
if (set1->op != OP_SET) { |
Instruction *xchg = set0; |
set0 = set1; |
set1 = xchg; |
if (set1->op != OP_SET) |
return; |
} |
operation redOp = (logop->op == OP_AND ? OP_SET_AND : |
logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR); |
if (!prog->getTarget()->isOpSupported(redOp, set1->sType)) |
return; |
// set0 may itself already be a combined SET |
if (set0->op != OP_SET && |
set0->op != OP_SET_AND && |
set0->op != OP_SET_OR && |
set0->op != OP_SET_XOR) |
return; |
// bail only when both results have more than one reference |
if (set0->getDef(0)->refCount() > 1 && |
set1->getDef(0)->refCount() > 1) |
return; |
if (set0->getPredicate() || set1->getPredicate()) |
return; |
// check that they don't source each other |
for (int s = 0; s < 2; ++s) |
if (set0->getSrc(s) == set1->getDef(0) || |
set1->getSrc(s) == set0->getDef(0)) |
return; |
// work on copies placed right after logop; the second insertAfter puts |
// set0 ahead of set1 |
set0 = cloneForward(func, set0); |
set1 = cloneShallow(func, set1); |
logop->bb->insertAfter(logop, set1); |
logop->bb->insertAfter(logop, set0); |
// the set0 copy now produces a 1-byte predicate ... |
set0->dType = TYPE_U8; |
set0->getDef(0)->reg.file = FILE_PREDICATE; |
set0->getDef(0)->reg.size = 1; |
// ... which the set1 copy consumes as third source and combines via redOp |
set1->setSrc(2, set0->getDef(0)); |
set1->op = redOp; |
set1->setDef(0, logop->getDef(0)); |
delete_Instruction(prog, logop); |
} |
} |
// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 |
// nv50: |
// F2I(NEG(I2F(ABS(SET)))) |
// If @cvt is an unmodified f32 -> s32 conversion of NEG(SET) (or of the nv50 |
// chain NEG(CVT(ABS(SET)))), replace it by a copy of the SET that writes |
// cvt's destination directly with a u32 result. |
void |
AlgebraicOpt::handleCVT(Instruction *cvt) |
{ |
if (cvt->sType != TYPE_F32 || |
cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) |
return; |
// the converted value must come from an unmodified f32 NEG |
Instruction *insn = cvt->getSrc(0)->getInsn(); |
if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) |
return; |
if (insn->src(0).mod != Modifier(0)) |
return; |
insn = insn->getSrc(0)->getInsn(); |
// check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET |
if (insn && insn->op == OP_CVT && |
insn->dType == TYPE_F32 && |
insn->sType == TYPE_S32) { |
// nv50 form: s32->f32 CVT of an unmodified s32 ABS of a u32 SET |
insn = insn->getSrc(0)->getInsn(); |
if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 || |
insn->src(0).mod) |
return; |
insn = insn->getSrc(0)->getInsn(); |
if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32) |
return; |
} else |
if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) { |
return; |
} |
// insn is the originating SET: clone it with an integer result type and |
// let the clone define cvt's destination |
Instruction *bset = cloneShallow(func, insn); |
bset->dType = TYPE_U32; |
bset->setDef(0, cvt->getDef(0)); |
cvt->bb->insertAfter(cvt, bset); |
delete_Instruction(prog, cvt); |
} |
// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6) |
// Fold an integer ADD-with-immediate that feeds source 0 of @insn into the |
// immediate held in source 2, provided the sum stays within [-32, 31]. |
void |
AlgebraicOpt::handleSUCLAMP(Instruction *insn) |
{ |
ImmediateValue imm; |
// NOTE(review): assumes source 2 is always an immediate; asImm() is not |
// checked for NULL here - confirm against the code that emits SUCLAMP |
int32_t val = insn->getSrc(2)->asImm()->reg.data.s32; |
int s; |
Instruction *add; |
assert(insn->srcExists(0) && insn->src(0).getFile() == FILE_GPR); |
// look for ADD (TODO: only count references by non-SUCLAMP) |
if (insn->getSrc(0)->refCount() > 1) |
return; |
add = insn->getSrc(0)->getInsn(); |
if (!add || add->op != OP_ADD || |
(add->dType != TYPE_U32 && |
add->dType != TYPE_S32)) |
return; |
// look for immediate |
for (s = 0; s < 2; ++s) |
if (add->src(s).getImmediate(imm)) |
break; |
if (s >= 2) |
return; |
// s now indexes the non-immediate addend |
s = s ? 0 : 1; |
// determine if immediate fits |
val += imm.reg.data.s32; |
if (val > 31 || val < -32) |
return; |
// determine if other addend fits |
if (add->src(s).getFile() != FILE_GPR || add->src(s).mod != Modifier(0)) |
return; |
bld.setPosition(insn, false); // make sure bld is init'ed |
// replace sources |
insn->setSrc(2, bld.mkImm(val)); |
insn->setSrc(0, add->getSrc(s)); |
} |
// Walk all instructions of @bb and hand each one to the handler for its |
// opcode. Always returns true. |
bool |
AlgebraicOpt::visit(BasicBlock *bb) |
{ |
Instruction *next; |
// next is saved before dispatch because handlers may delete or remove i |
for (Instruction *i = bb->getEntry(); i; i = next) { |
next = i->next; |
switch (i->op) { |
case OP_ABS: |
handleABS(i); |
break; |
case OP_ADD: |
// the "changed" result of handleADD is not used here |
handleADD(i); |
break; |
case OP_RCP: |
handleRCP(i); |
break; |
case OP_MIN: |
case OP_MAX: |
handleMINMAX(i); |
break; |
case OP_SLCT: |
handleSLCT(i); |
break; |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
handleLOGOP(i); |
break; |
case OP_CVT: |
handleCVT(i); |
break; |
case OP_SUCLAMP: |
handleSUCLAMP(i); |
break; |
default: |
break; |
} |
} |
return true; |
} |
// ============================================================================= |
// Give the address symbol of @ldst a new offset. A symbol that is shared |
// with other users is cloned first so they keep the old offset. |
static inline void |
updateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) |
{ |
   if (ldst->getSrc(0)->reg.data.offset == offset) |
      return; // nothing to change |
   if (ldst->getSrc(0)->refCount() > 1) |
      ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0))); |
   ldst->getSrc(0)->reg.data.offset = offset; |
} |
// Combine loads and stores, forward stores to loads where possible. |
class MemoryOpt : public Pass |
{ |
private: |
// One tracked load or store; records form a doubly linked list per data file. |
class Record |
{ |
public: |
Record *next; |
Instruction *insn; // the load/store this record describes |
const Value *rel[2]; // indirect address values of source 0 (dims 0 and 1) |
const Value *base; // base of the address symbol |
int32_t offset; // offset of the address symbol |
int8_t fileIndex; |
uint8_t size; // access size as given by typeSizeof() |
bool locked; // set by lockStores(); findRecord() skips locked records for non-loads |
Record *prev; |
// whether the access of ldst may touch the same location as this record |
bool overlaps(const Instruction *ldst) const; |
// insert at the head of / remove from the list whose head pointer is given |
inline void link(Record **); |
inline void unlink(Record **); |
// fill the address fields and size from ldst |
inline void set(const Instruction *ldst); |
}; |
public: |
MemoryOpt(); |
// list heads, indexed by the data file of the accessed symbol |
Record *loads[DATA_FILE_COUNT]; |
Record *stores[DATA_FILE_COUNT]; |
// allocator for Record objects |
MemoryPool recordPool; |
private: |
virtual bool visit(BasicBlock *); |
bool runOpt(BasicBlock *); |
// list (loads or stores) an instruction belongs to |
Record **getList(const Instruction *); |
Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const; |
// merge @insn into load/store instruction from @rec |
bool combineLd(Record *rec, Instruction *ld); |
bool combineSt(Record *rec, Instruction *st); |
bool replaceLdFromLd(Instruction *ld, Record *ldRec); |
bool replaceLdFromSt(Instruction *ld, Record *stRec); |
bool replaceStFromSt(Instruction *restrict st, Record *stRec); |
void addRecord(Instruction *ldst); |
void purgeRecords(Instruction *const st, DataFile); |
void lockStores(Instruction *const ld); |
void reset(); |
private: |
Record *prevRecord; |
}; |
MemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6) |
{ |
   // start with no tracked accesses in any data file |
   prevRecord = NULL; |
   for (int file = 0; file < DATA_FILE_COUNT; ++file) |
      loads[file] = stores[file] = NULL; |
} |
// Return every tracked record to the pool and empty all load/store lists. |
void |
MemoryOpt::reset() |
{ |
   Record **const tables[2] = { loads, stores }; |
   for (unsigned int file = 0; file < DATA_FILE_COUNT; ++file) { |
      // loads of this file first, then its stores |
      for (int k = 0; k < 2; ++k) { |
         Record *rec = tables[k][file]; |
         while (rec) { |
            // read the link before the record goes back to the pool |
            Record *following = rec->next; |
            recordPool.release(rec); |
            rec = following; |
         } |
         tables[k][file] = NULL; |
      } |
   } |
} |
// Merge load @ld into the load tracked by @rec, so that rec->insn covers |
// both accesses, and delete @ld. |
// Returns false (changing nothing) if the target does not support an access |
// of the combined size or the alignment test below fails. |
bool |
MemoryOpt::combineLd(Record *rec, Instruction *ld) |
{ |
int32_t offRc = rec->offset; |
int32_t offLd = ld->getSrc(0)->reg.data.offset; |
int sizeRc = rec->size; |
int sizeLd = typeSizeof(ld->dType); |
int size = sizeRc + sizeLd; |
int d, j; |
if (!prog->getTarget()-> |
isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) |
return false; |
// no unaligned loads |
if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || |
((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) |
return false; |
assert(sizeRc + sizeLd <= 16 && offRc != offLd); |
// count the definitions of rec->insn (empty loop body; result is j) |
for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); |
if (offLd < offRc) { |
// @ld comes first in memory: shift the record's defs up to make room |
int sz; |
for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); |
// d: nr of definitions in ld |
// j: nr of definitions in rec->insn, move: |
for (d = d + j - 1; j > 0; --j, --d) |
rec->insn->setDef(d, rec->insn->getDef(j - 1)); |
// don't change the offset of a symbol other instructions still use |
if (rec->insn->getSrc(0)->refCount() > 1) |
rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); |
rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; |
d = 0; |
} else { |
// @ld comes after the record: append its defs |
d = j; |
} |
// move definitions of @ld to @rec->insn |
for (j = 0; sizeLd; ++j, ++d) { |
sizeLd -= ld->getDef(j)->reg.size; |
rec->insn->setDef(d, ld->getDef(j)); |
} |
rec->size = size; |
rec->insn->getSrc(0)->reg.size = size; |
rec->insn->setType(typeOfSize(size)); |
delete_Instruction(prog, ld); |
return true; |
} |
// Merge the store tracked by @rec into store @st: @st ends up writing the |
// values of both, the recorded instruction is deleted and @rec is updated |
// to describe @st. |
// Returns false (changing nothing) if the target does not support an access |
// of the combined size or an 8-byte result would not be 8-byte aligned. |
bool |
MemoryOpt::combineSt(Record *rec, Instruction *st) |
{ |
int32_t offRc = rec->offset; |
int32_t offSt = st->getSrc(0)->reg.data.offset; |
int sizeRc = rec->size; |
int sizeSt = typeSizeof(st->dType); |
int s = sizeSt / 4; |
int size = sizeRc + sizeSt; |
int j, k; |
Value *src[4]; // no modifiers in ValueRef allowed for st |
Value *extra[3]; |
if (!prog->getTarget()-> |
isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) |
return false; |
if (size == 8 && MIN2(offRc, offSt) & 0x7) |
return false; |
st->takeExtraSources(0, extra); // save predicate and indirect address |
if (offRc < offSt) { |
// the record's data comes first in memory |
// save values from @st |
for (s = 0; sizeSt; ++s) { |
sizeSt -= st->getSrc(s + 1)->reg.size; |
src[s] = st->getSrc(s + 1); |
} |
// set record's values as low sources of @st |
for (j = 1; sizeRc; ++j) { |
sizeRc -= rec->insn->getSrc(j)->reg.size; |
st->setSrc(j, rec->insn->getSrc(j)); |
} |
// set saved values as high sources of @st |
for (k = j, j = 0; j < s; ++j) |
st->setSrc(k++, src[j]); |
updateLdStOffset(st, offRc, func); |
} else { |
// @st's data comes first: skip past its own values ... |
for (j = 1; sizeSt; ++j) |
sizeSt -= st->getSrc(j)->reg.size; |
// ... and append the record's values behind them |
for (s = 1; sizeRc; ++j, ++s) { |
sizeRc -= rec->insn->getSrc(s)->reg.size; |
st->setSrc(j, rec->insn->getSrc(s)); |
} |
rec->offset = offSt; |
} |
st->putExtraSources(0, extra); // restore pointer and predicate |
delete_Instruction(prog, rec->insn); |
rec->insn = st; |
rec->size = size; |
rec->insn->getSrc(0)->reg.size = size; |
rec->insn->setType(typeOfSize(size)); |
return true; |
} |
// Capture the address (file index, base, offset, indirect values) and the |
// access size of @ldst in this record. |
void |
MemoryOpt::Record::set(const Instruction *ldst) |
{ |
   const Symbol *sym = ldst->getSrc(0)->asSym(); |
   base = sym->getBase(); |
   offset = sym->reg.data.offset; |
   fileIndex = sym->reg.fileIndex; |
   for (int dim = 0; dim < 2; ++dim) |
      rel[dim] = ldst->getIndirect(0, dim); |
   size = typeSizeof(ldst->sType); |
} |
// Insert this record at the head of the list whose head pointer is @list. |
void |
MemoryOpt::Record::link(Record **list) |
{ |
   Record *oldHead = *list; |
   next = oldHead; |
   if (oldHead) |
      oldHead->prev = this; |
   prev = NULL; |
   *list = this; |
} |
// Remove this record from the list whose head pointer is @list. |
// The record's own next/prev fields are left as they were. |
void |
MemoryOpt::Record::unlink(Record **list) |
{ |
   Record *const before = prev; |
   Record *const after = next; |
   if (after) |
      after->prev = before; |
   if (before) |
      before->next = after; |
   else |
      *list = after; // this record was the head |
} |
// Head pointer of the list @insn belongs to: the load list of its data file |
// for LOAD/VFETCH, the store list otherwise. |
MemoryOpt::Record ** |
MemoryOpt::getList(const Instruction *insn) |
{ |
   const bool isLoad = insn->op == OP_LOAD || insn->op == OP_VFETCH; |
   Record **table = isLoad ? loads : stores; |
   return &table[insn->src(0).getFile()]; |
} |
// Start tracking load/store @i: allocate a record for it, fill it in and |
// put it at the head of the matching list. |
void |
MemoryOpt::addRecord(Instruction *i) |
{ |
   Record *rec = reinterpret_cast<Record *>(recordPool.allocate()); |
   rec->insn = i; |
   rec->locked = false; |
   rec->set(i); |
   rec->link(getList(i)); |
} |
// Search the load list (@load true) or store list of @insn's data file for |
// a record that covers, or lies next to, the location @insn accesses. |
// On return @isAdj tells whether the last candidate examined was adjacent |
// rather than overlapping; it is not written if no candidate reached that |
// point. Returns NULL if nothing usable was found. |
MemoryOpt::Record * |
MemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const |
{ |
const Symbol *sym = insn->getSrc(0)->asSym(); |
const int size = typeSizeof(insn->sType); |
Record *rec = NULL; |
Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file]; |
for (; it; it = it->next) { |
// locked records are only visible to loads |
if (it->locked && insn->op != OP_LOAD) |
continue; |
// must lie in the same 16-byte window, with the same file index and |
// the same indirect address values |
if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || |
it->rel[0] != insn->getIndirect(0, 0) || |
it->fileIndex != sym->reg.fileIndex || |
it->rel[1] != insn->getIndirect(0, 1)) |
continue; |
if (it->offset < sym->reg.data.offset) { |
// record starts below insn: it must reach at least up to insn's start |
if (it->offset + it->size >= sym->reg.data.offset) { |
isAdj = (it->offset + it->size == sym->reg.data.offset); |
if (!isAdj) |
return it; |
// adjacent candidate, kept only if the record is 8-byte aligned |
if (!(it->offset & 0x7)) |
rec = it; |
} |
} else { |
// record starts at or above insn |
isAdj = it->offset != sym->reg.data.offset; |
if (size <= it->size && !isAdj) |
return it; |
else |
if (!(sym->reg.data.offset & 0x7)) |
if (it->offset - size <= sym->reg.data.offset) |
rec = it; |
} |
} |
return rec; |
} |
// Forward the values written by the store in @rec to the users of load @ld |
// and remove @ld from its block. |
// Returns false if the load's offset does not line up with one of the |
// store's sources, or if a matching source has a different size or is not |
// in a GPR. |
bool |
MemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) |
{ |
Instruction *st = rec->insn; |
int32_t offSt = rec->offset; |
int32_t offLd = ld->getSrc(0)->reg.data.offset; |
int d, s; |
// advance to the store source that sits at the load's offset |
// (store sources start at index 1; index 0 is the address) |
for (s = 1; offSt != offLd && st->srcExists(s); ++s) |
offSt += st->getSrc(s)->reg.size; |
if (offSt != offLd) |
return false; |
for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { |
if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) |
return false; |
if (st->getSrc(s)->reg.file != FILE_GPR) |
return false; |
ld->def(d).replace(st->src(s), false); |
} |
ld->bb->remove(ld); |
return true; |
} |
// Replace the results of load @ldE by the corresponding results of the |
// earlier load in @rec and delete @ldE. |
// Returns false if @ldE's offset does not line up with one of the recorded |
// load's definitions, or if a pair of definitions differs in size. |
bool |
MemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) |
{ |
Instruction *ldR = rec->insn; |
int32_t offR = rec->offset; |
int32_t offE = ldE->getSrc(0)->reg.data.offset; |
int dR, dE; |
assert(offR <= offE); |
// advance to the recorded definition that sits at ldE's offset |
for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) |
offR += ldR->getDef(dR)->reg.size; |
if (offR != offE) |
return false; |
for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { |
if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) |
return false; |
ldE->def(dE).replace(ldR->getDef(dR), false); |
} |
delete_Instruction(prog, ldE); |
return true; |
} |
// Let store @st supersede the overlapping earlier store in @rec: @st takes |
// over the values of the earlier store that it does not overwrite itself, |
// the earlier store is deleted and @rec is updated to describe @st. |
// Always returns true. |
bool |
MemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) |
{ |
const Instruction *const ri = rec->insn; |
Value *extra[3]; |
int32_t offS = st->getSrc(0)->reg.data.offset; |
int32_t offR = rec->offset; |
int32_t endS = offS + typeSizeof(st->dType); |
int32_t endR = offR + typeSizeof(ri->dType); |
// the merged store spans from the lower start to the higher end |
rec->size = MAX2(endS, endR) - MIN2(offS, offR); |
st->takeExtraSources(0, extra); |
if (offR < offS) { |
// the recorded store starts below @st |
Value *vals[10]; |
int s, n; |
int k = 0; |
// get non-replaced sources of ri |
for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) |
vals[k++] = ri->getSrc(s); |
n = s; |
// get replaced sources of st |
for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) |
vals[k++] = st->getSrc(s); |
// skip replaced sources of ri |
for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); |
// get non-replaced sources after values covered by st |
for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) |
vals[k++] = ri->getSrc(s); |
// NOTE: this bound is only checked after vals[] has been filled |
assert((unsigned int)k <= Elements(vals)); |
for (s = 0; s < k; ++s) |
st->setSrc(s + 1, vals[s]); |
// use the recorded store's address symbol (the lower offset) |
st->setSrc(0, ri->getSrc(0)); |
} else |
if (endR > endS) { |
// same or higher start, but the recorded store extends past @st: |
// append the recorded values that lie beyond @st's end |
int j, s; |
for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); |
for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); |
for (; offR < endR; offR += ri->getSrc(j++)->reg.size) |
st->setSrc(s++, ri->getSrc(j)); |
} |
st->putExtraSources(0, extra); |
delete_Instruction(prog, rec->insn); |
rec->insn = st; |
rec->offset = st->getSrc(0)->reg.data.offset; |
st->setType(typeOfSize(rec->size)); |
return true; |
} |
// Whether the access of @ldst may touch the location this record describes. |
bool |
MemoryOpt::Record::overlaps(const Instruction *ldst) const |
{ |
   Record other; |
   other.set(ldst); |
   if (fileIndex != other.fileIndex) |
      return false; |
   // with indirect addressing on either side only the bases are compared |
   if (rel[0] || other.rel[0]) |
      return base == other.base; |
   // direct addressing: plain byte-range intersection |
   const bool startsBeforeOtherEnds = offset < other.offset + other.size; |
   const bool endsAfterOtherStarts = offset + size > other.offset; |
   return startsBeforeOtherEnds && endsAfterOtherStarts; |
} |
// We must not eliminate stores that affect the result of @ld if |
// we find later stores to the same location, and we may no longer |
// merge them with later stores. |
// The stored value can, however, still be used to determine the value |
// returned by future loads. |
void |
MemoryOpt::lockStores(Instruction *const ld) |
{ |
   // mark every not-yet-locked store of ld's data file that overlaps ld |
   Record *rec = stores[ld->src(0).getFile()]; |
   while (rec) { |
      if (!rec->locked && rec->overlaps(ld)) |
         rec->locked = true; |
      rec = rec->next; |
   } |
} |
// Prior loads from the location of @st are no longer valid. |
// Stores to the location of @st may no longer be used to derive |
// the value at it nor be coalesced into later stores. |
void |
MemoryOpt::purgeRecords(Instruction *const st, DataFile f) |
{ |
   // A given store decides the file; @f is only used when @st is NULL. |
   if (st) |
      f = st->src(0).getFile(); |
   // Loads first, then stores, exactly one pass over each list. |
   Record **const lists[2] = { &loads[f], &stores[f] }; |
   for (int l = 0; l < 2; ++l) { |
      for (Record *r = *lists[l]; r; r = r->next) { |
         // With a store given, records that do not overlap it survive. |
         if (st && !r->overlaps(st)) |
            continue; |
         r->unlink(lists[l]); |
      } |
   } |
} |
bool |
MemoryOpt::visit(BasicBlock *bb) |
{ |
   // One pass won't combine 4 32 bit ld/st to a single 128 bit ld/st where |
   // 96 bit memory operations are forbidden, so a successful first pass is |
   // followed by a second one whose result is returned. |
   if (!runOpt(bb)) |
      return false; |
   return runOpt(bb); |
} |
// Walk the instructions of @bb once and use the records of earlier loads and |
// stores to replace or combine the current load/store where possible. |
// Always returns true. |
bool |
MemoryOpt::runOpt(BasicBlock *bb) |
{ |
Instruction *ldst, *next; |
Record *rec; |
bool isAdjacent = true; |
for (ldst = bb->getEntry(); ldst; ldst = next) { |
bool keep = true; |
bool isLoad = true; |
// fetched up front: ldst itself may be deleted before the loop advances |
next = ldst->next; |
if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { |
if (ldst->isDead()) { |
// might have been produced by earlier optimization |
delete_Instruction(prog, ldst); |
continue; |
} |
} else |
if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { |
isLoad = false; |
} else { |
// Neither a load nor a store: only decide which records have to be |
// dropped, then move on to the next instruction. |
// TODO: maybe have all fixed ops act as barrier ? |
if (ldst->op == OP_CALL || |
ldst->op == OP_BAR || |
ldst->op == OP_MEMBAR) { |
purgeRecords(NULL, FILE_MEMORY_LOCAL); |
purgeRecords(NULL, FILE_MEMORY_GLOBAL); |
purgeRecords(NULL, FILE_MEMORY_SHARED); |
purgeRecords(NULL, FILE_SHADER_OUTPUT); |
} else |
if (ldst->op == OP_ATOM || ldst->op == OP_CCTL) { |
// an access to global memory also drops the local and shared records |
if (ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) { |
purgeRecords(NULL, FILE_MEMORY_LOCAL); |
purgeRecords(NULL, FILE_MEMORY_GLOBAL); |
purgeRecords(NULL, FILE_MEMORY_SHARED); |
} else { |
purgeRecords(NULL, ldst->src(0).getFile()); |
} |
} else |
if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { |
purgeRecords(NULL, FILE_SHADER_OUTPUT); |
} |
continue; |
} |
if (ldst->getPredicate()) // TODO: handle predicated ld/st |
continue; |
if (isLoad) { |
DataFile file = ldst->src(0).getFile(); |
// if ld l[]/g[] look for previous store to eliminate the reload |
if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { |
// TODO: shared memory ? |
// NOTE(review): isAdjacent is presumably an out-parameter of findRecord |
// -- confirm against its declaration. |
rec = findRecord(ldst, false, isAdjacent); |
if (rec && !isAdjacent) |
keep = !replaceLdFromSt(ldst, rec); |
} |
// or look for ld from the same location and replace this one |
rec = keep ? findRecord(ldst, true, isAdjacent) : NULL; |
if (rec) { |
if (!isAdjacent) |
keep = !replaceLdFromLd(ldst, rec); |
else |
// or combine a previous load with this one |
keep = !combineLd(rec, ldst); |
} |
// a load that stays pins the stores it overlaps with |
if (keep) |
lockStores(ldst); |
} else { |
rec = findRecord(ldst, false, isAdjacent); |
if (rec) { |
if (!isAdjacent) |
keep = !replaceStFromSt(ldst, rec); |
else |
keep = !combineSt(rec, ldst); |
} |
// a store that stays invalidates the overlapping load and store records; |
// the DataFile argument is ignored because a store is passed |
if (keep) |
purgeRecords(ldst, DATA_FILE_COUNT); |
} |
// whatever survived becomes a record for the following instructions |
if (keep) |
addRecord(ldst); |
} |
reset(); |
return true; |
} |
// ============================================================================= |
// Turn control flow into predicated instructions (after register allocation !). |
// TODO: |
// Could move this to before register allocation on NVC0 and also handle nested |
// constructs. |
// Pass that predicates the blocks of simple conditionals, attaches joins to |
// the preceding instruction and forwards branches to single-instruction blocks. |
class FlatteningPass : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
// predicate the successor block(s) of a simple conditional; false if not done |
bool tryPredicateConditional(BasicBlock *); |
// put (cc, pred) on every non-nop instruction of the block, drop its exit flow |
void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); |
// replace branches to a block holding only BRA/JOIN/EXIT by that instruction |
void tryPropagateBranch(BasicBlock *); |
// true if pred comes from an OP_SET on immediates / constant memory only |
inline bool isConstantCondition(Value *pred); |
inline bool mayPredicate(const Instruction *, const Value *pred) const; |
// delete a BRA or JOIN and a predicate setter that became dead with it |
inline void removeFlow(Instruction *); |
}; |
// Return true if @pred is produced by an OP_SET without a third source whose |
// (up to two) sources all come from immediates or constant memory. |
bool |
FlatteningPass::isConstantCondition(Value *pred) |
{ |
Instruction *insn = pred->getUniqueInsn(); |
assert(insn); |
if (insn->op != OP_SET || insn->srcExists(2)) |
return false; |
for (int s = 0; s < 2 && insn->srcExists(s); ++s) { |
Instruction *ld = insn->getSrc(s)->getUniqueInsn(); |
DataFile file; |
if (ld) { |
// the source has a defining instruction: it must be a direct MOV or LOAD, |
// and the file of what that instruction reads is the one that counts |
if (ld->op != OP_MOV && ld->op != OP_LOAD) |
return false; |
if (ld->src(0).isIndirect(0)) |
return false; |
file = ld->src(0).getFile(); |
} else { |
// no defining instruction: classify the source operand itself |
file = insn->src(s).getFile(); |
// catch $r63 on NVC0 |
if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR) |
file = FILE_IMMEDIATE; |
} |
if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) |
return false; |
} |
return true; |
} |
// Delete @insn if it is a JOIN, or a BRA whose block's first outgoing edge is |
// neither CROSS nor BACK. Anything else, including NULL, is left alone. |
void |
FlatteningPass::removeFlow(Instruction *insn) |
{ |
FlowInstruction *term = insn ? insn->asFlow() : NULL; |
if (!term) |
return; |
Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); |
if (term->op == OP_BRA) { |
// TODO: this might get more difficult when we get arbitrary BRAs |
if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) |
return; |
} else |
if (term->op != OP_JOIN) |
return; |
// read the predicate before the instruction goes away |
Value *pred = term->getPredicate(); |
delete_Instruction(prog, term); |
if (pred && pred->refCount() == 0) { |
// NOTE(review): pSet is used without a NULL check -- confirm that an |
// unreferenced predicate always has a unique defining instruction here. |
Instruction *pSet = pred->getUniqueInsn(); |
pred->join->reg.data.id = -1; // deallocate |
if (pSet->isDead()) |
delete_Instruction(prog, pSet); |
} |
} |
// Put (@cc, @pred) on every non-nop instruction of @bb, then hand the block's |
// exit instruction to removeFlow(). |
void |
FlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) |
{ |
   Instruction *insn = bb->getEntry(); |
   while (insn) { |
      if (!insn->isNop()) { |
         // nothing in the block may carry a predicate already |
         assert(!insn->getPredicate()); |
         insn->setPredicate(cc, pred); |
      } |
      insn = insn->next; |
   } |
   removeFlow(bb->getExit()); |
} |
// Decide whether @insn may be guarded by @pred: pseudo instructions always, |
// others only if the target agrees and @insn does not itself define @pred. |
bool |
FlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const |
{ |
   if (insn->isPseudo()) |
      return true; |
   // TODO: calls where we don't know which registers are modified |
   const bool targetAllows = prog->getTarget()->mayPredicate(insn, pred); |
   if (!targetAllows) |
      return false; |
   int d = 0; |
   while (insn->defExists(d)) { |
      if (insn->getDef(d)->equals(pred)) |
         return false; |
      ++d; |
   } |
   return true; |
} |
// If we jump to BRA/RET/EXIT, replace the jump with it. |
// NOTE: We do not update the CFG anymore here ! |
// |
// TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?): |
// BB:0 |
// @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1 |
// BB1: |
// bra BB:3 |
// BB2: |
// ... |
// BB3: |
// ... |
// Walk the trailing run of OP_BRA instructions of @bb backwards and retarget |
// each one whose destination block consists of a single unpredicated |
// BRA/JOIN/EXIT. |
void |
FlatteningPass::tryPropagateBranch(BasicBlock *bb) |
{ |
for (Instruction *i = bb->getExit(); i && i->op == OP_BRA; i = i->prev) { |
BasicBlock *bf = i->asFlow()->target.bb; |
// only blocks that contain nothing but one instruction qualify |
if (bf->getInsnCount() != 1) |
continue; |
FlowInstruction *bra = i->asFlow(); |
FlowInstruction *rep = bf->getExit()->asFlow(); |
if (!rep || rep->getPredicate()) |
continue; |
if (rep->op != OP_BRA && |
rep->op != OP_JOIN && |
rep->op != OP_EXIT) |
continue; |
// TODO: If there are multiple branches to @rep, only the first would |
// be replaced, so only remove them after this pass is done ? |
// Also, need to check all incident blocks for fall-through exits and |
// add the branch there. |
// the branch takes over both the operation and the target of @rep |
bra->op = rep->op; |
bra->target.bb = rep->target.bb; |
// @rep is only removed when a single edge leads into its block |
if (bf->cfg.incidentCount() == 1) |
bf->remove(rep); |
} |
} |
// Per-block entry point: try to predicate a conditional first; otherwise try |
// to fold a trailing JOIN into the instruction before it; otherwise try to |
// propagate branches. Always returns true. |
bool |
FlatteningPass::visit(BasicBlock *bb) |
{ |
if (tryPredicateConditional(bb)) |
return true; |
// try to attach join to previous instruction |
if (prog->getTarget()->hasJoin) { |
Instruction *insn = bb->getExit(); |
if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { |
// from here on insn is the instruction in front of the JOIN |
insn = insn->prev; |
if (insn && !insn->getPredicate() && |
!insn->asFlow() && |
insn->op != OP_TEXBAR && |
!isTextureOp(insn->op) && // probably just nve4 |
!isSurfaceOp(insn->op) && // not confirmed |
insn->op != OP_LINTERP && // probably just nve4 |
insn->op != OP_PINTERP && // probably just nve4 |
((insn->op != OP_LOAD && insn->op != OP_STORE) || |
typeSizeof(insn->dType) <= 4) && |
!insn->isNop()) { |
// mark the instruction as joining and drop the separate JOIN |
insn->join = 1; |
bb->remove(bb->getExit()); |
return true; |
} |
} |
} |
tryPropagateBranch(bb); |
return true; |
} |
// If @bb starts a simple conditional, predicate the instructions of its |
// branch block(s) with the condition of @bb's exit and remove the flow |
// instructions. Returns false if @bb starts no simple conditional, if some |
// instruction may not be predicated, or if a branch block is too long. |
bool |
FlatteningPass::tryPredicateConditional(BasicBlock *bb) |
{ |
BasicBlock *bL = NULL, *bR = NULL; |
// limit: a block with more instructions than this is left as a real branch |
unsigned int nL = 0, nR = 0, limit = 12; |
Instruction *insn; |
unsigned int mask; |
// bit 0 selects the block on the first outgoing edge, bit 1 the second |
mask = bb->initiatesSimpleConditional(); |
if (!mask) |
return false; |
assert(bb->getExit()); |
Value *pred = bb->getExit()->getPredicate(); |
assert(pred); |
// a lower limit applies when the condition derives from constants |
if (isConstantCondition(pred)) |
limit = 4; |
Graph::EdgeIterator ei = bb->cfg.outgoing(); |
if (mask & 1) { |
bL = BasicBlock::get(ei.getNode()); |
for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) |
if (!mayPredicate(insn, pred)) |
return false; |
if (nL > limit) |
return false; // too long, do a real branch |
} |
ei.next(); |
if (mask & 2) { |
bR = BasicBlock::get(ei.getNode()); |
for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) |
if (!mayPredicate(insn, pred)) |
return false; |
if (nR > limit) |
return false; // too long, do a real branch |
} |
// nothing has been modified up to this point; the rewrite starts here |
if (bL) |
predicateInstructions(bL, pred, bb->getExit()->cc); |
if (bR) |
predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); |
if (bb->joinAt) { |
bb->remove(bb->joinAt); |
bb->joinAt = NULL; |
} |
removeFlow(bb->getExit()); // delete the branch/join at the fork point |
// remove potential join operations at the end of the conditional |
if (prog->getTarget()->joinAnterior) { |
// bb now refers to the block on the first outgoing edge of the branch block |
bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); |
if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) |
removeFlow(bb->getEntry()); |
} |
return true; |
} |
// ============================================================================= |
// Fold Immediate into MAD; must be done after register allocation due to |
// constraint SDST == SSRC2 |
// TODO: |
// Does NVC0+ have other situations where this pass makes sense? |
// Block-level pass; all of its work happens in visit(BasicBlock *). |
class NV50PostRaConstantFolding : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
}; |
// For every float OP_MAD in @bb whose operands are all GPRs and whose |
// destination register id equals that of source 2, replace source 1 by the |
// immediate it was loaded from (if it is defined by a MOV of an immediate). |
// Always returns true. |
bool |
NV50PostRaConstantFolding::visit(BasicBlock *bb) |
{ |
   Value *vtmp; |
   Instruction *def; |
   for (Instruction *i = bb->getFirst(); i; i = i->next) { |
      switch (i->op) { |
      case OP_MAD: |
         if (i->def(0).getFile() != FILE_GPR || |
             i->src(0).getFile() != FILE_GPR || |
             i->src(1).getFile() != FILE_GPR || |
             i->src(2).getFile() != FILE_GPR || |
             i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id || |
             !isFloatType(i->dType)) |
            break; |
         def = i->getSrc(1)->getInsn(); |
         // getInsn() yields NULL for a value without a defining instruction |
         // (e.g. a function input); such a source cannot be folded, and it |
         // must not be dereferenced. |
         if (def && def->op == OP_MOV && |
             def->src(0).getFile() == FILE_IMMEDIATE) { |
            vtmp = i->getSrc(1); |
            i->setSrc(1, def->getSrc(0)); |
            /* There's no post-RA dead code elimination, so do it here |
             * XXX: if we add more code-removing post-RA passes, we might |
             * want to create a post-RA dead-code elim pass */ |
            if (vtmp->refCount() == 0) |
               delete_Instruction(bb->getProgram(), def); |
            break; |
         } |
         break; |
      default: |
         break; |
      } |
   } |
   return true; |
} |
// ============================================================================= |
// Common subexpression elimination. Stupid O^2 implementation. |
// Common subexpression elimination inside a single basic block. |
class LocalCSE : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
// delete *ptr in favour of the second instruction if both give the same result |
inline bool tryReplace(Instruction **, Instruction *); |
// instructions seen so far in the current block, one list per operation |
DLList ops[OP_LAST + 1]; |
}; |
// Merges equal instructions that feed the same phi from different in-blocks. |
class GlobalCSE : public Pass |
{ |
private: |
virtual bool visit(BasicBlock *); |
}; |
// Compare everything about two instructions except their operands: operation, |
// types, condition code, the class-specific fields and the modifier flags. |
// Flow instructions never compare equal. |
bool |
Instruction::isActionEqual(const Instruction *that) const |
{ |
if (this->op != that->op || |
this->dType != that->dType || |
this->sType != that->sType) |
return false; |
if (this->cc != that->cc) |
return false; |
// class-specific part: texture state, set condition, or the generic fields |
if (this->asTex()) { |
if (memcmp(&this->asTex()->tex, |
&that->asTex()->tex, |
sizeof(this->asTex()->tex))) |
return false; |
} else |
if (this->asCmp()) { |
if (this->asCmp()->setCond != that->asCmp()->setCond) |
return false; |
} else |
if (this->asFlow()) { |
return false; |
} else { |
if (this->ipa != that->ipa || |
this->lanes != that->lanes || |
this->perPatch != that->perPatch) |
return false; |
if (this->postFactor != that->postFactor) |
return false; |
} |
// flags that are compared for every kind of instruction that got this far |
if (this->subOp != that->subOp || |
this->saturate != that->saturate || |
this->rnd != that->rnd || |
this->ftz != that->ftz || |
this->dnz != that->dnz || |
this->cache != that->cache || |
this->mask != that->mask) |
return false; |
return true; |
} |
// Return true if @that performs the same action on equal operands, so that |
// its result can stand in for the result of this instruction. |
bool |
Instruction::isResultEqual(const Instruction *that) const |
{ |
unsigned int d, s; |
// NOTE: location of discard only affects tex with liveOnly and quadops |
if (!this->defExists(0) && this->op != OP_DISCARD) |
return false; |
if (!isActionEqual(that)) |
return false; |
if (this->predSrc != that->predSrc) |
return false; |
// both must have the same number of defs, pairwise equal |
for (d = 0; this->defExists(d); ++d) { |
if (!that->defExists(d) || |
!this->getDef(d)->equals(that->getDef(d), false)) |
return false; |
} |
if (that->defExists(d)) |
return false; |
// both must have the same number of sources, pairwise equal incl. modifiers |
for (s = 0; this->srcExists(s); ++s) { |
if (!that->srcExists(s)) |
return false; |
if (this->src(s).mod != that->src(s).mod) |
return false; |
if (!this->getSrc(s)->equals(that->getSrc(s), true)) |
return false; |
} |
if (that->srcExists(s)) |
return false; |
// loads only count as equal when reading constant memory or shader inputs |
if (op == OP_LOAD || op == OP_VFETCH) { |
switch (src(0).getFile()) { |
case FILE_MEMORY_CONST: |
case FILE_SHADER_INPUT: |
return true; |
default: |
return false; |
} |
} |
return true; |
} |
// pull through common expressions from different in-blocks |
// For each phi of @bb whose sources are all singly-referenced results of |
// equal instructions, move the first of those instructions into @bb, let it |
// define the phi's result and delete the phi. Always returns true. |
bool |
GlobalCSE::visit(BasicBlock *bb) |
{ |
Instruction *phi, *next, *ik; |
int s; |
// TODO: maybe do this with OP_UNION, too |
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { |
// fetched up front: the phi may be deleted below |
next = phi->next; |
if (phi->getSrc(0)->refCount() > 1) |
continue; |
ik = phi->getSrc(0)->getInsn(); |
if (!ik) |
continue; // probably a function input |
// stop at the first source that is shared or not equal to ik's result |
for (s = 1; phi->srcExists(s); ++s) { |
if (phi->getSrc(s)->refCount() > 1) |
break; |
if (!phi->getSrc(s)->getInsn() || |
!phi->getSrc(s)->getInsn()->isResultEqual(ik)) |
break; |
} |
// the loop ran to the end, i.e. every source matched |
if (!phi->srcExists(s)) { |
Instruction *entry = bb->getEntry(); |
ik->bb->remove(ik); |
// keep a JOIN at the entry of the block in front of the moved instruction |
if (!entry || entry->op != OP_JOIN) |
bb->insertHead(ik); |
else |
bb->insertAfter(entry, ik); |
ik->setDef(0, phi->getDef(0)); |
delete_Instruction(prog, phi); |
} |
} |
return true; |
} |
// If @i is unpredicated and yields the same result as *@ptr, redirect all |
// defs of *@ptr to those of @i, delete *@ptr and set *@ptr to NULL. |
// Returns whether the replacement took place. |
bool |
LocalCSE::tryReplace(Instruction **ptr, Instruction *i) |
{ |
   Instruction *const victim = *ptr; |
   // TODO: maybe relax this later (causes trouble with OP_UNION) |
   const bool usable = !i->isPredicated() && victim->isResultEqual(i); |
   if (!usable) |
      return false; |
   int d = 0; |
   while (victim->defExists(d)) { |
      victim->def(d).replace(i->getDef(d), false); |
      ++d; |
   } |
   delete_Instruction(prog, victim); |
   *ptr = NULL; |
   return true; |
} |
// Repeatedly scan @bb, replacing instructions by earlier equal ones, until a |
// scan replaces nothing. Always returns true. |
bool |
LocalCSE::visit(BasicBlock *bb) |
{ |
unsigned int replaced; |
do { |
Instruction *ir, *next; |
replaced = 0; |
// will need to know the order of instructions |
int serial = 0; |
for (ir = bb->getFirst(); ir; ir = ir->next) |
ir->serial = serial++; |
for (ir = bb->getEntry(); ir; ir = next) { |
int s; |
Value *src = NULL; |
// fetched up front: ir may be deleted (and set to NULL) by tryReplace |
next = ir->next; |
// fixed instructions are never replaced themselves but are recorded |
if (ir->fixed) { |
ops[ir->op].insert(ir); |
continue; |
} |
// pick the lvalue source with the lowest reference count |
for (s = 0; ir->srcExists(s); ++s) |
if (ir->getSrc(s)->asLValue()) |
if (!src || ir->getSrc(s)->refCount() < src->refCount()) |
src = ir->getSrc(s); |
if (src) { |
// candidates are the earlier users of that source in the same block |
for (Value::UseIterator it = src->uses.begin(); |
it != src->uses.end(); ++it) { |
Instruction *ik = (*it)->getInsn(); |
if (ik && ik->bb == ir->bb && ik->serial < ir->serial) |
if (tryReplace(&ir, ik)) |
break; |
} |
} else { |
// no lvalue source: candidates are the recorded instructions with the same op |
DLLIST_FOR_EACH(&ops[ir->op], iter) |
{ |
Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); |
if (tryReplace(&ir, ik)) |
break; |
} |
} |
// ir is NULL exactly when it has been replaced |
if (ir) |
ops[ir->op].insert(ir); |
else |
++replaced; |
} |
for (unsigned int i = 0; i <= OP_LAST; ++i) |
ops[i].clear(); |
} while (replaced); |
return true; |
} |
// ============================================================================= |
// Remove computations of unused values. |
// Pass that deletes dead instructions and trims partially unused loads. |
class DeadCodeElim : public Pass |
{ |
public: |
// run the pass over the program until a run deletes nothing |
bool buryAll(Program *); |
private: |
virtual bool visit(BasicBlock *); |
void checkSplitLoad(Instruction *ld); // for partially dead loads |
// number of instructions deleted during the current run |
unsigned int deadCount; |
}; |
// Run the pass over @prog again and again until one run deletes nothing. |
// Returns false as soon as a run fails, true otherwise. |
bool |
DeadCodeElim::buryAll(Program *prog) |
{ |
   for (;;) { |
      deadCount = 0; |
      if (!this->run(prog, false, false)) |
         return false; |
      if (!deadCount) |
         break; |
   } |
   return true; |
} |
// Delete the dead instructions of @bb, hand multi-def loads to |
// checkSplitLoad() and clear the unused result of atomic/reduction ops. |
// Always returns true. |
bool |
DeadCodeElim::visit(BasicBlock *bb) |
{ |
Instruction *next; |
for (Instruction *i = bb->getFirst(); i; i = next) { |
// fetched up front: i may be deleted below |
next = i->next; |
if (i->isDead()) { |
++deadCount; |
delete_Instruction(prog, i); |
} else |
if (i->defExists(1) && (i->op == OP_VFETCH || i->op == OP_LOAD)) { |
checkSplitLoad(i); |
} else |
if (i->defExists(0) && !i->getDef(0)->refCount()) { |
// the instruction is kept; only its unreferenced def is removed |
if (i->op == OP_ATOM || |
i->op == OP_SUREDP || |
i->op == OP_SUREDB) |
i->setDef(0, NULL); |
} |
} |
return true; |
} |
// Shrink a load with several defs so that it no longer produces the defs that |
// are unreferenced and have no register assigned; if the remaining defs do |
// not fit one load, a second load is cloned and inserted after the first. |
void |
DeadCodeElim::checkSplitLoad(Instruction *ld1) |
{ |
Instruction *ld2 = NULL; // can get at most 2 loads |
Value *def1[4]; |
Value *def2[4]; |
int32_t addr1, addr2; |
int32_t size1, size2; |
int d, n1, n2; |
// bit d stays set while def d is still needed |
uint32_t mask = 0xffffffff; |
for (d = 0; ld1->defExists(d); ++d) |
if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0) |
mask &= ~(1 << d); |
// every def is needed: nothing to do |
if (mask == 0xffffffff) |
return; |
addr1 = ld1->getSrc(0)->reg.data.offset; |
n1 = n2 = 0; |
size1 = size2 = 0; |
// first load: skip leading unused defs (advancing the address), then take |
// needed defs until an unused one follows or the 8-byte alignment test fails |
for (d = 0; ld1->defExists(d); ++d) { |
if (mask & (1 << d)) { |
if (size1 && (addr1 & 0x7)) |
break; |
def1[n1] = ld1->getDef(d); |
size1 += def1[n1++]->reg.size; |
} else |
if (!n1) { |
addr1 += ld1->getDef(d)->reg.size; |
} else { |
break; |
} |
} |
// second load: continues at the def where the first loop stopped |
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { |
if (mask & (1 << d)) { |
def2[n2] = ld1->getDef(d); |
size2 += def2[n2++]->reg.size; |
} else { |
assert(!n2); |
addr2 += ld1->getDef(d)->reg.size; |
} |
} |
updateLdStOffset(ld1, addr1, func); |
ld1->setType(typeOfSize(size1)); |
for (d = 0; d < 4; ++d) |
ld1->setDef(d, (d < n1) ? def1[d] : NULL); |
if (!n2) |
return; |
ld2 = cloneShallow(func, ld1); |
updateLdStOffset(ld2, addr2, func); |
ld2->setType(typeOfSize(size2)); |
for (d = 0; d < 4; ++d) |
ld2->setDef(d, (d < n2) ? def2[d] : NULL); |
ld1->bb->insertAfter(ld1, ld2); |
} |
// ============================================================================= |
// RUN_PASS(l, n, f): if the enclosing function's `level` is at least l, create |
// a pass object of class n and call its member f on `this`; a false result |
// makes the enclosing function return false. The pass name is logged when |
// NV50_IR_DEBUG_VERBOSE is set in dbgFlags. |
#define RUN_PASS(l, n, f) \ |
if (level >= (l)) { \ |
if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ |
INFO("PEEPHOLE: %s\n", #n); \ |
n pass; \ |
if (!pass.f(this)) \ |
return false; \ |
} |
// Run the SSA-form optimization passes in the order listed; the first |
// argument of each RUN_PASS is the minimum @level at which the pass runs. |
// Returns false as soon as one pass fails. |
bool |
Program::optimizeSSA(int level) |
{ |
RUN_PASS(1, DeadCodeElim, buryAll); |
RUN_PASS(1, CopyPropagation, run); |
RUN_PASS(1, MergeSplits, run); |
RUN_PASS(2, GlobalCSE, run); |
RUN_PASS(1, LocalCSE, run); |
RUN_PASS(2, AlgebraicOpt, run); |
RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks |
RUN_PASS(1, ConstantFolding, foldAll); |
RUN_PASS(1, LoadPropagation, run); |
RUN_PASS(2, MemoryOpt, run); |
RUN_PASS(2, LocalCSE, run); |
RUN_PASS(0, DeadCodeElim, buryAll); |
return true; |
} |
// Run the passes that work on code after register allocation. |
// Returns false as soon as one pass fails. |
bool |
Program::optimizePostRA(int level) |
{ |
RUN_PASS(2, FlatteningPass, run); |
// the MAD immediate folding is only run for chipsets below 0xc0 |
if (getTarget()->getChipset() < 0xc0) |
RUN_PASS(2, NV50PostRaConstantFolding, run); |
return true; |
} |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp |
---|
0,0 → 1,706 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
#define __STDC_FORMAT_MACROS |
#include <inttypes.h> |
namespace nv50_ir { |
// Index into the colour tables; one entry per kind of printed item. |
enum TextStyle |
{ |
TXT_DEFAULT, |
TXT_GPR, |
TXT_REGISTER, |
TXT_FLAGS, |
TXT_MEM, |
TXT_IMMD, |
TXT_BRA, |
TXT_INSN |
}; |
// Escape sequences, indexed by TextStyle. |
static const char *_colour[8] = |
{ |
"\x1b[00m", |
"\x1b[34m", |
"\x1b[35m", |
"\x1b[35m", |
"\x1b[36m", |
"\x1b[33m", |
"\x1b[37m", |
"\x1b[32m" |
}; |
// Same shape as _colour, but every entry is empty (no colouring). |
static const char *_nocolour[8] = |
{ |
"", "", "", "", "", "", "", "" |
}; |
// Table in use; not set until init_colours() has run. |
static const char **colour; |
// Select the colour table: the empty one if NV50_PROG_DEBUG_NO_COLORS is set |
// in the environment, the escape sequences otherwise. |
static void init_colours() |
{ |
   const bool plain = getenv("NV50_PROG_DEBUG_NO_COLORS") != NULL; |
   colour = plain ? _nocolour : _colour; |
} |
// Printable name of each operation; Instruction::print() indexes it with |
// Instruction::op. The last entry stands for an invalid operation. |
const char *operationStr[OP_LAST + 1] = |
{ |
"nop", |
"phi", |
"union", |
"split", |
"merge", |
"consec", |
"mov", |
"ld", |
"st", |
"add", |
"sub", |
"mul", |
"div", |
"mod", |
"mad", |
"fma", |
"sad", |
"abs", |
"neg", |
"not", |
"and", |
"or", |
"xor", |
"shl", |
"shr", |
"max", |
"min", |
"sat", |
"ceil", |
"floor", |
"trunc", |
"cvt", |
"set and", |
"set or", |
"set xor", |
"set", |
"selp", |
"slct", |
"rcp", |
"rsq", |
"lg2", |
"sin", |
"cos", |
"ex2", |
"exp", |
"log", |
"presin", |
"preex2", |
"sqrt", |
"pow", |
"bra", |
"call", |
"ret", |
"cont", |
"break", |
"preret", |
"precont", |
"prebreak", |
"brkpt", |
"joinat", |
"join", |
"discard", |
"exit", |
"membar", |
"vfetch", |
"pfetch", |
"export", |
"linterp", |
"pinterp", |
"emit", |
"restart", |
"tex", |
"texbias", |
"texlod", |
"texfetch", |
"texquery", |
"texgrad", |
"texgather", |
"texquerylod", |
"texcsaa", |
"texprep", |
"suldb", |
"suldp", |
"sustb", |
"sustp", |
"suredb", |
"suredp", |
"sulea", |
"subfm", |
"suclamp", |
"sueau", |
"madsp", |
"texbar", |
"dfdx", |
"dfdy", |
"rdsv", |
"wrsv", |
"pixld", |
"quadop", |
"quadon", |
"quadpop", |
"popcnt", |
"insbf", |
"extbf", |
"bfind", |
"permt", |
"atom", |
"bar", |
"vadd", |
"vavg", |
"vmin", |
"vmax", |
"vsad", |
"vset", |
"vshr", |
"vshl", |
"vsel", |
"cctl", |
"shfl", |
"(invalid)" |
}; |
// Names printed for the subOp of OP_ATOM / OP_SUREDP (bounds-checked by the user). |
static const char *atomSubOpStr[] = |
{ |
"add", "min", "max", "inc", "dec", "and", "or", "xor", "cas", "exch" |
}; |
// Names printed for Instruction::dType and Instruction::sType. |
static const char *DataTypeStr[] = |
{ |
"-", |
"u8", "s8", |
"u16", "s16", |
"u32", "s32", |
"u64", "s64", |
"f16", "f32", "f64", |
"b96", "b128" |
}; |
// Names printed for Instruction::rnd; the first entry is empty. |
static const char *RoundModeStr[] = |
{ |
"", "rm", "rz", "rp", "rni", "rmi", "rzi", "rpi" |
}; |
// Names printed for condition codes; Instruction::print() indexes it with |
// Instruction::cc and with the setCond of compare instructions. |
static const char *CondCodeStr[] = |
{ |
"never", |
"lt", |
"eq", |
"le", |
"gt", |
"ne", |
"ge", |
"", |
"(invalid)", |
"ltu", |
"equ", |
"leu", |
"gtu", |
"neu", |
"geu", |
"", |
"no", |
"nc", |
"ns", |
"na", |
"a", |
"s", |
"c", |
"o" |
}; |
// Names printed for system values; Symbol::print() indexes it with |
// reg.data.sv.sv. |
static const char *SemanticStr[SV_LAST + 1] = |
{ |
"POSITION", |
"VERTEX_ID", |
"INSTANCE_ID", |
"INVOCATION_ID", |
"PRIMITIVE_ID", |
"VERTEX_COUNT", |
"LAYER", |
"VIEWPORT_INDEX", |
"Y_DIR", |
"FACE", |
"POINT_SIZE", |
"POINT_COORD", |
"CLIP_DISTANCE", |
"SAMPLE_INDEX", |
"SAMPLE_POS", |
"SAMPLE_MASK", |
"TESS_FACTOR", |
"TESS_COORD", |
"TID", |
"CTAID", |
"NTID", |
"GRIDID", |
"NCTAID", |
"LANEID", |
"PHYSID", |
"NPHYSID", |
"CLOCK", |
"LBASE", |
"SBASE", |
"VERTEX_STRIDE", |
"INVOCATION_INFO", |
"?", |
"(INVALID)" |
}; |
// Names printed for Instruction::ipa of OP_LINTERP / OP_PINTERP. |
static const char *interpStr[16] = |
{ |
"pass", |
"mul", |
"flat", |
"sc", |
"cent pass", |
"cent mul", |
"cent flat", |
"cent sc", |
"off pass", |
"off mul", |
"off flat", |
"off sc", |
"samp pass", |
"samp mul", |
"samp flat", |
"samp sc" |
}; |
// The three macros below append to `buf` at `pos` and expect `buf`, `pos` and |
// `size` in the enclosing scope. snprintf returns the length the text would |
// have had, so pos can move past size on truncation; since pos and size are |
// size_t in every user in this file, size - pos would then wrap around and |
// the next write would land outside the buffer. Hence nothing is written once |
// pos has reached size. |
// PRINT: append formatted text. |
#define PRINT(args...) \ |
   do { \ |
      if (pos < size) \ |
         pos += snprintf(&buf[pos], size - pos, args); \ |
   } while(0) |
// SPACE_PRINT: like PRINT, preceded by one blank if cond holds. |
#define SPACE_PRINT(cond, args...) \ |
   do { \ |
      if ((cond) && pos < size) \ |
         buf[pos++] = ' '; \ |
      if (pos < size) \ |
         pos += snprintf(&buf[pos], size - pos, args); \ |
   } while(0) |
// SPACE: append one blank if there is room. |
#define SPACE() \ |
   do { \ |
      if (pos < size) \ |
         buf[pos++] = ' '; \ |
   } while(0) |
// Write the names of the modifier bits that are set ("not", "sat", "neg", |
// "abs", in that order, blank-separated) into @buf, preceded by the |
// instruction colour if any bit is set. Returns the position reached. |
int Modifier::print(char *buf, size_t size) const |
{ |
size_t pos = 0; |
if (bits) |
PRINT("%s", colour[TXT_INSN]); |
// position after the colour prefix: a separator is only wanted once at |
// least one name has been written behind it |
size_t base = pos; |
if (bits & NV50_IR_MOD_NOT) |
PRINT("not"); |
if (bits & NV50_IR_MOD_SAT) |
SPACE_PRINT(pos > base && pos < size, "sat"); |
if (bits & NV50_IR_MOD_NEG) |
SPACE_PRINT(pos > base && pos < size, "neg"); |
if (bits & NV50_IR_MOD_ABS) |
SPACE_PRINT(pos > base && pos < size, "abs"); |
return pos; |
} |
// Write this value as <prefix><file letter><index><size postfix> into @buf. |
// @ty is not used. Returns the position reached. |
int LValue::print(char *buf, size_t size, DataType ty) const |
{ |
const char *postFix = ""; |
size_t pos = 0; |
// with a register id assigned: '$' and that id; otherwise '%' and the value id |
int idx = join->reg.data.id >= 0 ? join->reg.data.id : id; |
char p = join->reg.data.id >= 0 ? '$' : '%'; |
char r; |
int col = TXT_DEFAULT; |
switch (reg.file) { |
case FILE_GPR: |
r = 'r'; col = TXT_GPR; |
if (reg.size == 2) { |
// assigned 2-byte registers print as half ("l"/"h") of register idx / 2 |
if (p == '$') { |
postFix = (idx & 1) ? "h" : "l"; |
idx /= 2; |
} else { |
postFix = "s"; |
} |
} else |
if (reg.size == 8) { |
postFix = "d"; |
} else |
if (reg.size == 16) { |
postFix = "q"; |
} else |
if (reg.size == 12) { |
postFix = "t"; |
} |
break; |
case FILE_PREDICATE: |
r = 'p'; col = TXT_REGISTER; |
if (reg.size == 2) |
postFix = "d"; |
else |
if (reg.size == 4) |
postFix = "q"; |
break; |
case FILE_FLAGS: |
r = 'c'; col = TXT_FLAGS; |
break; |
case FILE_ADDRESS: |
r = 'a'; col = TXT_REGISTER; |
break; |
default: |
assert(!"invalid file for lvalue"); |
r = '?'; |
break; |
} |
PRINT("%s%c%c%i%s", colour[col], p, r, idx, postFix); |
return pos; |
} |
// Write the immediate, interpreted as type @ty, into @buf in the immediate |
// colour. Types without a case of their own are printed as a 64-bit hex |
// value. Returns the position reached. |
int ImmediateValue::print(char *buf, size_t size, DataType ty) const |
{ |
   size_t pos = 0; |
   PRINT("%s", colour[TXT_IMMD]); |
   switch (ty) { |
   case TYPE_F32: PRINT("%f", reg.data.f32); break; |
   case TYPE_F64: PRINT("%f", reg.data.f64); break; |
   case TYPE_U8:  PRINT("0x%02x", reg.data.u8); break; |
   case TYPE_S8:  PRINT("%i", reg.data.s8); break; |
   case TYPE_U16: PRINT("0x%04x", reg.data.u16); break; |
   case TYPE_S16: PRINT("%i", reg.data.s16); break; |
   case TYPE_U32: PRINT("0x%08x", reg.data.u32); break; |
   case TYPE_S32: PRINT("%i", reg.data.s32); break; |
   case TYPE_U64: |
   case TYPE_S64: |
   default: |
      // The blank before PRIx64 is required: without it C++11 reads the |
      // macro name as a user-defined-literal suffix of the string. |
      PRINT("0x%016" PRIx64, reg.data.u64); |
      break; |
   } |
   return pos; |
} |
// Print the symbol without any indirect address values. |
int Symbol::print(char *buf, size_t size, DataType ty) const |
{ |
   Value *const noRel = NULL; |
   Value *const noDimRel = NULL; |
   return print(buf, size, noRel, noDimRel, ty); |
} |
// Write the symbol into @buf: system values as sv[NAME:index(+rel)], memory |
// symbols as <file letter>[(dimRel][)(rel+/-)offset]. @rel and @dimRel are |
// optional indirect values. Returns the position reached. |
int Symbol::print(char *buf, size_t size, |
Value *rel, Value *dimRel, DataType ty) const |
{ |
size_t pos = 0; |
char c; |
// NOTE(review): ty is assigned here but not read again in this function. |
if (ty == TYPE_NONE) |
ty = typeOfSize(reg.size); |
if (reg.file == FILE_SYSTEM_VALUE) { |
PRINT("%ssv[%s%s:%i%s", colour[TXT_MEM], |
colour[TXT_REGISTER], |
SemanticStr[reg.data.sv.sv], reg.data.sv.index, colour[TXT_MEM]); |
if (rel) { |
PRINT("%s+", colour[TXT_DEFAULT]); |
pos += rel->print(&buf[pos], size - pos); |
} |
PRINT("%s]", colour[TXT_MEM]); |
return pos; |
} |
// one letter per memory file |
switch (reg.file) { |
case FILE_MEMORY_CONST: c = 'c'; break; |
case FILE_SHADER_INPUT: c = 'a'; break; |
case FILE_SHADER_OUTPUT: c = 'o'; break; |
case FILE_MEMORY_GLOBAL: c = 'g'; break; |
case FILE_MEMORY_SHARED: c = 's'; break; |
case FILE_MEMORY_LOCAL: c = 'l'; break; |
default: |
assert(!"invalid file"); |
c = '?'; |
break; |
} |
// only constant memory prints its file index |
if (c == 'c') |
PRINT("%s%c%i[", colour[TXT_MEM], c, reg.fileIndex); |
else |
PRINT("%s%c[", colour[TXT_MEM], c); |
if (dimRel) { |
pos += dimRel->print(&buf[pos], size - pos, TYPE_S32); |
PRINT("%s][", colour[TXT_MEM]); |
} |
if (rel) { |
// the sign is printed separately, the offset below as absolute value |
pos += rel->print(&buf[pos], size - pos); |
PRINT("%s%c", colour[TXT_DEFAULT], (reg.data.offset < 0) ? '-' : '+'); |
} else { |
assert(reg.data.offset >= 0); |
} |
PRINT("%s0x%x%s]", colour[TXT_IMMD], abs(reg.data.offset), colour[TXT_MEM]); |
return pos; |
} |
// Format the instruction into a local buffer of BUFSZ bytes (predicate, |
// operation with its attributes, defs, sources) and emit it through INFO |
// together with encSize. |
void Instruction::print() const |
{ |
#define BUFSZ 512 |
const size_t size = BUFSZ; |
char buf[BUFSZ]; |
int s, d; |
size_t pos = 0; |
PRINT("%s", colour[TXT_INSN]); |
if (join) |
PRINT("join "); |
// predicate: "not" / condition code name, then the predicate value |
if (predSrc >= 0) { |
const size_t pre = pos; |
if (getSrc(predSrc)->reg.file == FILE_PREDICATE) { |
if (cc == CC_NOT_P) |
PRINT("not"); |
} else { |
PRINT("%s", CondCodeStr[cc]); |
} |
// separate only if something was printed in front of the value |
if (pos > pre) |
SPACE(); |
pos += getSrc(predSrc)->print(&buf[pos], BUFSZ - pos); |
PRINT(" %s", colour[TXT_INSN]); |
} |
if (saturate) |
PRINT("sat "); |
if (asFlow()) { |
// flow instructions: operation, flags and the target |
PRINT("%s", operationStr[op]); |
if (asFlow()->indirect) |
PRINT(" ind"); |
if (asFlow()->absolute) |
PRINT(" abs"); |
if (op == OP_CALL && asFlow()->builtin) { |
PRINT(" %sBUILTIN:%i", colour[TXT_BRA], asFlow()->target.builtin); |
} else |
if (op == OP_CALL && asFlow()->target.fn) { |
PRINT(" %s%s:%i", colour[TXT_BRA], |
asFlow()->target.fn->getName(), |
asFlow()->target.fn->getLabel()); |
} else |
if (asFlow()->target.bb) |
PRINT(" %sBB:%i", colour[TXT_BRA], asFlow()->target.bb->getId()); |
} else { |
// everything else: operation, sub-operation, attributes, result type |
PRINT("%s ", operationStr[op]); |
if (op == OP_LINTERP || op == OP_PINTERP) |
PRINT("%s ", interpStr[ipa]); |
switch (op) { |
case OP_SUREDP: |
case OP_ATOM: |
if (subOp < Elements(atomSubOpStr)) |
PRINT("%s ", atomSubOpStr[subOp]); |
break; |
default: |
if (subOp) |
PRINT("(SUBOP:%u) ", subOp); |
break; |
} |
if (perPatch) |
PRINT("patch "); |
if (asTex()) |
PRINT("%s %s$r%u $s%u %s", asTex()->tex.target.getName(), |
colour[TXT_MEM], asTex()->tex.r, asTex()->tex.s, |
colour[TXT_INSN]); |
if (postFactor) |
PRINT("x2^%i ", postFactor); |
PRINT("%s%s", dnz ? "dnz " : (ftz ? "ftz " : ""), DataTypeStr[dType]); |
} |
if (rnd != ROUND_N) |
PRINT(" %s", RoundModeStr[rnd]); |
// defs: in braces when there is more than one, '#' when there is none |
if (defExists(1)) |
PRINT(" {"); |
for (d = 0; defExists(d); ++d) { |
SPACE(); |
pos += getDef(d)->print(&buf[pos], size - pos); |
} |
if (d > 1) |
PRINT(" %s}", colour[TXT_INSN]); |
else |
if (!d && !asFlow()) |
PRINT(" %s#", colour[TXT_INSN]); |
if (asCmp()) |
PRINT(" %s%s", colour[TXT_INSN], CondCodeStr[asCmp()->setCond]); |
// the source type is only shown when it differs from the result type |
if (sType != dType) |
PRINT(" %s%s", colour[TXT_INSN], DataTypeStr[sType]); |
// sources, skipping the predicate and those flagged usedAsPtr |
for (s = 0; srcExists(s); ++s) { |
if (s == predSrc || src(s).usedAsPtr) |
continue; |
const size_t pre = pos; |
SPACE(); |
pos += src(s).mod.print(&buf[pos], BUFSZ - pos); |
// a modifier was printed: separate it from the value |
if (pos > pre + 1) |
SPACE(); |
if (src(s).isIndirect(0) || src(s).isIndirect(1)) |
pos += getSrc(s)->asSym()->print(&buf[pos], BUFSZ - pos, |
getIndirect(s, 0), |
getIndirect(s, 1)); |
else |
pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType); |
} |
if (exit) |
PRINT("%s exit", colour[TXT_INSN]); |
PRINT("%s", colour[TXT_DEFAULT]); |
// pos may lie beyond the buffer after truncation, hence the clamp |
buf[MIN2(pos, BUFSZ - 1)] = 0; |
INFO("%s (%u)\n", buf, encSize); |
} |
// Pass that prints functions, basic blocks and instructions through INFO. |
class PrintPass : public Pass |
{ |
public: |
PrintPass() : serial(0) { } |
virtual bool visit(Function *); |
virtual bool visit(BasicBlock *); |
virtual bool visit(Instruction *); |
private: |
// running number printed in front of each instruction |
int serial; |
}; |
// Print the header line of @fn: name, label and its output and input values. |
// Always returns true. |
bool |
PrintPass::visit(Function *fn) |
{ |
// scratch buffer for one printed value |
char str[16]; |
INFO("\n%s:%i (", fn->getName(), fn->getLabel()); |
if (!fn->outs.empty()) |
INFO("out"); |
for (std::deque<ValueRef>::iterator it = fn->outs.begin(); |
it != fn->outs.end(); |
++it) { |
it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size)); |
INFO(" %s", str); |
} |
// "in" is separated from the outputs by a comma if there were any |
if (!fn->ins.empty()) |
INFO("%s%sin", colour[TXT_DEFAULT], fn->outs.empty() ? "" : ", "); |
for (std::deque<ValueDef>::iterator it = fn->ins.begin(); |
it != fn->ins.end(); |
++it) { |
it->get()->print(str, sizeof(str), typeOfSize(it->get()->reg.size)); |
INFO(" %s", str); |
} |
INFO("%s)\n", colour[TXT_DEFAULT]); |
return true; |
} |
// Print the header of @bb: id, instruction count, immediate dominator (if |
// any), the blocks returned by getDF() and the outgoing edges. |
// Always returns true. |
bool |
PrintPass::visit(BasicBlock *bb) |
{ |
// disabled: listing of the incoming edges |
#if 0 |
INFO("---\n"); |
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) |
INFO(" <- BB:%i (%s)\n", |
BasicBlock::get(ei.getNode())->getId(), |
ei.getEdge()->typeStr()); |
#endif |
INFO("BB:%i (%u instructions) - ", bb->getId(), bb->getInsnCount()); |
if (bb->idom()) |
INFO("idom = BB:%i, ", bb->idom()->getId()); |
INFO("df = { "); |
for (DLList::Iterator df = bb->getDF().iterator(); !df.end(); df.next()) |
INFO("BB:%i ", BasicBlock::get(df)->getId()); |
INFO("}\n"); |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) |
INFO(" -> BB:%i (%s)\n", |
BasicBlock::get(ei.getNode())->getId(), |
ei.getEdge()->typeStr()); |
return true; |
} |
// Print @insn preceded by the running serial number. Always returns true. |
bool |
PrintPass::visit(Instruction *insn) |
{ |
INFO("%3i: ", serial++); |
insn->print(); |
return true; |
} |
// Print this function through a PrintPass. |
void |
Function::print() |
{ |
   // The print routines index the `colour` table, which stays NULL until |
   // init_colours() has run; do not rely on Program::print() having been |
   // called first. |
   init_colours(); |
   PrintPass pass; |
   pass.run(this, true, false); |
} |
// Print the whole program through a PrintPass after selecting the colours. |
void |
Program::print() |
{ |
   init_colours(); |
   PrintPass printer; |
   printer.run(this, true, false); |
} |
// Print the live interval of every lvalue of this function that has a |
// non-empty one. |
void |
Function::printLiveIntervals() const |
{ |
   INFO("printing live intervals ...\n"); |
   ArrayList::Iterator it = allLValues.iterator(); |
   while (!it.end()) { |
      const Value *lval = Value::get(it)->asLValue(); |
      const bool hasInterval = lval && !lval->livei.isEmpty(); |
      if (hasInterval) { |
         INFO("livei(%%%i): ", lval->id); |
         lval->livei.print(); |
      } |
      it.next(); |
   } |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp |
---|
0,0 → 1,2146 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
#include <stack> |
#include <limits> |
#include <tr1/unordered_set> |
namespace nv50_ir { |
#define MAX_REGISTER_FILE_SIZE 256 |
class RegisterSet |
{ |
public: |
RegisterSet(const Target *); |
void init(const Target *); |
void reset(DataFile, bool resetMax = false); |
void periodicMask(DataFile f, uint32_t lock, uint32_t unlock); |
void intersect(DataFile f, const RegisterSet *); |
bool assign(int32_t& reg, DataFile f, unsigned int size); |
void release(DataFile f, int32_t reg, unsigned int size); |
void occupy(DataFile f, int32_t reg, unsigned int size); |
void occupy(const Value *); |
void occupyMask(DataFile f, int32_t reg, uint8_t mask); |
bool isOccupied(DataFile f, int32_t reg, unsigned int size) const; |
bool testOccupy(const Value *); |
bool testOccupy(DataFile f, int32_t reg, unsigned int size); |
inline int getMaxAssigned(DataFile f) const { return fill[f]; } |
inline unsigned int getFileSize(DataFile f, uint8_t regSize) const |
{ |
if (restrictedGPR16Range && f == FILE_GPR && regSize == 2) |
return (last[f] + 1) / 2; |
return last[f] + 1; |
} |
inline unsigned int units(DataFile f, unsigned int size) const |
{ |
return size >> unit[f]; |
} |
// for regs of size >= 4, id is counted in 4-byte words (like nv50/c0 binary) |
inline unsigned int idToBytes(const Value *v) const |
{ |
return v->reg.data.id * MIN2(v->reg.size, 4); |
} |
inline unsigned int idToUnits(const Value *v) const |
{ |
return units(v->reg.file, idToBytes(v)); |
} |
inline int bytesToId(Value *v, unsigned int bytes) const |
{ |
if (v->reg.size < 4) |
return units(v->reg.file, bytes); |
return bytes / 4; |
} |
inline int unitsToId(DataFile f, int u, uint8_t size) const |
{ |
if (u < 0) |
return -1; |
return (size < 4) ? u : ((u << unit[f]) / 4); |
} |
void print() const; |
private: |
BitSet bits[LAST_REGISTER_FILE + 1]; |
int unit[LAST_REGISTER_FILE + 1]; // log2 of allocation granularity |
int last[LAST_REGISTER_FILE + 1]; |
int fill[LAST_REGISTER_FILE + 1]; |
const bool restrictedGPR16Range; |
}; |
void |
RegisterSet::reset(DataFile f, bool resetMax) |
{ |
bits[f].fill(0); |
if (resetMax) |
fill[f] = -1; |
} |
void |
RegisterSet::init(const Target *targ) |
{ |
for (unsigned int rf = 0; rf <= FILE_ADDRESS; ++rf) { |
DataFile f = static_cast<DataFile>(rf); |
last[rf] = targ->getFileSize(f) - 1; |
unit[rf] = targ->getFileUnit(f); |
fill[rf] = -1; |
assert(last[rf] < MAX_REGISTER_FILE_SIZE); |
bits[rf].allocate(last[rf] + 1, true); |
} |
} |
// Build the set for a target: allocate the bitmaps, then clear every file.
// The 16-bit GPR range restriction applies to chipsets below 0xc0.
RegisterSet::RegisterSet(const Target *targ)
   : restrictedGPR16Range(targ->getChipset() < 0xc0)
{
   init(targ);

   for (int file = 0; file <= LAST_REGISTER_FILE; ++file) {
      reset(static_cast<DataFile>(file));
   }
}
void |
RegisterSet::periodicMask(DataFile f, uint32_t lock, uint32_t unlock) |
{ |
bits[f].periodicMask32(lock, unlock); |
} |
void |
RegisterSet::intersect(DataFile f, const RegisterSet *set) |
{ |
bits[f] |= set->bits[f]; |
} |
void |
RegisterSet::print() const |
{ |
INFO("GPR:"); |
bits[FILE_GPR].print(); |
INFO("\n"); |
} |
bool |
RegisterSet::assign(int32_t& reg, DataFile f, unsigned int size) |
{ |
reg = bits[f].findFreeRange(size); |
if (reg < 0) |
return false; |
fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); |
return true; |
} |
bool |
RegisterSet::isOccupied(DataFile f, int32_t reg, unsigned int size) const |
{ |
return bits[f].testRange(reg, size); |
} |
void |
RegisterSet::occupy(const Value *v) |
{ |
occupy(v->reg.file, idToUnits(v), v->reg.size >> unit[v->reg.file]); |
} |
void |
RegisterSet::occupyMask(DataFile f, int32_t reg, uint8_t mask) |
{ |
bits[f].setMask(reg & ~31, static_cast<uint32_t>(mask) << (reg % 32)); |
} |
void |
RegisterSet::occupy(DataFile f, int32_t reg, unsigned int size) |
{ |
bits[f].setRange(reg, size); |
INFO_DBG(0, REG_ALLOC, "reg occupy: %u[%i] %u\n", f, reg, size); |
fill[f] = MAX2(fill[f], (int32_t)(reg + size - 1)); |
} |
bool |
RegisterSet::testOccupy(const Value *v) |
{ |
return testOccupy(v->reg.file, |
idToUnits(v), v->reg.size >> unit[v->reg.file]); |
} |
bool |
RegisterSet::testOccupy(DataFile f, int32_t reg, unsigned int size) |
{ |
if (isOccupied(f, reg, size)) |
return false; |
occupy(f, reg, size); |
return true; |
} |
void |
RegisterSet::release(DataFile f, int32_t reg, unsigned int size) |
{ |
bits[f].clrRange(reg, size); |
INFO_DBG(0, REG_ALLOC, "reg release: %u[%i] %u\n", f, reg, size); |
} |
class RegAlloc |
{ |
public: |
RegAlloc(Program *program) : prog(program), sequence(0) { } |
bool exec(); |
bool execFunc(); |
private: |
class PhiMovesPass : public Pass { |
private: |
virtual bool visit(BasicBlock *); |
inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p); |
}; |
class ArgumentMovesPass : public Pass { |
private: |
virtual bool visit(BasicBlock *); |
}; |
class BuildIntervalsPass : public Pass { |
private: |
virtual bool visit(BasicBlock *); |
void collectLiveValues(BasicBlock *); |
void addLiveRange(Value *, const BasicBlock *, int end); |
}; |
class InsertConstraintsPass : public Pass { |
public: |
bool exec(Function *func); |
private: |
virtual bool visit(BasicBlock *); |
bool insertConstraintMoves(); |
void condenseDefs(Instruction *); |
void condenseSrcs(Instruction *, const int first, const int last); |
void addHazard(Instruction *i, const ValueRef *src); |
void textureMask(TexInstruction *); |
void addConstraint(Instruction *, int s, int n); |
bool detectConflict(Instruction *, int s); |
// target specific functions, TODO: put in subclass or Target |
void texConstraintNV50(TexInstruction *); |
void texConstraintNVC0(TexInstruction *); |
void texConstraintNVE0(TexInstruction *); |
void texConstraintGM107(TexInstruction *); |
std::list<Instruction *> constrList; |
const Target *targ; |
}; |
bool buildLiveSets(BasicBlock *); |
private: |
Program *prog; |
Function *func; |
// instructions in control flow / chronological order |
ArrayList insns; |
int sequence; // for manual passes through CFG |
}; |
typedef std::pair<Value *, Value *> ValuePair; |
class SpillCodeInserter |
{ |
public: |
SpillCodeInserter(Function *fn) : func(fn), stackSize(0), stackBase(0) { } |
bool run(const std::list<ValuePair>&); |
Symbol *assignSlot(const Interval&, const unsigned int size); |
Value *offsetSlot(Value *, const LValue *); |
inline int32_t getStackSize() const { return stackSize; } |
private: |
Function *func; |
struct SpillSlot |
{ |
Interval occup; |
std::list<Value *> residents; // needed to recalculate occup |
Symbol *sym; |
int32_t offset; |
inline uint8_t size() const { return sym->reg.size; } |
}; |
std::list<SpillSlot> slots; |
int32_t stackSize; |
int32_t stackBase; |
LValue *unspill(Instruction *usei, LValue *, Value *slot); |
void spill(Instruction *defi, Value *slot, LValue *); |
}; |
void |
RegAlloc::BuildIntervalsPass::addLiveRange(Value *val, |
const BasicBlock *bb, |
int end) |
{ |
Instruction *insn = val->getUniqueInsn(); |
if (!insn) |
insn = bb->getFirst(); |
assert(bb->getFirst()->serial <= bb->getExit()->serial); |
assert(bb->getExit()->serial + 1 >= end); |
int begin = insn->serial; |
if (begin < bb->getEntry()->serial || begin > bb->getExit()->serial) |
begin = bb->getEntry()->serial; |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "%%%i <- live range [%i(%i), %i)\n", |
val->id, begin, insn->serial, end); |
if (begin != end) // empty ranges are only added as hazards for fixed regs |
val->livei.extend(begin, end); |
} |
bool |
RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p) |
{ |
if (b->cfg.incidentCount() <= 1) |
return false; |
int n = 0; |
for (Graph::EdgeIterator ei = p->cfg.outgoing(); !ei.end(); ei.next()) |
if (ei.getType() == Graph::Edge::TREE || |
ei.getType() == Graph::Edge::FORWARD) |
++n; |
return (n == 2); |
} |
// For each operand of each PHI in b, generate a new value by inserting a MOV |
// at the end of the block it is coming from and replace the operand with its |
// result. This eliminates liveness conflicts and enables us to let values be |
// copied to the right register if such a conflict exists nonetheless. |
// |
// These MOVs are also crucial in making sure the live intervals of phi srces |
// are extended until the end of the loop, since they are not included in the |
// live-in sets. |
bool |
RegAlloc::PhiMovesPass::visit(BasicBlock *bb) |
{ |
Instruction *phi, *mov; |
BasicBlock *pb, *pn; |
std::stack<BasicBlock *> stack; |
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { |
pb = BasicBlock::get(ei.getNode()); |
assert(pb); |
if (needNewElseBlock(bb, pb)) |
stack.push(pb); |
} |
while (!stack.empty()) { |
pb = stack.top(); |
pn = new BasicBlock(func); |
stack.pop(); |
pb->cfg.detach(&bb->cfg); |
pb->cfg.attach(&pn->cfg, Graph::Edge::TREE); |
pn->cfg.attach(&bb->cfg, Graph::Edge::FORWARD); |
assert(pb->getExit()->op != OP_CALL); |
if (pb->getExit()->asFlow()->target.bb == bb) |
pb->getExit()->asFlow()->target.bb = pn; |
} |
// insert MOVs (phi->src(j) should stem from j-th in-BB) |
int j = 0; |
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { |
pb = BasicBlock::get(ei.getNode()); |
if (!pb->isTerminated()) |
pb->insertTail(new_FlowInstruction(func, OP_BRA, bb)); |
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { |
LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue()); |
mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); |
mov->setSrc(0, phi->getSrc(j)); |
mov->setDef(0, tmp); |
phi->setSrc(j, tmp); |
pb->insertBefore(pb->getExit(), mov); |
} |
++j; |
} |
return true; |
} |
bool |
RegAlloc::ArgumentMovesPass::visit(BasicBlock *bb) |
{ |
// Bind function call inputs/outputs to the same physical register |
// the callee uses, inserting moves as appropriate for the case a |
// conflict arises. |
for (Instruction *i = bb->getEntry(); i; i = i->next) { |
FlowInstruction *cal = i->asFlow(); |
// TODO: Handle indirect calls. |
// Right now they should only be generated for builtins. |
if (!cal || cal->op != OP_CALL || cal->builtin || cal->indirect) |
continue; |
RegisterSet clobberSet(prog->getTarget()); |
// Bind input values. |
for (int s = cal->indirect ? 1 : 0; cal->srcExists(s); ++s) { |
const int t = cal->indirect ? (s - 1) : s; |
LValue *tmp = new_LValue(func, cal->getSrc(s)->asLValue()); |
tmp->reg.data.id = cal->target.fn->ins[t].rep()->reg.data.id; |
Instruction *mov = |
new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); |
mov->setDef(0, tmp); |
mov->setSrc(0, cal->getSrc(s)); |
cal->setSrc(s, tmp); |
bb->insertBefore(cal, mov); |
} |
// Bind output values. |
for (int d = 0; cal->defExists(d); ++d) { |
LValue *tmp = new_LValue(func, cal->getDef(d)->asLValue()); |
tmp->reg.data.id = cal->target.fn->outs[d].rep()->reg.data.id; |
Instruction *mov = |
new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); |
mov->setSrc(0, tmp); |
mov->setDef(0, cal->getDef(d)); |
cal->setDef(d, tmp); |
bb->insertAfter(cal, mov); |
clobberSet.occupy(tmp); |
} |
// Bind clobbered values. |
for (std::deque<Value *>::iterator it = cal->target.fn->clobbers.begin(); |
it != cal->target.fn->clobbers.end(); |
++it) { |
if (clobberSet.testOccupy(*it)) { |
Value *tmp = new_LValue(func, (*it)->asLValue()); |
tmp->reg.data.id = (*it)->reg.data.id; |
cal->setDef(cal->defCount(), tmp); |
} |
} |
} |
// Update the clobber set of the function. |
if (BasicBlock::get(func->cfgExit) == bb) { |
func->buildDefSets(); |
for (unsigned int i = 0; i < bb->defSet.getSize(); ++i) |
if (bb->defSet.test(i)) |
func->clobbers.push_back(func->getLValue(i)); |
} |
return true; |
} |
// Build the set of live-in variables of bb. |
bool |
RegAlloc::buildLiveSets(BasicBlock *bb) |
{ |
Function *f = bb->getFunction(); |
BasicBlock *bn; |
Instruction *i; |
unsigned int s, d; |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "buildLiveSets(BB:%i)\n", bb->getId()); |
bb->liveSet.allocate(func->allLValues.getSize(), false); |
int n = 0; |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
bn = BasicBlock::get(ei.getNode()); |
if (bn == bb) |
continue; |
if (bn->cfg.visit(sequence)) |
if (!buildLiveSets(bn)) |
return false; |
if (n++ || bb->liveSet.marker) |
bb->liveSet |= bn->liveSet; |
else |
bb->liveSet = bn->liveSet; |
} |
if (!n && !bb->liveSet.marker) |
bb->liveSet.fill(0); |
bb->liveSet.marker = true; |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { |
INFO("BB:%i live set of out blocks:\n", bb->getId()); |
bb->liveSet.print(); |
} |
// if (!bb->getEntry()) |
// return true; |
if (bb == BasicBlock::get(f->cfgExit)) { |
for (std::deque<ValueRef>::iterator it = f->outs.begin(); |
it != f->outs.end(); ++it) { |
assert(it->get()->asLValue()); |
bb->liveSet.set(it->get()->id); |
} |
} |
for (i = bb->getExit(); i && i != bb->getEntry()->prev; i = i->prev) { |
for (d = 0; i->defExists(d); ++d) |
bb->liveSet.clr(i->getDef(d)->id); |
for (s = 0; i->srcExists(s); ++s) |
if (i->getSrc(s)->asLValue()) |
bb->liveSet.set(i->getSrc(s)->id); |
} |
for (i = bb->getPhi(); i && i->op == OP_PHI; i = i->next) |
bb->liveSet.clr(i->getDef(0)->id); |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { |
INFO("BB:%i live set after propagation:\n", bb->getId()); |
bb->liveSet.print(); |
} |
return true; |
} |
void |
RegAlloc::BuildIntervalsPass::collectLiveValues(BasicBlock *bb) |
{ |
BasicBlock *bbA = NULL, *bbB = NULL; |
if (bb->cfg.outgoingCount()) { |
// trickery to save a loop of OR'ing liveSets |
// aliasing works fine with BitSet::setOr |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
if (ei.getType() == Graph::Edge::DUMMY) |
continue; |
if (bbA) { |
bb->liveSet.setOr(&bbA->liveSet, &bbB->liveSet); |
bbA = bb; |
} else { |
bbA = bbB; |
} |
bbB = BasicBlock::get(ei.getNode()); |
} |
bb->liveSet.setOr(&bbB->liveSet, bbA ? &bbA->liveSet : NULL); |
} else |
if (bb->cfg.incidentCount()) { |
bb->liveSet.fill(0); |
} |
} |
bool |
RegAlloc::BuildIntervalsPass::visit(BasicBlock *bb) |
{ |
collectLiveValues(bb); |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "BuildIntervals(BB:%i)\n", bb->getId()); |
// go through out blocks and delete phi sources that do not originate from |
// the current block from the live set |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
BasicBlock *out = BasicBlock::get(ei.getNode()); |
for (Instruction *i = out->getPhi(); i && i->op == OP_PHI; i = i->next) { |
bb->liveSet.clr(i->getDef(0)->id); |
for (int s = 0; i->srcExists(s); ++s) { |
assert(i->src(s).getInsn()); |
if (i->getSrc(s)->getUniqueInsn()->bb == bb) // XXX: reachableBy ? |
bb->liveSet.set(i->getSrc(s)->id); |
else |
bb->liveSet.clr(i->getSrc(s)->id); |
} |
} |
} |
// remaining live-outs are live until end |
if (bb->getExit()) { |
for (unsigned int j = 0; j < bb->liveSet.getSize(); ++j) |
if (bb->liveSet.test(j)) |
addLiveRange(func->getLValue(j), bb, bb->getExit()->serial + 1); |
} |
for (Instruction *i = bb->getExit(); i && i->op != OP_PHI; i = i->prev) { |
for (int d = 0; i->defExists(d); ++d) { |
bb->liveSet.clr(i->getDef(d)->id); |
if (i->getDef(d)->reg.data.id >= 0) // add hazard for fixed regs |
i->getDef(d)->livei.extend(i->serial, i->serial); |
} |
for (int s = 0; i->srcExists(s); ++s) { |
if (!i->getSrc(s)->asLValue()) |
continue; |
if (!bb->liveSet.test(i->getSrc(s)->id)) { |
bb->liveSet.set(i->getSrc(s)->id); |
addLiveRange(i->getSrc(s), bb, i->serial); |
} |
} |
} |
if (bb == BasicBlock::get(func->cfg.getRoot())) { |
for (std::deque<ValueDef>::iterator it = func->ins.begin(); |
it != func->ins.end(); ++it) { |
if (it->get()->reg.data.id >= 0) // add hazard for fixed regs |
it->get()->livei.extend(0, 1); |
} |
} |
return true; |
} |
#define JOIN_MASK_PHI (1 << 0) |
#define JOIN_MASK_UNION (1 << 1) |
#define JOIN_MASK_MOV (1 << 2) |
#define JOIN_MASK_TEX (1 << 3) |
class GCRA |
{ |
public: |
GCRA(Function *, SpillCodeInserter&); |
~GCRA(); |
bool allocateRegisters(ArrayList& insns); |
void printNodeInfo() const; |
private: |
class RIG_Node : public Graph::Node |
{ |
public: |
RIG_Node(); |
void init(const RegisterSet&, LValue *); |
void addInterference(RIG_Node *); |
void addRegPreference(RIG_Node *); |
inline LValue *getValue() const |
{ |
return reinterpret_cast<LValue *>(data); |
} |
inline void setValue(LValue *lval) { data = lval; } |
inline uint8_t getCompMask() const |
{ |
return ((1 << colors) - 1) << (reg & 7); |
} |
static inline RIG_Node *get(const Graph::EdgeIterator& ei) |
{ |
return static_cast<RIG_Node *>(ei.getNode()); |
} |
public: |
uint32_t degree; |
uint16_t degreeLimit; // if deg < degLimit, node is trivially colourable |
uint16_t colors; |
DataFile f; |
int32_t reg; |
float weight; |
// list pointers for simplify() phase |
RIG_Node *next; |
RIG_Node *prev; |
// union of the live intervals of all coalesced values (we want to retain |
// the separate intervals for testing interference of compound values) |
Interval livei; |
std::list<RIG_Node *> prefRegs; |
}; |
private: |
inline RIG_Node *getNode(const LValue *v) const { return &nodes[v->id]; } |
void buildRIG(ArrayList&); |
bool coalesce(ArrayList&); |
bool doCoalesce(ArrayList&, unsigned int mask); |
void calculateSpillWeights(); |
void simplify(); |
bool selectRegisters(); |
void cleanup(const bool success); |
void simplifyEdge(RIG_Node *, RIG_Node *); |
void simplifyNode(RIG_Node *); |
bool coalesceValues(Value *, Value *, bool force); |
void resolveSplitsAndMerges(); |
void makeCompound(Instruction *, bool isSplit); |
inline void checkInterference(const RIG_Node *, Graph::EdgeIterator&); |
inline void insertOrderedTail(std::list<RIG_Node *>&, RIG_Node *); |
void checkList(std::list<RIG_Node *>&); |
private: |
std::stack<uint32_t> stack; |
// list headers for simplify() phase |
RIG_Node lo[2]; |
RIG_Node hi; |
Graph RIG; |
RIG_Node *nodes; |
unsigned int nodeCount; |
Function *func; |
Program *prog; |
static uint8_t relDegree[17][17]; |
RegisterSet regs; |
// need to fixup register id for participants of OP_MERGE/SPLIT |
std::list<Instruction *> merges; |
std::list<Instruction *> splits; |
SpillCodeInserter& spill; |
std::list<ValuePair> mustSpill; |
}; |
uint8_t GCRA::relDegree[17][17]; |
// A fresh node carries no data, is linked to itself in the simplify lists
// and has zero colours (which marks it as unused, see printNodeInfo()).
GCRA::RIG_Node::RIG_Node()
   : Node(NULL),
     next(this),
     prev(this)
{
   colors = 0;
}
void |
GCRA::printNodeInfo() const |
{ |
for (unsigned int i = 0; i < nodeCount; ++i) { |
if (!nodes[i].colors) |
continue; |
INFO("RIG_Node[%%%i]($[%u]%i): %u colors, weight %f, deg %u/%u\n X", |
i, |
nodes[i].f,nodes[i].reg,nodes[i].colors, |
nodes[i].weight, |
nodes[i].degree, nodes[i].degreeLimit); |
for (Graph::EdgeIterator ei = nodes[i].outgoing(); !ei.end(); ei.next()) |
INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); |
for (Graph::EdgeIterator ei = nodes[i].incident(); !ei.end(); ei.next()) |
INFO(" %%%i", RIG_Node::get(ei)->getValue()->id); |
INFO("\n"); |
} |
} |
void |
GCRA::RIG_Node::init(const RegisterSet& regs, LValue *lval) |
{ |
setValue(lval); |
if (lval->reg.data.id >= 0) |
lval->noSpill = lval->fixedReg = 1; |
colors = regs.units(lval->reg.file, lval->reg.size); |
f = lval->reg.file; |
reg = -1; |
if (lval->reg.data.id >= 0) |
reg = regs.idToUnits(lval); |
weight = std::numeric_limits<float>::infinity(); |
degree = 0; |
degreeLimit = regs.getFileSize(f, lval->reg.size); |
degreeLimit -= relDegree[1][colors] - 1; |
livei.insert(lval->livei); |
} |
bool |
GCRA::coalesceValues(Value *dst, Value *src, bool force) |
{ |
LValue *rep = dst->join->asLValue(); |
LValue *val = src->join->asLValue(); |
if (!force && val->reg.data.id >= 0) { |
rep = src->join->asLValue(); |
val = dst->join->asLValue(); |
} |
RIG_Node *nRep = &nodes[rep->id]; |
RIG_Node *nVal = &nodes[val->id]; |
if (src->reg.file != dst->reg.file) { |
if (!force) |
return false; |
WARN("forced coalescing of values in different files !\n"); |
} |
if (!force && dst->reg.size != src->reg.size) |
return false; |
if ((rep->reg.data.id >= 0) && (rep->reg.data.id != val->reg.data.id)) { |
if (force) { |
if (val->reg.data.id >= 0) |
WARN("forced coalescing of values in different fixed regs !\n"); |
} else { |
if (val->reg.data.id >= 0) |
return false; |
// make sure that there is no overlap with the fixed register of rep |
for (ArrayList::Iterator it = func->allLValues.iterator(); |
!it.end(); it.next()) { |
Value *reg = reinterpret_cast<Value *>(it.get())->asLValue(); |
assert(reg); |
if (reg->interfers(rep) && reg->livei.overlaps(nVal->livei)) |
return false; |
} |
} |
} |
if (!force && nRep->livei.overlaps(nVal->livei)) |
return false; |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "joining %%%i($%i) <- %%%i\n", |
rep->id, rep->reg.data.id, val->id); |
// set join pointer of all values joined with val |
for (Value::DefIterator def = val->defs.begin(); def != val->defs.end(); |
++def) |
(*def)->get()->join = rep; |
assert(rep->join == rep && val->join == rep); |
// add val's definitions to rep and extend the live interval of its RIG node |
rep->defs.insert(rep->defs.end(), val->defs.begin(), val->defs.end()); |
nRep->livei.unify(nVal->livei); |
return true; |
} |
bool |
GCRA::coalesce(ArrayList& insns) |
{ |
bool ret = doCoalesce(insns, JOIN_MASK_PHI); |
if (!ret) |
return false; |
switch (func->getProgram()->getTarget()->getChipset() & ~0xf) { |
case 0x50: |
case 0x80: |
case 0x90: |
case 0xa0: |
ret = doCoalesce(insns, JOIN_MASK_UNION | JOIN_MASK_TEX); |
break; |
case 0xc0: |
case 0xd0: |
case 0xe0: |
case 0xf0: |
case 0x100: |
case 0x110: |
ret = doCoalesce(insns, JOIN_MASK_UNION); |
break; |
default: |
break; |
} |
if (!ret) |
return false; |
return doCoalesce(insns, JOIN_MASK_MOV); |
} |
// Build the 8-bit component mask of a part that is `size` units wide and
// starts at unit `base` of a compound value that is `compSize` units wide.
//
// The part's bit pattern is replicated so that it repeats every compSize
// bits (every 4 bits for compSize 3); a compound of size 1 yields 0xff and
// sizes above 4 yield the plain pattern.
static inline uint8_t makeCompMask(int compSize, int base, int size)
{
   uint8_t pattern = ((1 << size) - 1) << base;

   if (compSize == 1)
      return 0xff;

   if (compSize == 2) {
      pattern |= (pattern << 2);
      return (pattern << 4) | pattern;
   }

   if (compSize == 3 || compSize == 4)
      return (pattern << 4) | pattern;

   assert(compSize <= 8);
   return pattern;
}
// Used when coalescing moves. The non-compound value will become one, e.g.:
// mov b32 $r0 $r2 / merge b64 $r0d { $r0 $r1 }
// split b64 { $r0 $r1 } $r0d / mov b64 $r0d f64 $r2d
//
// The compound flag and mask are copied from src to dst, or the other way
// round if only dst is a compound.
static inline void copyCompound(Value *dst, Value *src)
{
   LValue *const a = dst->asLValue();
   LValue *const b = src->asLValue();

   const bool reversed = a->compound && !b->compound;
   LValue *const to = reversed ? b : a;
   LValue *const from = reversed ? a : b;

   to->compound = from->compound;
   to->compMask = from->compMask;
}
void |
GCRA::makeCompound(Instruction *insn, bool split) |
{ |
LValue *rep = (split ? insn->getSrc(0) : insn->getDef(0))->asLValue(); |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) { |
INFO("makeCompound(split = %i): ", split); |
insn->print(); |
} |
const unsigned int size = getNode(rep)->colors; |
unsigned int base = 0; |
if (!rep->compound) |
rep->compMask = 0xff; |
rep->compound = 1; |
for (int c = 0; split ? insn->defExists(c) : insn->srcExists(c); ++c) { |
LValue *val = (split ? insn->getDef(c) : insn->getSrc(c))->asLValue(); |
val->compound = 1; |
if (!val->compMask) |
val->compMask = 0xff; |
val->compMask &= makeCompMask(size, base, getNode(val)->colors); |
assert(val->compMask); |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "compound: %%%i:%02x <- %%%i:%02x\n", |
rep->id, rep->compMask, val->id, val->compMask); |
base += getNode(val)->colors; |
} |
assert(base == size); |
} |
bool |
GCRA::doCoalesce(ArrayList& insns, unsigned int mask) |
{ |
int c, n; |
for (n = 0; n < insns.getSize(); ++n) { |
Instruction *i; |
Instruction *insn = reinterpret_cast<Instruction *>(insns.get(n)); |
switch (insn->op) { |
case OP_PHI: |
if (!(mask & JOIN_MASK_PHI)) |
break; |
for (c = 0; insn->srcExists(c); ++c) |
if (!coalesceValues(insn->getDef(0), insn->getSrc(c), false)) { |
// this is bad |
ERROR("failed to coalesce phi operands\n"); |
return false; |
} |
break; |
case OP_UNION: |
case OP_MERGE: |
if (!(mask & JOIN_MASK_UNION)) |
break; |
for (c = 0; insn->srcExists(c); ++c) |
coalesceValues(insn->getDef(0), insn->getSrc(c), true); |
if (insn->op == OP_MERGE) { |
merges.push_back(insn); |
if (insn->srcExists(1)) |
makeCompound(insn, false); |
} |
break; |
case OP_SPLIT: |
if (!(mask & JOIN_MASK_UNION)) |
break; |
splits.push_back(insn); |
for (c = 0; insn->defExists(c); ++c) |
coalesceValues(insn->getSrc(0), insn->getDef(c), true); |
makeCompound(insn, true); |
break; |
case OP_MOV: |
if (!(mask & JOIN_MASK_MOV)) |
break; |
i = NULL; |
if (!insn->getDef(0)->uses.empty()) |
i = (*insn->getDef(0)->uses.begin())->getInsn(); |
// if this is a contraint-move there will only be a single use |
if (i && i->op == OP_MERGE) // do we really still need this ? |
break; |
i = insn->getSrc(0)->getUniqueInsn(); |
if (i && !i->constrainedDefs()) { |
if (coalesceValues(insn->getDef(0), insn->getSrc(0), false)) |
copyCompound(insn->getSrc(0), insn->getDef(0)); |
} |
break; |
case OP_TEX: |
case OP_TXB: |
case OP_TXL: |
case OP_TXF: |
case OP_TXQ: |
case OP_TXD: |
case OP_TXG: |
case OP_TXLQ: |
case OP_TEXCSAA: |
case OP_TEXPREP: |
if (!(mask & JOIN_MASK_TEX)) |
break; |
for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c) |
coalesceValues(insn->getDef(c), insn->getSrc(c), true); |
break; |
default: |
break; |
} |
} |
return true; |
} |
void |
GCRA::RIG_Node::addInterference(RIG_Node *node) |
{ |
this->degree += relDegree[node->colors][colors]; |
node->degree += relDegree[colors][node->colors]; |
this->attach(node, Graph::Edge::CROSS); |
} |
void |
GCRA::RIG_Node::addRegPreference(RIG_Node *node) |
{ |
prefRegs.push_back(node); |
} |
// Set up an allocator for function fn that reports spills through `spill`.
//
// `nodes` and `nodeCount` are explicitly cleared: the destructor tests and
// delete[]s `nodes`, so an object destroyed before the node array has been
// assigned must not carry an indeterminate pointer.
// Members are initialised in declaration order.
GCRA::GCRA(Function *fn, SpillCodeInserter& spill) :
   nodes(NULL),
   nodeCount(0),
   func(fn),
   prog(fn->getProgram()),
   regs(fn->getProgram()->getTarget()),
   spill(spill)
{
   // initialize relative degrees array - i takes away from j
   for (int i = 1; i <= 16; ++i)
      for (int j = 1; j <= 16; ++j)
         relDegree[i][j] = j * ((i + j - 1) / j);
}
// Release the node array; delete[] on a null pointer is a no-op, so no
// explicit test is needed.
GCRA::~GCRA()
{
   delete[] nodes;
}
void |
GCRA::checkList(std::list<RIG_Node *>& lst) |
{ |
GCRA::RIG_Node *prev = NULL; |
for (std::list<RIG_Node *>::iterator it = lst.begin(); |
it != lst.end(); |
++it) { |
assert((*it)->getValue()->join == (*it)->getValue()); |
if (prev) |
assert(prev->livei.begin() <= (*it)->livei.begin()); |
prev = *it; |
} |
} |
void |
GCRA::insertOrderedTail(std::list<RIG_Node *>& list, RIG_Node *node) |
{ |
if (node->livei.isEmpty()) |
return; |
// only the intervals of joined values don't necessarily arrive in order |
std::list<RIG_Node *>::iterator prev, it; |
for (it = list.end(); it != list.begin(); it = prev) { |
prev = it; |
--prev; |
if ((*prev)->livei.begin() <= node->livei.begin()) |
break; |
} |
list.insert(it, node); |
} |
void |
GCRA::buildRIG(ArrayList& insns) |
{ |
std::list<RIG_Node *> values, active; |
for (std::deque<ValueDef>::iterator it = func->ins.begin(); |
it != func->ins.end(); ++it) |
insertOrderedTail(values, getNode(it->get()->asLValue())); |
for (int i = 0; i < insns.getSize(); ++i) { |
Instruction *insn = reinterpret_cast<Instruction *>(insns.get(i)); |
for (int d = 0; insn->defExists(d); ++d) |
if (insn->getDef(d)->rep() == insn->getDef(d)) |
insertOrderedTail(values, getNode(insn->getDef(d)->asLValue())); |
} |
checkList(values); |
while (!values.empty()) { |
RIG_Node *cur = values.front(); |
for (std::list<RIG_Node *>::iterator it = active.begin(); |
it != active.end();) { |
RIG_Node *node = *it; |
if (node->livei.end() <= cur->livei.begin()) { |
it = active.erase(it); |
} else { |
if (node->f == cur->f && node->livei.overlaps(cur->livei)) |
cur->addInterference(node); |
++it; |
} |
} |
values.pop_front(); |
active.push_back(cur); |
} |
} |
void |
GCRA::calculateSpillWeights() |
{ |
for (unsigned int i = 0; i < nodeCount; ++i) { |
RIG_Node *const n = &nodes[i]; |
if (!nodes[i].colors || nodes[i].livei.isEmpty()) |
continue; |
if (nodes[i].reg >= 0) { |
// update max reg |
regs.occupy(n->f, n->reg, n->colors); |
continue; |
} |
LValue *val = nodes[i].getValue(); |
if (!val->noSpill) { |
int rc = 0; |
for (Value::DefIterator it = val->defs.begin(); |
it != val->defs.end(); |
++it) |
rc += (*it)->get()->refCount(); |
nodes[i].weight = |
(float)rc * (float)rc / (float)nodes[i].livei.extent(); |
} |
if (nodes[i].degree < nodes[i].degreeLimit) { |
int l = 0; |
if (val->reg.size > 4) |
l = 1; |
DLLIST_ADDHEAD(&lo[l], &nodes[i]); |
} else { |
DLLIST_ADDHEAD(&hi, &nodes[i]); |
} |
} |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) |
printNodeInfo(); |
} |
void |
GCRA::simplifyEdge(RIG_Node *a, RIG_Node *b) |
{ |
bool move = b->degree >= b->degreeLimit; |
INFO_DBG(prog->dbgFlags, REG_ALLOC, |
"edge: (%%%i, deg %u/%u) >-< (%%%i, deg %u/%u)\n", |
a->getValue()->id, a->degree, a->degreeLimit, |
b->getValue()->id, b->degree, b->degreeLimit); |
b->degree -= relDegree[a->colors][b->colors]; |
move = move && b->degree < b->degreeLimit; |
if (move && !DLLIST_EMPTY(b)) { |
int l = (b->getValue()->reg.size > 4) ? 1 : 0; |
DLLIST_DEL(b); |
DLLIST_ADDTAIL(&lo[l], b); |
} |
} |
void |
GCRA::simplifyNode(RIG_Node *node) |
{ |
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) |
simplifyEdge(node, RIG_Node::get(ei)); |
for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) |
simplifyEdge(node, RIG_Node::get(ei)); |
DLLIST_DEL(node); |
stack.push(node->getValue()->id); |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "SIMPLIFY: pushed %%%i%s\n", |
node->getValue()->id, |
(node->degree < node->degreeLimit) ? "" : "(spill)"); |
} |
void |
GCRA::simplify() |
{ |
for (;;) { |
if (!DLLIST_EMPTY(&lo[0])) { |
do { |
simplifyNode(lo[0].next); |
} while (!DLLIST_EMPTY(&lo[0])); |
} else |
if (!DLLIST_EMPTY(&lo[1])) { |
simplifyNode(lo[1].next); |
} else |
if (!DLLIST_EMPTY(&hi)) { |
RIG_Node *best = hi.next; |
float bestScore = best->weight / (float)best->degree; |
// spill candidate |
for (RIG_Node *it = best->next; it != &hi; it = it->next) { |
float score = it->weight / (float)it->degree; |
if (score < bestScore) { |
best = it; |
bestScore = score; |
} |
} |
if (isinf(bestScore)) { |
ERROR("no viable spill candidates left\n"); |
break; |
} |
simplifyNode(best); |
} else { |
break; |
} |
} |
} |
// Mark the registers already taken by one neighbour of @node as occupied |
// in the register set, so that they are not picked for @node. |
// @node: the node currently being colored |
// @ei:   edge iterator positioned on the neighbour to examine |
void |
GCRA::checkInterference(const RIG_Node *node, Graph::EdgeIterator& ei) |
{ |
const RIG_Node *intf = RIG_Node::get(ei); |
// A neighbour without a register (reg < 0) blocks nothing. |
if (intf->reg < 0) |
return; |
const LValue *vA = node->getValue(); |
const LValue *vB = intf->getValue(); |
// One bit per color of the neighbour, shifted to its position inside the |
// group of 8 units that contains intf->reg. |
const uint8_t intfMask = ((1 << intf->colors) - 1) << (intf->reg & 7); |
if (vA->compound | vB->compound) { |
// NOTE: this only works for >aligned< register tuples ! |
// Compare every def of A against every def of B and occupy only the |
// components whose live intervals really overlap. |
for (Value::DefCIterator D = vA->defs.begin(); D != vA->defs.end(); ++D) { |
for (Value::DefCIterator d = vB->defs.begin(); d != vB->defs.end(); ++d) { |
const LValue *vD = (*D)->get()->asLValue(); |
const LValue *vd = (*d)->get()->asLValue(); |
if (!vD->livei.overlaps(vd->livei)) { |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "(%%%i) X (%%%i): no overlap\n", |
vD->id, vd->id); |
continue; |
} |
// Start from A's component mask (all bits if A's def is not compound) |
// and restrict it by what B's def actually covers. |
uint8_t mask = vD->compound ? vD->compMask : ~0; |
if (vd->compound) { |
assert(vB->compound); |
mask &= vd->compMask & vB->compMask; |
} else { |
mask &= intfMask; |
} |
INFO_DBG(prog->dbgFlags, REG_ALLOC, |
"(%%%i)%02x X (%%%i)%02x & %02x: $r%i.%02x\n", |
vD->id, |
vD->compound ? vD->compMask : 0xff, |
vd->id, |
vd->compound ? vd->compMask : intfMask, |
vB->compMask, intf->reg & ~7, mask); |
// The mask is applied relative to the 8-aligned base of intf->reg. |
if (mask) |
regs.occupyMask(node->f, intf->reg & ~7, mask); |
} |
} |
} else { |
// Neither value is compound: block the neighbour's whole register range. |
INFO_DBG(prog->dbgFlags, REG_ALLOC, |
"(%%%i) X (%%%i): $r%i + %u\n", |
vA->id, vB->id, intf->reg, intf->colors); |
regs.occupy(node->f, intf->reg, intf->colors); |
} |
} |
// SELECT phase: pop the nodes pushed by simplify() and give each one a |
// register that none of its already-colored neighbours occupies. |
// Nodes that cannot be colored are collected in mustSpill. |
// Returns true if every node got a register, false if anything must spill. |
bool |
GCRA::selectRegisters() |
{ |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nSELECT phase\n"); |
while (!stack.empty()) { |
RIG_Node *node = &nodes[stack.top()]; |
stack.pop(); |
// Start from a clean occupancy map for this node's register file, then |
// block everything held by neighbours in both edge directions. |
regs.reset(node->f); |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "\nNODE[%%%i, %u colors]\n", |
node->getValue()->id, node->colors); |
for (Graph::EdgeIterator ei = node->outgoing(); !ei.end(); ei.next()) |
checkInterference(node, ei); |
for (Graph::EdgeIterator ei = node->incident(); !ei.end(); ei.next()) |
checkInterference(node, ei); |
// Try the registers of preferred nodes first; take the first one that |
// is assigned and still free for this node's width. |
if (!node->prefRegs.empty()) { |
for (std::list<RIG_Node *>::const_iterator it = node->prefRegs.begin(); |
it != node->prefRegs.end(); |
++it) { |
if ((*it)->reg >= 0 && |
regs.testOccupy(node->f, (*it)->reg, node->colors)) { |
node->reg = (*it)->reg; |
break; |
} |
} |
} |
// Already has a register (from the preference loop above or from earlier). |
if (node->reg >= 0) |
continue; |
LValue *lval = node->getValue(); |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) |
regs.print(); |
bool ret = regs.assign(node->reg, node->f, node->colors); |
if (ret) { |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "assigned reg %i\n", node->reg); |
lval->compMask = node->getCompMask(); |
} else { |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "must spill: %%%i (size %u)\n", |
lval->id, lval->reg.size); |
// Only GPR values get a spill slot here; for other files the slot |
// stays NULL. |
Symbol *slot = NULL; |
if (lval->reg.file == FILE_GPR) |
slot = spill.assignSlot(node->livei, lval->reg.size); |
mustSpill.push_back(ValuePair(lval, slot)); |
} |
} |
if (!mustSpill.empty()) |
return false; |
// Everything was colored: translate node registers into register ids on |
// the values themselves. |
for (unsigned int i = 0; i < nodeCount; ++i) { |
LValue *lval = nodes[i].getValue(); |
if (nodes[i].reg >= 0 && nodes[i].colors > 0) |
lval->reg.data.id = |
regs.unitsToId(nodes[i].f, nodes[i].reg, lval->reg.size); |
} |
return true; |
} |
// Run one complete allocation attempt for the current function: |
// create one RIG node per LValue, coalesce, build the interference graph, |
// compute spill weights, simplify and select. |
// On select failure spill code is inserted and false is returned; the |
// retry itself is done by the caller. |
// cleanup() is always run before returning once the nodes were allocated. |
bool |
GCRA::allocateRegisters(ArrayList& insns) |
{ |
bool ret; |
INFO_DBG(prog->dbgFlags, REG_ALLOC, |
"allocateRegisters to %u instructions\n", insns.getSize()); |
nodeCount = func->allLValues.getSize(); |
nodes = new RIG_Node[nodeCount]; |
// NOTE(review): a plain new[] expression reports failure by throwing, so |
// this check is presumably never taken - confirm before relying on it. |
if (!nodes) |
return false; |
// Slots of allLValues may be empty; only populated ones become RIG nodes. |
for (unsigned int i = 0; i < nodeCount; ++i) { |
LValue *lval = reinterpret_cast<LValue *>(func->allLValues.get(i)); |
if (lval) { |
nodes[i].init(regs, lval); |
RIG.insert(&nodes[i]); |
} |
} |
// coalesce first, we use only 1 RIG node for a group of joined values |
ret = coalesce(insns); |
if (!ret) |
goto out; |
if (func->getProgram()->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) |
func->printLiveIntervals(); |
buildRIG(insns); |
calculateSpillWeights(); |
simplify(); |
ret = selectRegisters(); |
if (!ret) { |
INFO_DBG(prog->dbgFlags, REG_ALLOC, |
"selectRegisters failed, inserting spill code ...\n"); |
regs.reset(FILE_GPR, true); |
spill.run(mustSpill); |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) |
func->print(); |
} else { |
// Success: track the highest GPR used across the whole program. |
prog->maxGPR = std::max(prog->maxGPR, regs.getMaxAssigned(FILE_GPR)); |
} |
out: |
cleanup(ret); |
return ret; |
} |
void |
GCRA::cleanup(const bool success) |
{ |
mustSpill.clear(); |
for (ArrayList::Iterator it = func->allLValues.iterator(); |
!it.end(); it.next()) { |
LValue *lval = reinterpret_cast<LValue *>(it.get()); |
lval->livei.clear(); |
lval->compound = 0; |
lval->compMask = 0; |
if (lval->join == lval) |
continue; |
if (success) { |
lval->reg.data.id = lval->join->reg.data.id; |
} else { |
for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); |
++d) |
lval->join->defs.remove(*d); |
lval->join = lval; |
} |
} |
if (success) |
resolveSplitsAndMerges(); |
splits.clear(); // avoid duplicate entries on next coalesce pass |
merges.clear(); |
delete[] nodes; |
nodes = NULL; |
} |
// Return a local-memory symbol that can hold a spilled value of @size bytes |
// whose live interval is @livei. |
// The loop below is written to reuse an existing slot whose occupancy does |
// not overlap @livei; if none is found a new slot is appended at the end |
// of the stack area and stackSize grows accordingly. |
Symbol * |
SpillCodeInserter::assignSlot(const Interval &livei, const unsigned int size) |
{ |
SpillSlot slot; |
int32_t offsetBase = stackSize; |
int32_t offset; |
std::list<SpillSlot>::iterator pos = slots.end(), it = slots.begin(); |
// Round the starting offset up to a multiple of the requested size. |
if (offsetBase % size) |
offsetBase += size - (offsetBase % size); |
slot.sym = NULL; |
// NOTE(review): offsetBase starts at stackSize (or above, after rounding), |
// so the condition "offset < stackSize" is false on entry and this reuse |
// loop does not execute as written; every call falls through to the |
// "new slot" path below. Confirm whether that is intended. |
for (offset = offsetBase; offset < stackSize; offset += size) { |
const int32_t entryEnd = offset + size; |
// Skip slots that lie entirely before the candidate offset. |
while (it != slots.end() && it->offset < offset) |
++it; |
if (it == slots.end()) // no slots left |
break; |
std::list<SpillSlot>::iterator bgn = it; |
// Look for an occupancy conflict among slots inside [offset, entryEnd). |
while (it != slots.end() && it->offset < entryEnd) { |
it->occup.print(); |
if (it->occup.overlaps(livei)) |
break; |
++it; |
} |
if (it == slots.end() || it->offset >= entryEnd) { |
// fits |
for (; bgn != slots.end() && bgn->offset < entryEnd; ++bgn) { |
bgn->occup.insert(livei); |
if (bgn->size() == size) |
slot.sym = bgn->sym; |
} |
break; |
} |
} |
if (!slot.sym) { |
// Nothing reusable: create a fresh slot at the current offset. |
stackSize = offset + size; |
slot.offset = offset; |
slot.sym = new_Symbol(func->getProgram(), FILE_MEMORY_LOCAL); |
// Without a stack pointer the symbol address is biased by the |
// function's tlsBase. |
if (!func->stackPtr) |
offset += func->tlsBase; |
slot.sym->setAddress(NULL, offset); |
slot.sym->reg.size = size; |
slots.insert(pos, slot)->occup.insert(livei); |
} |
return slot.sym; |
} |
// Return the slot to use for @lval inside the spill slot @base.
// A non-compound value, or a compound one whose mask includes component 0,
// uses @base directly. Otherwise a shallow clone of @base is returned whose
// offset is advanced by the index of the first set mask bit times the
// value's size, and whose size is set to that of @lval.
Value *
SpillCodeInserter::offsetSlot(Value *base, const LValue *lval)
{
   if (lval->compound && !(lval->compMask & 0x1)) {
      Value *part = cloneShallow(func, base);

      part->reg.data.offset += (ffs(lval->compMask) - 1) * lval->reg.size;
      part->reg.size = lval->reg.size;
      return part;
   }
   return base;
}
void |
SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval) |
{ |
const DataType ty = typeOfSize(lval->reg.size); |
slot = offsetSlot(slot, lval); |
Instruction *st; |
if (slot->reg.file == FILE_MEMORY_LOCAL) { |
st = new_Instruction(func, OP_STORE, ty); |
st->setSrc(0, slot); |
st->setSrc(1, lval); |
lval->noSpill = 1; |
} else { |
st = new_Instruction(func, OP_CVT, ty); |
st->setDef(0, slot); |
st->setSrc(0, lval); |
} |
defi->bb->insertAfter(defi, st); |
} |
// Insert the instruction that reloads a spilled value in front of @usei.
// The reload targets a shallow clone of @lval, which is returned so the
// caller can substitute it for the original use. A local-memory slot is
// read with OP_LOAD (and the clone is flagged noSpill); any other slot is
// read with OP_CVT.
LValue *
SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
{
   const DataType ty = typeOfSize(lval->reg.size);
   Value *const src = offsetSlot(slot, lval);
   LValue *const fresh = cloneShallow(func, lval);
   const bool fromMemory = (src->reg.file == FILE_MEMORY_LOCAL);

   if (fromMemory)
      fresh->noSpill = 1;

   Instruction *reload =
      new_Instruction(func, fromMemory ? OP_LOAD : OP_CVT, ty);
   reload->setDef(0, fresh);
   reload->setSrc(0, src);

   usei->bb->insertBefore(usei, reload);
   return fresh;
}
// For each value that is to be spilled, go through all its definitions. |
// A value can have multiple definitions if it has been coalesced before. |
// For each definition, first go through all its uses and insert an unspill |
// instruction before it, then replace the use with the temporary register. |
// Unspill can be either a load from memory or simply a move to another |
// register file. |
// For "Pseudo" instructions (like PHI, SPLIT, MERGE) we can erase the use |
// if we have spilled to a memory location, or simply with the new register. |
// No load or conversion instruction should be needed. |
bool |
SpillCodeInserter::run(const std::list<ValuePair>& lst) |
{ |
for (std::list<ValuePair>::const_iterator it = lst.begin(); it != lst.end(); |
++it) { |
LValue *lval = it->first->asLValue(); |
Symbol *mem = it->second ? it->second->asSym() : NULL; |
// Keep track of which instructions to delete later. Deleting them |
// inside the loop is unsafe since a single instruction may have |
// multiple destinations that all need to be spilled (like OP_SPLIT). |
std::tr1::unordered_set<Instruction *> to_del; |
for (Value::DefIterator d = lval->defs.begin(); d != lval->defs.end(); |
++d) { |
Value *slot = mem ? |
static_cast<Value *>(mem) : new_LValue(func, FILE_GPR); |
Value *tmp = NULL; |
Instruction *last = NULL; |
LValue *dval = (*d)->get()->asLValue(); |
Instruction *defi = (*d)->getInsn(); |
// Unspill at each use *before* inserting spill instructions, |
// we don't want to have the spill instructions in the use list here. |
while (!dval->uses.empty()) { |
ValueRef *u = *dval->uses.begin(); |
Instruction *usei = u->getInsn(); |
assert(usei); |
if (usei->isPseudo()) { |
tmp = (slot->reg.file == FILE_MEMORY_LOCAL) ? NULL : slot; |
last = NULL; |
} else |
if (!last || usei != last->next) { // TODO: sort uses |
tmp = unspill(usei, dval, slot); |
last = usei; |
} |
u->set(tmp); |
} |
assert(defi); |
if (defi->isPseudo()) { |
d = lval->defs.erase(d); |
--d; |
if (slot->reg.file == FILE_MEMORY_LOCAL) |
to_del.insert(defi); |
else |
defi->setDef(0, slot); |
} else { |
spill(defi, slot, dval); |
} |
} |
for (std::tr1::unordered_set<Instruction *>::const_iterator it = to_del.begin(); |
it != to_del.end(); ++it) |
delete_Instruction(func->getProgram(), *it); |
} |
// TODO: We're not trying to reuse old slots in a potential next iteration. |
// We have to update the slots' livei intervals to be able to do that. |
stackBase = stackSize; |
slots.clear(); |
return true; |
} |
bool |
RegAlloc::exec() |
{ |
for (IteratorRef it = prog->calls.iteratorDFS(false); |
!it->end(); it->next()) { |
func = Function::get(reinterpret_cast<Graph::Node *>(it->get())); |
func->tlsBase = prog->tlsSize; |
if (!execFunc()) |
return false; |
prog->tlsSize += func->tlsSize; |
} |
return true; |
} |
// Allocate registers for the current function (this->func). |
// Runs the constraint / phi-move / argument-move preparation passes once, |
// then makes up to three allocation attempts, each rebuilding live sets and |
// live intervals because a failed attempt inserts spill code. |
// Returns true on success; on return func->tlsSize holds the spill stack |
// size unless one of the preparation passes failed. |
bool |
RegAlloc::execFunc() |
{ |
InsertConstraintsPass insertConstr; |
PhiMovesPass insertPhiMoves; |
ArgumentMovesPass insertArgMoves; |
BuildIntervalsPass buildIntervals; |
SpillCodeInserter insertSpills(func); |
GCRA gcra(func, insertSpills); |
unsigned int i, retries; |
bool ret; |
if (!func->ins.empty()) { |
// Insert a nop at the entry so inputs only used by the first instruction |
// don't count as having an empty live range. |
Instruction *nop = new_Instruction(func, OP_NOP, TYPE_NONE); |
BasicBlock::get(func->cfg.getRoot())->insertHead(nop); |
} |
ret = insertConstr.exec(func); |
if (!ret) |
goto out; |
ret = insertPhiMoves.run(func); |
if (!ret) |
goto out; |
ret = insertArgMoves.run(func); |
if (!ret) |
goto out; |
// TODO: need to fix up spill slot usage ranges to support > 1 retry |
for (retries = 0; retries < 3; ++retries) { |
if (retries && (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC)) |
INFO("Retry: %i\n", retries); |
if (prog->dbgFlags & NV50_IR_DEBUG_REG_ALLOC) |
func->print(); |
// spilling to registers may add live ranges, need to rebuild everything |
ret = true; |
// Live sets are recomputed loopNestingBound + 1 times, each pass with a |
// fresh CFG visit sequence number. |
for (sequence = func->cfg.nextSequence(), i = 0; |
ret && i <= func->loopNestingBound; |
sequence = func->cfg.nextSequence(), ++i) |
ret = buildLiveSets(BasicBlock::get(func->cfg.getRoot())); |
// reset marker |
for (ArrayList::Iterator bi = func->allBBlocks.iterator(); |
!bi.end(); bi.next()) |
BasicBlock::get(bi)->liveSet.marker = false; |
if (!ret) |
break; |
func->orderInstructions(this->insns); |
ret = buildIntervals.run(func); |
if (!ret) |
break; |
ret = gcra.allocateRegisters(insns); |
if (ret) |
break; // success |
} |
INFO_DBG(prog->dbgFlags, REG_ALLOC, "RegAlloc done: %i\n", ret); |
func->tlsSize = insertSpills.getStackSize(); |
out: |
return ret; |
} |
// TODO: check if modifying Instruction::join here breaks anything |
// |
// After a successful allocation, give the individual values of every |
// recorded SPLIT (its defs) and MERGE (its sources) their own register id, |
// laid out consecutively starting at the register of the wide value, and |
// make each of them its own join representative again. |
void |
GCRA::resolveSplitsAndMerges() |
{ |
for (std::list<Instruction *>::iterator it = splits.begin(); |
it != splits.end(); |
++it) { |
Instruction *split = *it; |
// Byte position of the wide source; advanced by each def's size. |
unsigned int reg = regs.idToBytes(split->getSrc(0)); |
for (int d = 0; split->defExists(d); ++d) { |
Value *v = split->getDef(d); |
v->reg.data.id = regs.bytesToId(v, reg); |
v->join = v; |
reg += v->reg.size; |
} |
} |
splits.clear(); |
for (std::list<Instruction *>::iterator it = merges.begin(); |
it != merges.end(); |
++it) { |
Instruction *merge = *it; |
// Byte position of the wide result; advanced by each source's size. |
unsigned int reg = regs.idToBytes(merge->getDef(0)); |
for (int s = 0; merge->srcExists(s); ++s) { |
Value *v = merge->getSrc(s); |
v->reg.data.id = regs.bytesToId(v, reg); |
v->join = v; |
// If the value is defined by a phi/union node, we also need to |
// perform the same fixup on that node's sources, since after RA |
// their registers should be identical. |
// NOTE(review): v->getInsn() is dereferenced without a NULL check; |
// this presumably relies on every merge source having a def - confirm. |
if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) { |
Instruction *phi = v->getInsn(); |
for (int phis = 0; phi->srcExists(phis); ++phis) |
phi->getSrc(phis)->join = v; |
} |
reg += v->reg.size; |
} |
} |
merges.clear(); |
} |
bool Program::registerAllocation() |
{ |
RegAlloc ra(this); |
return ra.exec(); |
} |
bool |
RegAlloc::InsertConstraintsPass::exec(Function *ir) |
{ |
constrList.clear(); |
bool ret = run(ir, true, true); |
if (ret) |
ret = insertConstraintMoves(); |
return ret; |
} |
// TODO: make part of texture insn |
void |
RegAlloc::InsertConstraintsPass::textureMask(TexInstruction *tex) |
{ |
Value *def[4]; |
int c, k, d; |
uint8_t mask = 0; |
for (d = 0, k = 0, c = 0; c < 4; ++c) { |
if (!(tex->tex.mask & (1 << c))) |
continue; |
if (tex->getDef(k)->refCount()) { |
mask |= 1 << c; |
def[d++] = tex->getDef(k); |
} |
++k; |
} |
tex->tex.mask = mask; |
for (c = 0; c < d; ++c) |
tex->setDef(c, def[c]); |
for (; c < 4; ++c) |
tex->setDef(c, NULL); |
} |
bool |
RegAlloc::InsertConstraintsPass::detectConflict(Instruction *cst, int s) |
{ |
Value *v = cst->getSrc(s); |
// current register allocation can't handle it if a value participates in |
// multiple constraints |
for (Value::UseIterator it = v->uses.begin(); it != v->uses.end(); ++it) { |
if (cst != (*it)->getInsn()) |
return true; |
} |
// can start at s + 1 because detectConflict is called on all sources |
for (int c = s + 1; cst->srcExists(c); ++c) |
if (v == cst->getSrc(c)) |
return true; |
Instruction *defi = v->getInsn(); |
return (!defi || defi->constrainedDefs()); |
} |
// Route sources [s, s + n) of @i through an OP_CONSTRAINT instruction. |
// An already recorded constraint op with the same n sources is reused when |
// found; otherwise a new one with fresh GPR defs is inserted before @i and |
// appended to constrList. |
void |
RegAlloc::InsertConstraintsPass::addConstraint(Instruction *i, int s, int n) |
{ |
Instruction *cst; |
int d; |
// first, look for an existing identical constraint op |
for (std::list<Instruction *>::iterator it = constrList.begin(); |
it != constrList.end(); |
++it) { |
cst = (*it); |
// The search stops at the first entry whose block does not dominate i's. |
if (!i->bb->dominatedBy(cst->bb)) |
break; |
for (d = 0; d < n; ++d) |
if (cst->getSrc(d) != i->getSrc(d + s)) |
break; |
if (d >= n) { |
// All n sources match: use the existing op's defs as i's sources. |
for (d = 0; d < n; ++d, ++s) |
i->setSrc(s, cst->getDef(d)); |
return; |
} |
} |
cst = new_Instruction(func, OP_CONSTRAINT, i->dType); |
for (d = 0; d < n; ++s, ++d) { |
cst->setDef(d, new_LValue(func, FILE_GPR)); |
cst->setSrc(d, i->getSrc(s)); |
i->setSrc(s, cst->getDef(d)); |
} |
i->bb->insertBefore(i, cst); |
constrList.push_back(cst); |
} |
// Add a dummy use of the pointer source of >= 8 byte loads after the load |
// to prevent it from being assigned a register which overlapping the load's |
// destination, which would produce random corruptions. |
void |
RegAlloc::InsertConstraintsPass::addHazard(Instruction *i, const ValueRef *src) |
{ |
Instruction *hzd = new_Instruction(func, OP_NOP, TYPE_NONE); |
hzd->setSrc(0, src->get()); |
i->bb->insertAfter(i, hzd); |
} |
// b32 { %r0 %r1 %r2 %r3 } -> b128 %r0q |
// |
// Replace the leading run of GPR defs of @insn by a single wide LValue and |
// insert an OP_SPLIT after @insn that produces the original defs from it. |
// Nothing is done when fewer than two leading GPR defs exist. |
void |
RegAlloc::InsertConstraintsPass::condenseDefs(Instruction *insn) |
{ |
uint8_t size = 0; |
int n; |
// n = number of leading defs in the GPR file, size = their total size. |
for (n = 0; insn->defExists(n) && insn->def(n).getFile() == FILE_GPR; ++n) |
size += insn->getDef(n)->reg.size; |
if (n < 2) |
return; |
LValue *lval = new_LValue(func, FILE_GPR); |
lval->reg.size = size; |
Instruction *split = new_Instruction(func, OP_SPLIT, typeOfSize(size)); |
split->setSrc(0, lval); |
// Hand the original defs over to the split. |
for (int d = 0; d < n; ++d) { |
split->setDef(d, insn->getDef(d)); |
insn->setDef(d, NULL); |
} |
insn->setDef(0, lval); |
// Move any defs that followed the condensed run down to slots 1.. |
for (int k = 1, d = n; insn->defExists(d); ++d, ++k) { |
insn->setDef(k, insn->getDef(d)); |
insn->setDef(d, NULL); |
} |
// carry over predicate if any (mainly for OP_UNION uses) |
split->setPredicate(insn->cc, insn->getPredicate()); |
insn->bb->insertAfter(insn, split); |
constrList.push_back(split); |
} |
// Replace sources [a, b] (inclusive) of @insn by a single wide LValue that |
// is produced by an OP_MERGE inserted before @insn. Sources after b are |
// shifted down to follow the new wide source. |
// Nothing is done when a >= b or when the combined size is 0. |
void |
RegAlloc::InsertConstraintsPass::condenseSrcs(Instruction *insn, |
const int a, const int b) |
{ |
uint8_t size = 0; |
if (a >= b) |
return; |
for (int s = a; s <= b; ++s) |
size += insn->getSrc(s)->reg.size; |
if (!size) |
return; |
LValue *lval = new_LValue(func, FILE_GPR); |
lval->reg.size = size; |
// NOTE(review): takeExtraSources/putExtraSources presumably park sources |
// such as predicate/indirect references while the regular sources are |
// renumbered - confirm against their definition. |
Value *save[3]; |
insn->takeExtraSources(0, save); |
Instruction *merge = new_Instruction(func, OP_MERGE, typeOfSize(size)); |
merge->setDef(0, lval); |
// Hand the original sources over to the merge. |
for (int s = a, i = 0; s <= b; ++s, ++i) { |
merge->setSrc(i, insn->getSrc(s)); |
insn->setSrc(s, NULL); |
} |
insn->setSrc(a, lval); |
// Close the gap left behind the wide source. |
for (int k = a + 1, s = b + 1; insn->srcExists(s); ++s, ++k) { |
insn->setSrc(k, insn->getSrc(s)); |
insn->setSrc(s, NULL); |
} |
insn->bb->insertBefore(insn, merge); |
insn->putExtraSources(0, save); |
constrList.push_back(merge); |
} |
// Texture/surface constraints for the chipset family dispatched to this |
// handler by visit() (0x110): condense the defs, then condense the sources |
// into at most two register groups (the first s sources, then the n |
// remaining ones). |
void |
RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) |
{ |
int n, s; |
if (isTextureOp(tex->op)) |
textureMask(tex); |
condenseDefs(tex); |
if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { |
// Surface stores: group the data sources, which start at source 3 and |
// span typeSizeof(dType) / 4 entries. |
condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); |
} else |
if (isTextureOp(tex->op)) { |
if (tex->op != OP_TXQ) { |
s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); |
if (tex->op == OP_TXD) { |
// Indirect handle belongs in the first arg |
if (tex->tex.rIndirectSrc >= 0) |
s++; |
if (!tex->tex.target.isArray() && tex->tex.useOffsets) |
s++; |
} |
n = tex->srcCount(0xff) - s; |
} else { |
// TXQ: all sources go into the first group. |
s = tex->srcCount(0xff); |
n = 0; |
} |
if (s > 1) |
condenseSrcs(tex, 0, s - 1); |
if (n > 1) // NOTE: first call modified positions already |
condenseSrcs(tex, 1, n); |
} |
} |
void |
RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) |
{ |
if (isTextureOp(tex->op)) |
textureMask(tex); |
condenseDefs(tex); |
if (tex->op == OP_SUSTB || tex->op == OP_SUSTP) { |
condenseSrcs(tex, 3, (3 + typeSizeof(tex->dType) / 4) - 1); |
} else |
if (isTextureOp(tex->op)) { |
int n = tex->srcCount(0xff, true); |
if (n > 4) { |
condenseSrcs(tex, 0, 3); |
if (n > 5) // NOTE: first call modified positions already |
condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); |
} else |
if (n > 1) { |
condenseSrcs(tex, 0, n - 1); |
} |
} |
} |
// Texture constraints for the chipset families dispatched to this handler |
// by visit() (0xc0, 0xd0): reduce the def mask, group the sources into at |
// most two register groups (the first s sources, then the n remaining |
// ones), and finally condense the defs. |
void |
RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) |
{ |
int n, s; |
textureMask(tex); |
if (tex->op == OP_TXQ) { |
// TXQ: all sources go into the first group. |
s = tex->srcCount(0xff); |
n = 0; |
} else { |
s = tex->tex.target.getArgCount() - tex->tex.target.isMS(); |
// For non-array targets an indirect resource/sampler source adds one |
// entry to the first group. |
if (!tex->tex.target.isArray() && |
(tex->tex.rIndirectSrc >= 0 || tex->tex.sIndirectSrc >= 0)) |
++s; |
if (tex->op == OP_TXD && tex->tex.useOffsets) |
++s; |
n = tex->srcCount(0xff) - s; |
assert(n <= 4); |
} |
if (s > 1) |
condenseSrcs(tex, 0, s - 1); |
if (n > 1) // NOTE: first call modified positions already |
condenseSrcs(tex, 1, n); |
condenseDefs(tex); |
} |
// Texture constraints for the chipset families dispatched to this handler |
// by visit() (0x50, 0x80, 0x90, 0xa0): pad sources and defs with new |
// LValues until both counts match, then condense the defs and all |
// sources into one group each. |
void |
RegAlloc::InsertConstraintsPass::texConstraintNV50(TexInstruction *tex) |
{ |
// The predicate is detached while sources are counted and padded, and |
// restored afterwards. |
Value *pred = tex->getPredicate(); |
if (pred) |
tex->setPredicate(tex->cc, NULL); |
textureMask(tex); |
assert(tex->defExists(0) && tex->srcExists(0)); |
// make src and def count match |
int c; |
for (c = 0; tex->srcExists(c) || tex->defExists(c); ++c) { |
if (!tex->srcExists(c)) |
tex->setSrc(c, new_LValue(func, tex->getSrc(0)->asLValue())); |
if (!tex->defExists(c)) |
tex->setDef(c, new_LValue(func, tex->getDef(0)->asLValue())); |
} |
if (pred) |
tex->setPredicate(tex->cc, pred); |
condenseDefs(tex); |
condenseSrcs(tex, 0, c - 1); |
} |
// Insert constraint markers for instructions whose multiple sources must be |
// located in consecutive registers. |
// |
// Walks all instructions of @bb and dispatches on the kind of instruction: |
// texture ops go to the chipset specific handler, EXPORT/STORE get their |
// data sources condensed, LOAD/VFETCH get their defs condensed, and existing |
// UNION/MERGE/SPLIT ops are recorded in constrList. Always returns true. |
bool |
RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) |
{ |
TexInstruction *tex; |
Instruction *next; |
int s, size; |
targ = bb->getProgram()->getTarget(); |
// "next" is fetched up front because the handlers insert instructions |
// around i. |
for (Instruction *i = bb->getEntry(); i; i = next) { |
next = i->next; |
if ((tex = i->asTex())) { |
// Dispatch on the chipset with the low 4 bits cleared. |
switch (targ->getChipset() & ~0xf) { |
case 0x50: |
case 0x80: |
case 0x90: |
case 0xa0: |
texConstraintNV50(tex); |
break; |
case 0xc0: |
case 0xd0: |
texConstraintNVC0(tex); |
break; |
case 0xe0: |
case 0xf0: |
case 0x100: |
texConstraintNVE0(tex); |
break; |
case 0x110: |
texConstraintGM107(tex); |
break; |
default: |
break; |
} |
} else |
if (i->op == OP_EXPORT || i->op == OP_STORE) { |
// Count sources, starting at 1, until their sizes add up to the |
// size of the instruction's data type, then group sources 1 .. s - 1. |
for (size = typeSizeof(i->dType), s = 1; size > 0; ++s) { |
assert(i->srcExists(s)); |
size -= i->getSrc(s)->reg.size; |
} |
condenseSrcs(i, 1, s - 1); |
} else |
if (i->op == OP_LOAD || i->op == OP_VFETCH) { |
condenseDefs(i); |
// Indirect loads of 8 bytes or more get a dummy use of the address. |
if (i->src(0).isIndirect(0) && typeSizeof(i->dType) >= 8) |
addHazard(i, i->src(0).getIndirect(0)); |
} else |
if (i->op == OP_UNION || |
i->op == OP_MERGE || |
i->op == OP_SPLIT) { |
constrList.push_back(i); |
} |
} |
return true; |
} |
// Insert extra moves so that, if multiple register constraints on a value are |
// in conflict, these conflicts can be resolved. |
// |
// Walks constrList; for MERGE and UNION ops every source either gets a NOP |
// def (if it has no def at all), is left alone (single reference and an |
// unconstrained def), or is replaced by a fresh copy made with OP_MOV. |
// Always returns true. |
bool |
RegAlloc::InsertConstraintsPass::insertConstraintMoves() |
{ |
for (std::list<Instruction *>::iterator it = constrList.begin(); |
it != constrList.end(); |
++it) { |
Instruction *cst = *it; |
Instruction *mov; |
// The "&& 0" makes this condition always false, so the SPLIT handling |
// below is currently disabled. |
if (cst->op == OP_SPLIT && 0) { |
// spilling splits is annoying, just make sure they're separate |
for (int d = 0; cst->defExists(d); ++d) { |
if (!cst->getDef(d)->refCount()) |
continue; |
LValue *lval = new_LValue(func, cst->def(d).getFile()); |
const uint8_t size = cst->def(d).getSize(); |
lval->reg.size = size; |
mov = new_Instruction(func, OP_MOV, typeOfSize(size)); |
mov->setSrc(0, lval); |
mov->setDef(0, cst->getDef(d)); |
cst->setDef(d, mov->getSrc(0)); |
cst->bb->insertAfter(cst, mov); |
cst->getSrc(0)->asLValue()->noSpill = 1; |
mov->getSrc(0)->asLValue()->noSpill = 1; |
} |
} else |
if (cst->op == OP_MERGE || cst->op == OP_UNION) { |
for (int s = 0; cst->srcExists(s); ++s) { |
const uint8_t size = cst->src(s).getSize(); |
// A source without any def gets a NOP that defines it. |
if (!cst->getSrc(s)->defs.size()) { |
mov = new_Instruction(func, OP_NOP, typeOfSize(size)); |
mov->setDef(0, cst->getSrc(s)); |
cst->bb->insertBefore(cst, mov); |
continue; |
} |
assert(cst->getSrc(s)->defs.size() == 1); // still SSA |
Instruction *defi = cst->getSrc(s)->defs.front()->getInsn(); |
// catch some cases where we don't really need MOVs |
if (cst->getSrc(s)->refCount() == 1 && !defi->constrainedDefs()) |
continue; |
// Otherwise feed the constraint op from a private copy of the source. |
LValue *lval = new_LValue(func, cst->src(s).getFile()); |
lval->reg.size = size; |
mov = new_Instruction(func, OP_MOV, typeOfSize(size)); |
mov->setDef(0, lval); |
mov->setSrc(0, cst->getSrc(s)); |
cst->setSrc(s, mov->getDef(0)); |
cst->bb->insertBefore(cst, mov); |
cst->getDef(0)->asLValue()->noSpill = 1; // doesn't help |
// For UNION the copy is predicated like the original definition. |
if (cst->op == OP_UNION) |
mov->setPredicate(defi->cc, defi->getPredicate()); |
} |
} |
} |
return true; |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp |
---|
0,0 → 1,552 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
namespace nv50_ir { |
// Converts nv50 IR generated from TGSI to SSA form. |
// DominatorTree implements an algorithm for finding immediate dominators, |
// as described by T. Lengauer & R. Tarjan. |
// |
// The tree is itself a Graph; it is built in the constructor from the |
// control flow graph passed in. |
class DominatorTree : public Graph |
{ |
public: |
DominatorTree(Graph *cfg); |
~DominatorTree() { } |
bool dominates(BasicBlock *, BasicBlock *); |
void findDominanceFrontiers(); |
private: |
void build(); |
void buildDFS(Node *); |
void squash(int); |
inline void link(int, int); |
inline int eval(int); |
void debugPrint(); |
Graph *cfg; |
// Working storage: both arrays are allocated and released inside the |
// constructor, so they are only valid while build() runs. |
Node **vert; |
int *data; |
const int count; |
// "data" holds five int arrays of "count" entries each, laid out back to |
// back; these macros select one array and index it by node number. |
#define SEMI(i) (data[(i) + 0 * count]) |
#define ANCESTOR(i) (data[(i) + 1 * count]) |
#define PARENT(i) (data[(i) + 2 * count]) |
#define LABEL(i) (data[(i) + 3 * count]) |
#define DOM(i) (data[(i) + 4 * count]) |
}; |
void DominatorTree::debugPrint() |
{ |
for (int i = 0; i < count; ++i) { |
INFO("SEMI(%i) = %i\n", i, SEMI(i)); |
INFO("ANCESTOR(%i) = %i\n", i, ANCESTOR(i)); |
INFO("PARENT(%i) = %i\n", i, PARENT(i)); |
INFO("LABEL(%i) = %i\n", i, LABEL(i)); |
INFO("DOM(%i) = %i\n", i, DOM(i)); |
} |
} |
// Build the dominator tree of @cfgraph. |
// The CFG nodes are numbered in the order of a DFS iteration (the number is |
// stored in each node's tag), the work arrays are initialised, build() is |
// run, and the work arrays are released again before returning. |
DominatorTree::DominatorTree(Graph *cfgraph) : cfg(cfgraph), |
count(cfg->getSize()) |
{ |
int i = 0; |
vert = new Node * [count]; |
data = new int[5 * count]; |
for (IteratorRef it = cfg->iteratorDFS(true); !it->end(); it->next(), ++i) { |
vert[i] = reinterpret_cast<Node *>(it->get()); |
vert[i]->tag = i; |
LABEL(i) = i; |
// -1 marks "not set yet" for both arrays. |
SEMI(i) = ANCESTOR(i) = -1; |
} |
build(); |
// NOTE(review): vert and data are left dangling after this point; the |
// methods that use them (e.g. debugPrint) must not be called later. |
delete[] vert; |
delete[] data; |
} |
// Depth-first walk over the outgoing edges starting at @node.
// Each node reached gets SEMI set to its own number, and every successor
// that had not been reached before (SEMI still negative) records @node as
// its PARENT.
void DominatorTree::buildDFS(Graph::Node *node)
{
   SEMI(node->tag) = node->tag;

   Graph::EdgeIterator edge = node->outgoing();
   while (!edge.end()) {
      Graph::Node *succ = edge.getNode();

      if (SEMI(succ->tag) < 0) {
         buildDFS(succ);
         PARENT(succ->tag) = node->tag;
      }
      edge.next();
   }
}
void DominatorTree::squash(int v) |
{ |
if (ANCESTOR(ANCESTOR(v)) >= 0) { |
squash(ANCESTOR(v)); |
if (SEMI(LABEL(ANCESTOR(v))) < SEMI(LABEL(v))) |
LABEL(v) = LABEL(ANCESTOR(v)); |
ANCESTOR(v) = ANCESTOR(ANCESTOR(v)); |
} |
} |
int DominatorTree::eval(int v) |
{ |
if (ANCESTOR(v) < 0) |
return v; |
squash(v); |
return LABEL(v); |
} |
void DominatorTree::link(int v, int w) |
{ |
ANCESTOR(w) = v; |
} |
// Compute the immediate dominator (DOM) of every CFG node and attach the |
// "dom" node of each basic block below the "dom" node of its immediate |
// dominator in this tree. |
void DominatorTree::build() |
{ |
DLList *bucket = new DLList[count]; |
Node *nv, *nw; |
int p, u, v, w; |
buildDFS(cfg->getRoot()); |
// Process the nodes in reverse numbering order, skipping node 0. |
for (w = count - 1; w >= 1; --w) { |
nw = vert[w]; |
assert(nw->tag == w); |
// SEMI(w) becomes the smallest SEMI among eval() of all predecessors. |
for (Graph::EdgeIterator ei = nw->incident(); !ei.end(); ei.next()) { |
nv = ei.getNode(); |
v = nv->tag; |
u = eval(v); |
if (SEMI(u) < SEMI(w)) |
SEMI(w) = SEMI(u); |
} |
p = PARENT(w); |
bucket[SEMI(w)].insert(nw); |
link(p, w); |
// Empty the bucket of w's parent, giving each entry a first DOM value. |
for (DLList::Iterator it = bucket[p].iterator(); !it.end(); it.erase()) { |
v = reinterpret_cast<Node *>(it.get())->tag; |
u = eval(v); |
DOM(v) = (SEMI(u) < SEMI(v)) ? u : p; |
} |
} |
// Second pass in increasing order: where DOM differs from SEMI, take the |
// DOM of the current DOM. |
for (w = 1; w < count; ++w) { |
if (DOM(w) != SEMI(w)) |
DOM(w) = DOM(DOM(w)); |
} |
DOM(0) = 0; |
insert(&BasicBlock::get(cfg->getRoot())->dom); |
// Attach nodes to the tree; a node can only be attached once its |
// dominator is part of the graph, so repeat until a pass attaches nothing. |
do { |
p = 0; |
for (v = 1; v < count; ++v) { |
nw = &BasicBlock::get(vert[DOM(v)])->dom;; |
nv = &BasicBlock::get(vert[v])->dom; |
if (nw->getGraph() && !nv->getGraph()) { |
++p; |
nw->attach(nv, Graph::Edge::TREE); |
} |
} |
} while (p); |
delete[] bucket; |
} |
#undef SEMI |
#undef ANCESTOR |
#undef PARENT |
#undef LABEL |
#undef DOM |
// Fill in the dominance frontier (getDF()) of every basic block. |
// For each block the frontier is rebuilt from its CFG successors and from |
// the frontiers of its children in the dominator tree, keeping only blocks |
// whose immediate dominator is not the block itself. |
void DominatorTree::findDominanceFrontiers() |
{ |
BasicBlock *bb; |
for (IteratorRef dtIt = iteratorDFS(false); !dtIt->end(); dtIt->next()) { |
EdgeIterator succIt, chldIt; |
bb = BasicBlock::get(reinterpret_cast<Node *>(dtIt->get())); |
bb->getDF().clear(); |
// Local part: CFG successors not immediately dominated by bb. |
for (succIt = bb->cfg.outgoing(); !succIt.end(); succIt.next()) { |
BasicBlock *dfLocal = BasicBlock::get(succIt.getNode()); |
if (dfLocal->idom() != bb) |
bb->getDF().insert(dfLocal); |
} |
// Inherited part: frontier members of dominator-tree children that are |
// not immediately dominated by bb. |
for (chldIt = bb->dom.outgoing(); !chldIt.end(); chldIt.next()) { |
BasicBlock *cb = BasicBlock::get(chldIt.getNode()); |
DLList::Iterator dfIt = cb->getDF().iterator(); |
for (; !dfIt.end(); dfIt.next()) { |
BasicBlock *dfUp = BasicBlock::get(dfIt); |
if (dfUp->idom() != bb) |
bb->getDF().insert(dfUp); |
} |
} |
} |
} |
// liveIn(bb) = usedBeforeAssigned(bb) U (liveOut(bb) - assigned(bb)) |
// |
// Recursively compute the live-in set of @bb (stored in bb->liveSet) before |
// SSA construction. @seq is the CFG visit sequence number used to avoid |
// descending into a successor more than once. |
void |
Function::buildLiveSetsPreSSA(BasicBlock *bb, const int seq) |
{ |
Function *f = bb->getFunction(); |
BitSet usedBeforeAssigned(allLValues.getSize(), true); |
BitSet assigned(allLValues.getSize(), true); |
bb->liveSet.allocate(allLValues.getSize(), false); |
// Live-out: union of the live-in sets of all successors other than bb. |
int n = 0; |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
BasicBlock *out = BasicBlock::get(ei.getNode()); |
if (out == bb) |
continue; |
if (out->cfg.visit(seq)) |
buildLiveSetsPreSSA(out, seq); |
if (!n++) |
bb->liveSet = out->liveSet; |
else |
bb->liveSet |= out->liveSet; |
} |
// No successor contributed and the set was never filled in: start empty. |
if (!n && !bb->liveSet.marker) |
bb->liveSet.fill(0); |
bb->liveSet.marker = true; |
// Scan the block in order: a source LValue read before any assignment in |
// this block is "used before assigned"; every def is "assigned". |
for (Instruction *i = bb->getEntry(); i; i = i->next) { |
for (int s = 0; i->srcExists(s); ++s) |
if (i->getSrc(s)->asLValue() && !assigned.test(i->getSrc(s)->id)) |
usedBeforeAssigned.set(i->getSrc(s)->id); |
for (int d = 0; i->defExists(d); ++d) |
assigned.set(i->getDef(d)->id); |
} |
// In the exit block the function outputs count as uses at the end. |
if (bb == BasicBlock::get(f->cfgExit)) { |
for (std::deque<ValueRef>::iterator it = f->outs.begin(); |
it != f->outs.end(); ++it) { |
if (!assigned.test(it->get()->id)) |
usedBeforeAssigned.set(it->get()->id); |
} |
} |
bb->liveSet.andNot(assigned); |
bb->liveSet |= usedBeforeAssigned; |
} |
void |
Function::buildDefSetsPreSSA(BasicBlock *bb, const int seq) |
{ |
bb->defSet.allocate(allLValues.getSize(), !bb->liveSet.marker); |
bb->liveSet.marker = true; |
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { |
BasicBlock *in = BasicBlock::get(ei.getNode()); |
if (in->cfg.visit(seq)) |
buildDefSetsPreSSA(in, seq); |
bb->defSet |= in->defSet; |
} |
for (Instruction *i = bb->getEntry(); i; i = i->next) { |
for (int d = 0; i->defExists(d); ++d) |
bb->defSet.set(i->getDef(d)->id); |
} |
} |
// SSA renaming pass for one function. It keeps one Stack per LValue of the |
// function (indexed by value id) and walks the blocks starting at the root |
// of the dominator tree. |
class RenamePass |
{ |
public: |
RenamePass(Function *); |
~RenamePass(); |
bool run(); |
void search(BasicBlock *); |
// Value on top of the stack of the given value, or 0 if it is empty. |
inline LValue *getStackTop(Value *); |
// Creates a new value defined by a NOP at the head of the entry block. |
LValue *mkUndefined(Value *); |
private: |
// Array with one Stack per entry of func->allLValues. |
Stack *stack; |
Function *func; |
Program *prog; |
}; |
bool |
Program::convertToSSA() |
{ |
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { |
Function *fn = reinterpret_cast<Function *>(fi.get()); |
if (!fn->convertToSSA()) |
return false; |
} |
return true; |
} |
// XXX: add edge from entry to exit ? |
// Efficiently Computing Static Single Assignment Form and |
// the Control Dependence Graph, |
// R. Cytron, J. Ferrante, B. K. Rosen, M. N. Wegman, F. K. Zadeck |
// |
// Convert this function to (pruned) SSA form: compute live sets, build the |
// dominator tree and dominance frontiers, insert PHI instructions, and |
// finally run the rename pass. Returns the result of the rename pass. |
bool |
Function::convertToSSA() |
{ |
// 0. calculate live in variables (for pruned SSA) |
buildLiveSets(); |
// 1. create the dominator tree |
domTree = new DominatorTree(&cfg); |
reinterpret_cast<DominatorTree *>(domTree)->findDominanceFrontiers(); |
// 2. insert PHI functions |
DLList workList; |
LValue *lval; |
BasicBlock *bb; |
int var; |
int iterCount = 0; |
// One allocation holds two per-block arrays: hasAlready[] followed by |
// work[]. Entries are compared against iterCount, so they never need to |
// be cleared between variables. |
int *hasAlready = new int[allBBlocks.getSize() * 2]; |
int *work = &hasAlready[allBBlocks.getSize()]; |
memset(hasAlready, 0, allBBlocks.getSize() * 2 * sizeof(int)); |
// for each variable |
for (var = 0; var < allLValues.getSize(); ++var) { |
if (!allLValues.get(var)) |
continue; |
lval = reinterpret_cast<Value *>(allLValues.get(var))->asLValue(); |
if (!lval || lval->defs.empty()) |
continue; |
++iterCount; |
// TODO: don't add phi functions for values that aren't used outside |
// the BB they're defined in |
// gather blocks with assignments to lval in workList |
for (Value::DefIterator d = lval->defs.begin(); |
d != lval->defs.end(); ++d) { |
bb = ((*d)->getInsn() ? (*d)->getInsn()->bb : NULL); |
if (!bb) |
continue; // instruction likely been removed but not XXX deleted |
if (work[bb->getId()] == iterCount) |
continue; |
work[bb->getId()] = iterCount; |
workList.insert(bb); |
} |
// for each block in workList, insert a phi for lval in the block's |
// dominance frontier (if we haven't already done so) |
for (DLList::Iterator wI = workList.iterator(); !wI.end(); wI.erase()) { |
bb = BasicBlock::get(wI); |
DLList::Iterator dfIter = bb->getDF().iterator(); |
for (; !dfIter.end(); dfIter.next()) { |
Instruction *phi; |
BasicBlock *dfBB = BasicBlock::get(dfIter); |
if (hasAlready[dfBB->getId()] >= iterCount) |
continue; |
hasAlready[dfBB->getId()] = iterCount; |
// pruned SSA: don't need a phi if the value is not live-in |
if (!dfBB->liveSet.test(lval->id)) |
continue; |
// The phi initially names lval as its def and as every source (one |
// source per incoming CFG edge). |
phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size)); |
dfBB->insertTail(phi); |
phi->setDef(0, lval); |
for (int s = 0; s < dfBB->cfg.incidentCount(); ++s) |
phi->setSrc(s, lval); |
// The new phi is a definition too: queue its block if not yet queued. |
if (work[dfBB->getId()] < iterCount) { |
work[dfBB->getId()] = iterCount; |
wI.insert(dfBB); |
} |
} |
} |
} |
delete[] hasAlready; |
RenamePass rename(this); |
return rename.run(); |
} |
// Sets up the pass for one function: one Stack per entry of the function's
// allLValues list, plus cached function and program pointers.
RenamePass::RenamePass(Function *fn)
   : stack(new Stack[fn->allLValues.getSize()]),
     func(fn),
     prog(fn->getProgram())
{
}
// Releases the per-value stacks. delete[] on a null pointer is a no-op, so
// no explicit null test is required.
RenamePass::~RenamePass()
{
   delete[] stack;
}
// Returns the value on top of the stack that belongs to val (selected by
// val->id), or 0 when that stack is empty.
LValue *
RenamePass::getStackTop(Value *val)
{
   Stack &valueStack = stack[val->id];
   if (valueStack.getSize())
      return reinterpret_cast<LValue *>(valueStack.peek().u.p);
   return 0;
}
// Produces a stand-in for a value that has no definition on its stack:
// a new LValue cloned from val, defined by an OP_NOP that is inserted at the
// head of the function's CFG root block. val must be an LValue.
LValue *
RenamePass::mkUndefined(Value *val)
{
   LValue *const source = val->asLValue();
   assert(source);

   LValue *const undef = new_LValue(func, source);

   Instruction *const def =
      new_Instruction(func, OP_NOP, typeOfSize(source->reg.size));
   def->setDef(0, undef);

   BasicBlock *const entry = BasicBlock::get(func->cfg.getRoot());
   entry->insertHead(def);

   return undef;
}
bool RenamePass::run() |
{ |
if (!stack) |
return false; |
search(BasicBlock::get(func->domTree->getRoot())); |
return true; |
} |
// Go through BBs in dominance order, create new values for each definition, |
// and replace all sources with their current new values. |
// |
// NOTE: The values generated for function inputs/outputs have no connection |
// to their corresponding outputs/inputs in other functions. Only allocation |
// of physical registers will establish this connection. |
// |
// Renames one basic block and, recursively, every block reached through its |
// outgoing dominator-tree edges. Values pushed while processing the block |
// are popped again before returning. |
void RenamePass::search(BasicBlock *bb) |
{ |
LValue *lval, *ssa; |
int d, s; |
const Target *targ = prog->getTarget(); |
// Put current definitions for function inputs values on the stack. |
// They can be used before any redefinitions are pushed. |
if (bb == BasicBlock::get(func->cfg.getRoot())) { |
for (std::deque<ValueDef>::iterator it = func->ins.begin(); |
it != func->ins.end(); ++it) { |
lval = it->get()->asLValue(); |
assert(lval); |
ssa = new_LValue(func, targ->nativeFile(lval->reg.file)); |
ssa->reg.size = lval->reg.size; |
ssa->reg.data.id = lval->reg.data.id; |
it->setSSA(ssa); |
stack[lval->id].push(ssa); |
} |
} |
for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) { |
// PHI sources get definitions from the passes through the incident BBs, |
// so skip them here. |
if (stmt->op != OP_PHI) { |
for (s = 0; stmt->srcExists(s); ++s) { |
lval = stmt->getSrc(s)->asLValue(); |
if (!lval) |
continue; |
// Values on the stack created in previously visited blocks, and |
// function inputs, will be valid because they dominate this one. |
lval = getStackTop(lval); |
if (!lval) |
lval = mkUndefined(stmt->getSrc(s)); |
stmt->setSrc(s, lval); |
} |
} |
// Give every definition a fresh value and push it on the stack of the |
// pre-SSA value (lval) it replaces. |
// NOTE(review): after setSSA(), def(d).get() is presumably the new SSA |
// value, which then inherits size and register id from lval - confirm. |
for (d = 0; stmt->defExists(d); ++d) { |
lval = stmt->def(d).get()->asLValue(); |
assert(lval); |
stmt->def(d).setSSA( |
new_LValue(func, targ->nativeFile(lval->reg.file))); |
stmt->def(d).get()->reg.size = lval->reg.size; |
stmt->def(d).get()->reg.data.id = lval->reg.data.id; |
stack[lval->id].push(stmt->def(d).get()); |
} |
} |
// Update sources of PHI ops corresponding to this BB in outgoing BBs. |
for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) { |
Instruction *phi; |
int p = 0; |
BasicBlock *sb = BasicBlock::get(ei.getNode()); |
// which predecessor of sb is bb ? |
// (this inner 'ei' shadows the outer edge iterator; p ends up as the index |
// of bb among sb's incident edges) |
for (Graph::EdgeIterator ei = sb->cfg.incident(); !ei.end(); ei.next()) { |
if (ei.getNode() == &bb->cfg) |
break; |
++p; |
} |
assert(p < sb->cfg.incidentCount()); |
for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { |
lval = getStackTop(phi->getSrc(p)); |
if (!lval) |
lval = mkUndefined(phi->getSrc(p)); |
phi->setSrc(p, lval); |
} |
} |
// Visit the BBs we dominate. |
for (Graph::EdgeIterator ei = bb->dom.outgoing(); !ei.end(); ei.next()) |
search(BasicBlock::get(ei.getNode())); |
// Update function outputs to the last definitions of their pre-SSA values. |
// I hope they're unique, i.e. that we get PHIs for all of them ... |
if (bb == BasicBlock::get(func->cfgExit)) { |
for (std::deque<ValueRef>::iterator it = func->outs.begin(); |
it != func->outs.end(); ++it) { |
lval = it->get()->asLValue(); |
if (!lval) |
continue; |
lval = getStackTop(lval); |
if (!lval) |
lval = mkUndefined(it->get()); |
it->set(lval); |
} |
} |
// Pop the values we created in this block from the stack because we will |
// return to blocks that we do not dominate. |
// NOTE(review): OP_NOP statements are skipped, presumably because the NOPs |
// that mkUndefined() inserts at the head of the root block never had their |
// definition pushed - confirm. |
for (Instruction *stmt = bb->getFirst(); stmt; stmt = stmt->next) { |
if (stmt->op == OP_NOP) |
continue; |
for (d = 0; stmt->defExists(d); ++d) |
stack[stmt->def(d).preSSA()->id].pop(); |
} |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp |
---|
0,0 → 1,483 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir.h" |
#include "codegen/nv50_ir_target.h" |
namespace nv50_ir { |
// Source operand count of every operation, indexed by the operation enum |
// value (see Target::getOpSrcNr()). Target::create() statically asserts that |
// the table has exactly OP_LAST + 1 entries; the trailing comments name the |
// operations each row covers. |
const uint8_t Target::operationSrcNr[] = |
{ |
0, 0, // NOP, PHI |
0, 0, 0, 0, // UNION, SPLIT, MERGE, CONSTRAINT |
1, 1, 2, // MOV, LOAD, STORE |
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD |
1, 1, 1, // ABS, NEG, NOT |
2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR |
2, 2, 1, // MAX, MIN, SAT |
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT |
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT |
1, 1, 1, 1, 1, 1, // RCP, RSQ, LG2, SIN, COS, EX2 |
1, 1, 1, 1, 1, 2, // EXP, LOG, PRESIN, PREEX2, SQRT, POW |
0, 0, 0, 0, 0, // BRA, CALL, RET, CONT, BREAK, |
0, 0, 0, // PRERET,CONT,BREAK |
0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR |
1, 1, 2, 1, 2, // VFETCH, PFETCH, EXPORT, LINTERP, PINTERP |
1, 1, // EMIT, RESTART |
1, 1, 1, // TEX, TXB, TXL, |
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP |
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA |
3, 3, 3, 3, // SUBFM, SUCLAMP, SUEAU, MADSP |
0, // TEXBAR |
1, 1, // DFDX, DFDY |
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP |
2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT |
2, 2, // ATOM, BAR |
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, |
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL |
3, // SHFL |
// final entry: slot for OP_LAST itself |
0 |
}; |
// Coarse class of every operation, indexed by the operation enum value (see |
// Target::getOpClass()). Target::create() statically asserts that the table |
// has exactly OP_LAST + 1 entries; each comment names the operations of the |
// rows that follow it. |
// NOTE(review): SUSTB is classed OPCLASS_ATOMIC while its neighbours |
// (including SUREDB/SUREDP) are OPCLASS_SURFACE - confirm this is intended. |
const OpClass Target::operationClass[] = |
{ |
// NOP; PHI; UNION, SPLIT, MERGE, CONSTRAINT |
OPCLASS_OTHER, |
OPCLASS_PSEUDO, |
OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, OPCLASS_PSEUDO, |
// MOV; LOAD; STORE |
OPCLASS_MOVE, |
OPCLASS_LOAD, |
OPCLASS_STORE, |
// ADD, SUB, MUL; DIV, MOD; MAD, FMA, SAD |
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, |
OPCLASS_ARITH, OPCLASS_ARITH, |
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, |
// ABS, NEG; NOT, AND, OR, XOR; SHL, SHR |
OPCLASS_CONVERT, OPCLASS_CONVERT, |
OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, |
OPCLASS_SHIFT, OPCLASS_SHIFT, |
// MAX, MIN |
OPCLASS_COMPARE, OPCLASS_COMPARE, |
// SAT, CEIL, FLOOR, TRUNC; CVT |
OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, OPCLASS_CONVERT, |
OPCLASS_CONVERT, |
// SET(AND,OR,XOR); SELP, SLCT |
OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, OPCLASS_COMPARE, |
OPCLASS_COMPARE, OPCLASS_COMPARE, |
// RCP, RSQ, LG2, SIN, COS; EX2, EXP, LOG, PRESIN, PREEX2; SQRT, POW |
OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, |
OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, OPCLASS_SFU, |
OPCLASS_SFU, OPCLASS_SFU, |
// BRA, CALL, RET; CONT, BREAK, PRE(RET,CONT,BREAK); BRKPT, JOINAT, JOIN |
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
OPCLASS_FLOW, OPCLASS_FLOW, OPCLASS_FLOW, |
// DISCARD, EXIT |
OPCLASS_FLOW, OPCLASS_FLOW, |
// MEMBAR |
OPCLASS_CONTROL, |
// VFETCH, PFETCH, EXPORT |
OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_STORE, |
// LINTERP, PINTERP |
OPCLASS_SFU, OPCLASS_SFU, |
// EMIT, RESTART |
OPCLASS_CONTROL, OPCLASS_CONTROL, |
// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP |
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, |
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, |
OPCLASS_TEXTURE, OPCLASS_TEXTURE, |
// SULDB, SULDP, SUSTB, SUSTP; SUREDB, SUREDP, SULEA |
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_ATOMIC, OPCLASS_SURFACE, |
OPCLASS_SURFACE, OPCLASS_SURFACE, OPCLASS_SURFACE, |
// SUBFM, SUCLAMP, SUEAU, MADSP |
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_ARITH, |
// TEXBAR |
OPCLASS_OTHER, |
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP |
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, |
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, |
// POPCNT, INSBF, EXTBF, BFIND; PERMT |
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, |
OPCLASS_BITFIELD, |
// ATOM, BAR |
OPCLASS_ATOMIC, OPCLASS_CONTROL, |
// VADD, VAVG, VMIN, VMAX |
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, |
// VSAD, VSET, VSHR, VSHL |
OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, OPCLASS_VECTOR, |
// VSEL, CCTL |
OPCLASS_VECTOR, OPCLASS_CONTROL, |
// SHFL |
OPCLASS_OTHER, |
OPCLASS_PSEUDO // LAST |
}; |
// Per-family Target factories; Target::create() picks one of them from the |
// chipset number. |
extern Target *getTargetGM107(unsigned int chipset); |
extern Target *getTargetNVC0(unsigned int chipset); |
extern Target *getTargetNV50(unsigned int chipset); |
// Creates the Target object for a chipset. The low four bits of the chipset
// number are ignored when selecting the family. Returns 0 (after logging an
// error) when the family is not handled by any of the three back ends.
Target *Target::create(unsigned int chipset)
{
   STATIC_ASSERT(Elements(operationSrcNr) == OP_LAST + 1);
   STATIC_ASSERT(Elements(operationClass) == OP_LAST + 1);

   const unsigned int family = chipset & ~0xf;

   if (family == 0x110)
      return getTargetGM107(chipset);

   if (family == 0xc0 || family == 0xd0 || family == 0xe0 ||
       family == 0xf0 || family == 0x100)
      return getTargetNVC0(chipset);

   if (family == 0x50 || family == 0x80 || family == 0x90 || family == 0xa0)
      return getTargetNV50(chipset);

   ERROR("unsupported target: NV%x\n", chipset);
   return 0;
}
// Counterpart of Target::create(): deletes the given target object.
// Passing a null pointer is harmless.
void
Target::destroy(Target *targ)
{
   delete targ;
}
// Base-class constructor. Besides remembering the target, it puts every
// bookkeeping member into a defined "nothing emitted yet" state, so that
// addReloc(), getRelocInfo(), getCodeLocation() and getCodeSize() never read
// an indeterminate value when a derived emitter does not initialize these
// members itself.
CodeEmitter::CodeEmitter(const Target *target)
   : targ(target),
     code(NULL),
     codeSize(0),
     codeSizeLimit(0),
     relocInfo(NULL)
{
}
void |
CodeEmitter::setCodeLocation(void *ptr, uint32_t size) |
{ |
code = reinterpret_cast<uint32_t *>(ptr); |
codeSize = 0; |
codeSizeLimit = size; |
} |
void |
CodeEmitter::printBinary() const |
{ |
uint32_t *bin = code - codeSize / 4; |
INFO("program binary (%u bytes)", codeSize); |
for (unsigned int pos = 0; pos < codeSize / 4; ++pos) { |
if ((pos % 8) == 0) |
INFO("\n"); |
INFO("%08x ", bin[pos]); |
} |
INFO("\n"); |
} |
// Number of 56-byte groups needed to hold `size` bytes (rounded up).
// The caller adds 8 bytes of scheduling data per group returned.
static inline uint32_t sizeToBundlesNVE4(uint32_t size)
{
   const uint32_t groupBytes = 56;
   return (size + (groupBytes - 1)) / groupBytes;
}
void |
CodeEmitter::prepareEmission(Program *prog) |
{ |
for (ArrayList::Iterator fi = prog->allFuncs.iterator(); |
!fi.end(); fi.next()) { |
Function *func = reinterpret_cast<Function *>(fi.get()); |
func->binPos = prog->binSize; |
prepareEmission(func); |
// adjust sizes & positions for schedulding info: |
if (prog->getTarget()->hasSWSched) { |
uint32_t adjPos = func->binPos; |
BasicBlock *bb = NULL; |
for (int i = 0; i < func->bbCount; ++i) { |
bb = func->bbArray[i]; |
int32_t adjSize = bb->binSize; |
if (adjPos % 64) { |
adjSize -= 64 - adjPos % 64; |
if (adjSize < 0) |
adjSize = 0; |
} |
adjSize = bb->binSize + sizeToBundlesNVE4(adjSize) * 8; |
bb->binPos = adjPos; |
bb->binSize = adjSize; |
adjPos += adjSize; |
} |
if (bb) |
func->binSize = adjPos - func->binPos; |
} |
prog->binSize += func->binSize; |
} |
} |
void |
CodeEmitter::prepareEmission(Function *func) |
{ |
func->bbCount = 0; |
func->bbArray = new BasicBlock * [func->cfg.getSize()]; |
BasicBlock::get(func->cfg.getRoot())->binPos = func->binPos; |
for (IteratorRef it = func->cfg.iteratorCFG(); !it->end(); it->next()) |
prepareEmission(BasicBlock::get(*it)); |
} |
// Appends bb to its function's block array, removes branches in earlier |
// blocks that merely jump to bb, and determines the encoding size of every |
// instruction in bb. The resulting block size is added to func->binSize. |
void |
CodeEmitter::prepareEmission(BasicBlock *bb) |
{ |
Instruction *i, *next; |
Function *func = bb->getFunction(); |
int j; |
unsigned int nShort; |
// Skip backwards over already placed blocks whose binSize is zero (the |
// loop body is intentionally empty). |
for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); |
// Walk backwards from there: an OP_BRA exit that targets bb is removed |
// (8 bytes), and the positions of the blocks after it are shifted down. |
for (; j >= 0; --j) { |
BasicBlock *in = func->bbArray[j]; |
Instruction *exit = in->getExit(); |
if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { |
in->binSize -= 8; |
func->binSize -= 8; |
for (++j; j < func->bbCount; ++j) |
func->bbArray[j]->binPos -= 8; |
in->remove(exit); |
} |
bb->binPos = in->binPos + in->binSize; |
if (in->binSize) // no more no-op branches to bb |
break; |
} |
func->bbArray[func->bbCount++] = bb; |
// A block without an exit instruction gets no size computed. |
if (!bb->getExit()) |
return; |
// determine encoding size, try to group short instructions |
// nShort counts the short (< 8 byte) instructions of the current run; an |
// odd count means the last short instruction has no partner yet. |
nShort = 0; |
for (i = bb->getEntry(); i; i = next) { |
next = i->next; |
// Drop memory barriers the target cannot encode. |
if (i->op == OP_MEMBAR && !targ->isOpSupported(OP_MEMBAR, TYPE_NONE)) { |
bb->remove(i); |
continue; |
} |
i->encSize = getMinEncodingSize(i); |
if (next && i->encSize < 8) |
++nShort; |
else |
// i needs the long form (or is last) while a short instruction is still |
// unpaired and the one after i could be short: try to swap neighbours so |
// the two short ones become adjacent, otherwise widen. |
if ((nShort & 1) && next && getMinEncodingSize(next) == 4) { |
if (i->isCommutationLegal(i->next)) { |
bb->permuteAdjacent(i, next); |
next->encSize = 4; |
next = i; |
i = i->prev; |
++nShort; |
} else |
if (i->isCommutationLegal(i->prev) && next->next) { |
bb->permuteAdjacent(i->prev, i); |
next->encSize = 4; |
next = next->next; |
bb->binSize += 4; |
++nShort; |
} else { |
i->encSize = 8; |
i->prev->encSize = 8; |
bb->binSize += 4; |
nShort = 0; |
} |
} else { |
i->encSize = 8; |
if (nShort & 1) { |
i->prev->encSize = 8; |
bb->binSize += 4; |
} |
nShort = 0; |
} |
bb->binSize += i->encSize; |
} |
// The exit instruction always ends up with the 8-byte encoding. |
if (bb->getExit()->encSize == 4) { |
assert(nShort); |
bb->getExit()->encSize = 8; |
bb->binSize += 4; |
if ((bb->getExit()->prev->encSize == 4) && !(nShort & 1)) { |
bb->binSize += 8; |
bb->getExit()->prev->encSize = 8; |
} |
} |
assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 8)); |
func->binSize += bb->binSize; |
} |
void |
Program::emitSymbolTable(struct nv50_ir_prog_info *info) |
{ |
unsigned int n = 0, nMax = allFuncs.getSize(); |
info->bin.syms = |
(struct nv50_ir_prog_symbol *)MALLOC(nMax * sizeof(*info->bin.syms)); |
for (ArrayList::Iterator fi = allFuncs.iterator(); |
!fi.end(); |
fi.next(), ++n) { |
Function *f = (Function *)fi.get(); |
assert(n < nMax); |
info->bin.syms[n].label = f->getLabel(); |
info->bin.syms[n].offset = f->binPos; |
} |
info->bin.numSyms = n; |
} |
bool |
Program::emitBinary(struct nv50_ir_prog_info *info) |
{ |
CodeEmitter *emit = target->getCodeEmitter(progType); |
emit->prepareEmission(this); |
if (dbgFlags & NV50_IR_DEBUG_BASIC) |
this->print(); |
if (!binSize) { |
code = NULL; |
return false; |
} |
code = reinterpret_cast<uint32_t *>(MALLOC(binSize)); |
if (!code) |
return false; |
emit->setCodeLocation(code, binSize); |
for (ArrayList::Iterator fi = allFuncs.iterator(); !fi.end(); fi.next()) { |
Function *fn = reinterpret_cast<Function *>(fi.get()); |
assert(emit->getCodeSize() == fn->binPos); |
for (int b = 0; b < fn->bbCount; ++b) { |
for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) { |
emit->emitInstruction(i); |
if (i->sType == TYPE_F64 || i->dType == TYPE_F64) |
info->io.fp64 = true; |
} |
} |
} |
info->bin.relocData = emit->getRelocInfo(); |
emitSymbolTable(info); |
// the nvc0 driver will print the binary iself together with the header |
if ((dbgFlags & NV50_IR_DEBUG_BASIC) && getTarget()->getChipset() < 0xc0) |
emit->printBinary(); |
delete emit; |
return true; |
} |
#define RELOC_ALLOC_INCREMENT 8 |
bool |
CodeEmitter::addReloc(RelocEntry::Type ty, int w, uint32_t data, uint32_t m, |
int s) |
{ |
unsigned int n = relocInfo ? relocInfo->count : 0; |
if (!(n % RELOC_ALLOC_INCREMENT)) { |
size_t size = sizeof(RelocInfo) + n * sizeof(RelocEntry); |
relocInfo = reinterpret_cast<RelocInfo *>( |
REALLOC(relocInfo, n ? size : 0, |
size + RELOC_ALLOC_INCREMENT * sizeof(RelocEntry))); |
if (!relocInfo) |
return false; |
if (n == 0) |
memset(relocInfo, 0, sizeof(RelocInfo)); |
} |
++relocInfo->count; |
relocInfo->entry[n].data = data; |
relocInfo->entry[n].mask = m; |
relocInfo->entry[n].offset = codeSize + w * 4; |
relocInfo->entry[n].bitPos = s; |
relocInfo->entry[n].type = ty; |
return true; |
} |
void |
RelocEntry::apply(uint32_t *binary, const RelocInfo *info) const |
{ |
uint32_t value = 0; |
switch (type) { |
case TYPE_CODE: value = info->codePos; break; |
case TYPE_BUILTIN: value = info->libPos; break; |
case TYPE_DATA: value = info->dataPos; break; |
default: |
assert(0); |
break; |
} |
value += data; |
value = (bitPos < 0) ? (value >> -bitPos) : (value << bitPos); |
binary[offset / 4] &= ~mask; |
binary[offset / 4] |= value & mask; |
} |
} // namespace nv50_ir |
#include "codegen/nv50_ir_driver.h" |
extern "C" { |
void |
nv50_ir_relocate_code(void *relocData, uint32_t *code, |
uint32_t codePos, |
uint32_t libPos, |
uint32_t dataPos) |
{ |
nv50_ir::RelocInfo *info = reinterpret_cast<nv50_ir::RelocInfo *>(relocData); |
info->codePos = codePos; |
info->libPos = libPos; |
info->dataPos = dataPos; |
for (unsigned int i = 0; i < info->count; ++i) |
info->entry[i].apply(code, info); |
} |
void |
nv50_ir_get_target_library(uint32_t chipset, |
const uint32_t **code, uint32_t *size) |
{ |
nv50_ir::Target *targ = nv50_ir::Target::create(chipset); |
targ->getBuiltinCode(code, size); |
nv50_ir::Target::destroy(targ); |
} |
} |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h |
---|
0,0 → 1,236 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_TARGET_H__ |
#define __NV50_IR_TARGET_H__ |
#include "codegen/nv50_ir.h" |
namespace nv50_ir { |
struct RelocInfo; |
// One relocation: describes how a single 32-bit word of the emitted code is |
// patched once the final base addresses are known (see apply()). |
struct RelocEntry |
{ |
// Selects which base address of RelocInfo the entry is relative to: |
// codePos, libPos or dataPos respectively. |
enum Type |
{ |
TYPE_CODE, |
TYPE_BUILTIN, |
TYPE_DATA |
}; |
// Constant added to the selected base address. |
uint32_t data; |
// Bits of the target word that are replaced. |
uint32_t mask; |
// Byte offset of the target word inside the code buffer. |
uint32_t offset; |
// Shift applied to the address: left by bitPos, or right by -bitPos when |
// negative. |
int8_t bitPos; |
Type type; |
// NOTE(review): declared inline, but the definition lives in |
// nv50_ir_target.cpp - confirm no other translation unit calls it. |
inline void apply(uint32_t *binary, const RelocInfo *info) const; |
}; |
// Relocation table: the three base addresses filled in at relocation time, |
// followed by `count` entries stored directly behind the header. |
struct RelocInfo |
{ |
uint32_t codePos; |
uint32_t libPos; |
uint32_t dataPos; |
uint32_t count; |
// Variable-length tail; the table is allocated with room for the entries. |
RelocEntry entry[0]; |
}; |
// Base class of the per-target binary encoders. It owns the generic layout |
// logic (prepareEmission) and the relocation table; derived classes provide |
// the actual instruction encoding. |
class CodeEmitter |
{ |
public: |
CodeEmitter(const Target *); |
virtual ~CodeEmitter() { } |
// returns whether the instruction was encodable and written |
virtual bool emitInstruction(Instruction *) = 0; |
// Smallest encoding size of the instruction, in bytes. |
virtual uint32_t getMinEncodingSize(const Instruction *) const = 0; |
// Sets the output buffer and its size limit; resets the emitted size to 0. |
void setCodeLocation(void *, uint32_t size); |
inline void *getCodeLocation() const { return code; } |
inline uint32_t getCodeSize() const { return codeSize; } |
// Records a relocation; w is a word index added (times 4) to the current |
// code size, m the bit mask, s the bit position. Returns false when the |
// table cannot be allocated. |
bool addReloc(RelocEntry::Type, int w, uint32_t data, uint32_t m, |
int s); |
inline void *getRelocInfo() const { return relocInfo; } |
// Compute positions and sizes for a program, a function or a basic block. |
virtual void prepareEmission(Program *); |
virtual void prepareEmission(Function *); |
virtual void prepareEmission(BasicBlock *); |
// Prints the emitted words in hexadecimal. |
void printBinary() const; |
protected: |
const Target *targ; |
uint32_t *code; |
uint32_t codeSize; |
uint32_t codeSizeLimit; |
RelocInfo *relocInfo; |
}; |
// Coarse classification of operations; Target::operationClass[] maps every |
// operation to one of these. The numbering skips the value 13. |
enum OpClass |
{ |
OPCLASS_MOVE = 0, |
OPCLASS_LOAD = 1, |
OPCLASS_STORE = 2, |
OPCLASS_ARITH = 3, |
OPCLASS_SHIFT = 4, |
OPCLASS_SFU = 5, |
OPCLASS_LOGIC = 6, |
OPCLASS_COMPARE = 7, |
OPCLASS_CONVERT = 8, |
OPCLASS_ATOMIC = 9, |
OPCLASS_TEXTURE = 10, |
OPCLASS_SURFACE = 11, |
OPCLASS_FLOW = 12, |
OPCLASS_PSEUDO = 14, |
OPCLASS_VECTOR = 15, |
OPCLASS_BITFIELD = 16, |
OPCLASS_CONTROL = 17, |
OPCLASS_OTHER = 18 |
}; |
// Abstract description of one code generation target: per-operation |
// capabilities, register file properties, and factories for the matching |
// code emitter and legalization passes. |
class Target |
{ |
public: |
// m -> hasJoin, j -> joinAnterior, s -> hasSWSched (see the members below). |
Target(bool m, bool j, bool s) : hasJoin(m), joinAnterior(j), hasSWSched(s) { } |
virtual ~Target() { } |
// create() returns 0 for an unsupported chipset; destroy() deletes the |
// object again. |
static Target *create(uint32_t chipset); |
static void destroy(Target *); |
// 0x50 and 0x84 to 0xaf for nv50 |
// 0xc0 to 0xdf for nvc0 |
// NOTE(review): Target::create() additionally maps 0xe0-0x10f to the NVC0 |
// target and 0x110-0x11f to the GM107 target. |
inline uint32_t getChipset() const { return chipset; } |
virtual CodeEmitter *getCodeEmitter(Program::Type) = 0; |
// Drivers should upload this so we can use it from all programs. |
// The address chosen is supplied to the relocation routine. |
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0; |
virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { } |
virtual bool runLegalizePass(Program *, CGStage stage) const = 0; |
public: |
// Static properties of one operation on this target. |
struct OpInfo |
{ |
OpInfo *variants; |
operation op; |
uint16_t srcTypes; |
uint16_t dstTypes; |
uint32_t immdBits; |
uint8_t srcNr; |
uint8_t srcMods[3]; |
uint8_t dstMods; |
uint16_t srcFiles[3]; |
uint16_t dstFiles; |
unsigned int minEncSize : 4; |
unsigned int vector : 1; |
unsigned int predicate : 1; |
unsigned int commutative : 1; |
unsigned int pseudo : 1; |
unsigned int flow : 1; |
unsigned int hasDest : 1; |
unsigned int terminator : 1; |
}; |
// The Instruction overload clamps the op to OP_LAST before indexing; the |
// operation overload indexes the table directly. |
inline const OpInfo& getOpInfo(const Instruction *) const; |
inline const OpInfo& getOpInfo(const operation) const; |
// Maps a data file to the file this target uses for it (nativeFileMap). |
inline DataFile nativeFile(DataFile f) const; |
virtual bool insnCanLoad(const Instruction *insn, int s, |
const Instruction *ld) const = 0; |
virtual bool isOpSupported(operation, DataType) const = 0; |
virtual bool isAccessSupported(DataFile, DataType) const = 0; |
virtual bool isModSupported(const Instruction *, |
int s, Modifier) const = 0; |
virtual bool isSatSupported(const Instruction *) const = 0; |
virtual bool isPostMultiplySupported(operation op, float f, |
int& e) const { return false; } |
virtual bool mayPredicate(const Instruction *, |
const Value *) const = 0; |
// whether @insn can be issued together with @next (order matters) |
virtual bool canDualIssue(const Instruction *insn, |
const Instruction *next) const { return false; } |
// Defaults of 1; targets may override. |
virtual int getLatency(const Instruction *) const { return 1; } |
virtual int getThroughput(const Instruction *) const { return 1; } |
virtual unsigned int getFileSize(DataFile) const = 0; |
virtual unsigned int getFileUnit(DataFile) const = 0; |
virtual uint32_t getSVAddress(DataFile, const Symbol *) const = 0; |
public: |
const bool hasJoin; // true if instructions have a join modifier |
const bool joinAnterior; // true if join is executed before the op |
const bool hasSWSched; // true if code should provide scheduling data |
// Tables indexed by operation, each with OP_LAST + 1 entries. |
static const uint8_t operationSrcNr[]; |
static const OpClass operationClass[]; |
static inline uint8_t getOpSrcNr(operation op) |
{ |
return operationSrcNr[op]; |
} |
static inline OpClass getOpClass(operation op) |
{ |
return operationClass[op]; |
} |
protected: |
uint32_t chipset; |
DataFile nativeFileMap[DATA_FILE_COUNT]; |
OpInfo opInfo[OP_LAST + 1]; |
}; |
// Looks up the properties of an instruction's operation. The op is clamped
// to OP_LAST first, so any larger value maps onto the final table slot.
const Target::OpInfo& Target::getOpInfo(const Instruction *insn) const
{
   const OpInfo &entry = opInfo[MIN2(insn->op, OP_LAST)];
   return entry;
}
// Looks up the properties of an operation. No clamping is applied here: the
// table is indexed with op as given.
const Target::OpInfo& Target::getOpInfo(const operation op) const
{
   const OpInfo &entry = opInfo[op];
   return entry;
}
// Translates a data file into the file this target actually uses for it,
// as recorded in nativeFileMap.
inline DataFile Target::nativeFile(DataFile f) const
{
   const DataFile mapped = nativeFileMap[f];
   return mapped;
}
} // namespace nv50_ir |
#endif // __NV50_IR_TARGET_H__ |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp |
---|
0,0 → 1,100 |
/* |
* Copyright 2011 Christoph Bumiller |
* 2014 Red Hat Inc. |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target_gm107.h" |
#include "codegen/nv50_ir_lowering_gm107.h" |
namespace nv50_ir { |
// Factory used by Target::create(): builds the GM107 target description for
// the given chipset. The caller owns the returned object.
Target *getTargetGM107(unsigned int chipset)
{
   Target *const gm107 = new TargetGM107(chipset);
   return gm107;
}
// BUILTINS / LIBRARY FUNCTIONS: |
// laziness -> will just hardcode everything for the time being |
#include "lib/gm107.asm.h" |
void |
TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const |
{ |
*code = (const uint32_t *)&gm107_builtin_code[0]; |
*size = sizeof(gm107_builtin_code); |
} |
uint32_t |
TargetGM107::getBuiltinOffset(int builtin) const |
{ |
assert(builtin < NVC0_BUILTIN_COUNT); |
return gm107_builtin_offsets[builtin]; |
} |
bool |
TargetGM107::isOpSupported(operation op, DataType ty) const |
{ |
switch (op) { |
case OP_MAD: |
case OP_FMA: |
if (ty != TYPE_F32) |
return false; |
break; |
case OP_SAD: |
case OP_POW: |
case OP_SQRT: |
case OP_DIV: |
case OP_MOD: |
return false; |
default: |
break; |
} |
return true; |
} |
bool |
TargetGM107::runLegalizePass(Program *prog, CGStage stage) const |
{ |
if (stage == CG_STAGE_PRE_SSA) { |
GM107LoweringPass pass(prog); |
return pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_POST_RA) { |
NVC0LegalizePostRA pass(prog); |
return pass.run(prog, false, true); |
} else |
if (stage == CG_STAGE_SSA) { |
NVC0LegalizeSSA pass; |
return pass.run(prog, false, true); |
} |
return false; |
} |
// Returns a GM107 code emitter for the given program type; simply forwards
// to createCodeEmitterGM107().
CodeEmitter *
TargetGM107::getCodeEmitter(Program::Type type)
{
   CodeEmitter *const emitter = createCodeEmitterGM107(type);
   return emitter;
}
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.h |
---|
0,0 → 1,21 |
#ifndef __NV50_IR_TARGET_GM107_H__
#define __NV50_IR_TARGET_GM107_H__

// The include guard keeps the class from being defined twice when this
// header is reached through more than one include path.

#include "codegen/nv50_ir_target_nvc0.h"

namespace nv50_ir {

// Target description for GM107: derives from the NVC0 target and overrides
// emitter creation, legalization, the built-in library and the
// operation-support query.
class TargetGM107 : public TargetNVC0
{
public:
   TargetGM107(unsigned int chipset) : TargetNVC0(chipset) {}

   // Forwards to createCodeEmitterGM107().
   virtual CodeEmitter *getCodeEmitter(Program::Type);
   CodeEmitter *createCodeEmitterGM107(Program::Type);

   virtual bool runLegalizePass(Program *, CGStage) const;

   // Built-in library code and the offsets of the routines inside it.
   virtual void getBuiltinCode(const uint32_t **, uint32_t *) const;
   virtual uint32_t getBuiltinOffset(int) const;

   virtual bool isOpSupported(operation, DataType) const;
};

} // namespace nv50_ir

#endif // __NV50_IR_TARGET_GM107_H__
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp |
---|
0,0 → 1,570 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target_nv50.h" |
namespace nv50_ir { |
// Factory used by Target::create(): builds the NV50 target description for
// the given chipset. The caller owns the returned object.
Target *getTargetNV50(unsigned int chipset)
{
   Target *const nv50 = new TargetNV50(chipset);
   return nv50;
}
// Constructs the NV50 target: join modifier available and executed before
// the op, no software scheduling data. Every system value location starts
// out as ~0 and the per-operation table is filled by initOpInfo().
TargetNV50::TargetNV50(unsigned int card) : Target(true, true, false)
{
   chipset = card;
   wposMask = 0;

   unsigned int sv = 0;
   while (sv <= SV_LAST) {
      sysvalLocation[sv] = ~0;
      ++sv;
   }

   initOpInfo();
}
#if 0 |
// BUILTINS / LIBRARY FUNCTIONS: |
// TODO |
static const uint32_t nvc0_builtin_code[] = |
{ |
}; |
static const uint16_t nvc0_builtin_offsets[NV50_BUILTIN_COUNT] = |
{ |
}; |
#endif |
void |
TargetNV50::getBuiltinCode(const uint32_t **code, uint32_t *size) const |
{ |
*code = NULL; |
*size = 0; |
} |
uint32_t |
TargetNV50::getBuiltinOffset(int builtin) const |
{ |
return 0; |
} |
// One row of the _initProps table: per-operation masks for source modifiers |
// (neg/abs/not), saturation, and for the non-GPR source kinds (c[], s[], |
// a[], immediate) named in that table's column header. |
// NOTE(review): presumably bit s of a mask refers to source s - confirm |
// against TargetNV50::initOpInfo(), which consumes the table. |
struct opProperties |
{ |
operation op; |
unsigned int mNeg : 4; |
unsigned int mAbs : 4; |
unsigned int mNot : 4; |
unsigned int mSat : 4; |
unsigned int fConst : 3; |
unsigned int fShared : 3; |
unsigned int fAttrib : 3; |
unsigned int fImm : 3; |
}; |
// Modifier and source-kind masks for the NV50 operations that deviate from |
// the defaults; columns follow the field order of struct opProperties. |
// TargetNV50::initOpInfo() iterates over this table. |
static const struct opProperties _initProps[] = |
{ |
// neg abs not sat c[] s[], a[], imm |
{ OP_ADD, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, |
{ OP_SUB, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 }, |
{ OP_MUL, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 }, |
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, |
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, |
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint |
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 }, |
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 }, |
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 }, |
{ OP_AND, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, |
{ OP_OR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, |
{ OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 }, |
{ OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 }, |
{ OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 }, |
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 }, |
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_LG2, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_RCP, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_RSQ, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
}; |
// Fills nativeFileMap[] and opInfo[] for the NV50 target. |
// Every opcode first receives generic defaults (F32 types, GPR-only sources, |
// has a destination, predicable unless pseudo); the static lists below and |
// the _initProps[] table then override individual opcodes. |
void TargetNV50::initOpInfo() |
{ |
   unsigned int i, j; |
   // One bit per opcode, 32 opcodes per word. |
   static const uint32_t commutative[(OP_LAST + 31) / 32] = |
   { |
      // ADD,MAD,MUL,AND,OR,XOR,MAX,MIN |
      0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 |
   }; |
   static const uint32_t shortForm[(OP_LAST + 31) / 32] = |
   { |
      // MOV,ADD,SUB,MUL,MAD,SAD,L/PINTERP,RCP,TEX,TXF |
      0x00014e40, 0x00000040, 0x00000498, 0x00000000 |
   }; |
   static const operation noDestList[] = |
   { |
      OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, |
      OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, |
      OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, |
      OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, |
      OP_SUREDB, OP_BAR |
   }; |
   static const operation noPredList[] = |
   { |
      OP_CALL, OP_PREBREAK, OP_PRERET, OP_QUADON, OP_QUADPOP, OP_JOINAT, |
      OP_EMIT, OP_RESTART |
   }; |
   // Identity file mapping, except that predicates map to the flags file. |
   for (i = 0; i < DATA_FILE_COUNT; ++i) |
      nativeFileMap[i] = (DataFile)i; |
   nativeFileMap[FILE_PREDICATE] = FILE_FLAGS; |
   // Generic defaults for every opcode. |
   for (i = 0; i < OP_LAST; ++i) { |
      opInfo[i].variants = NULL; |
      opInfo[i].op = (operation)i; |
      opInfo[i].srcTypes = 1 << (int)TYPE_F32; |
      opInfo[i].dstTypes = 1 << (int)TYPE_F32; |
      opInfo[i].immdBits = 0xffffffff; |
      opInfo[i].srcNr = operationSrcNr[i]; |
      for (j = 0; j < opInfo[i].srcNr; ++j) { |
         opInfo[i].srcMods[j] = 0; |
         opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; |
      } |
      opInfo[i].dstMods = 0; |
      opInfo[i].dstFiles = 1 << (int)FILE_GPR; |
      opInfo[i].hasDest = 1; |
      opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); |
      opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; |
      opInfo[i].pseudo = (i < OP_MOV); |
      opInfo[i].predicate = !opInfo[i].pseudo; |
      opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); |
      // Test the bit by shifting the unsigned table word right; the former |
      // '1 << (i % 32)' shifted into the sign bit of int for bit 31. |
      opInfo[i].minEncSize = ((shortForm[i / 32] >> (i % 32)) & 1) ? 4 : 8; |
   } |
   for (i = 0; i < sizeof(noDestList) / sizeof(noDestList[0]); ++i) |
      opInfo[noDestList[i]].hasDest = 0; |
   for (i = 0; i < sizeof(noPredList) / sizeof(noPredList[0]); ++i) |
      opInfo[noPredList[i]].predicate = 0; |
   // Per-opcode modifier and source-file capabilities; bit s of each mask |
   // applies to source operand s. |
   for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { |
      const struct opProperties *prop = &_initProps[i]; |
      for (int s = 0; s < 3; ++s) { |
         if (prop->mNeg & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG; |
         if (prop->mAbs & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS; |
         if (prop->mNot & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT; |
         if (prop->fConst & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST; |
         if (prop->fShared & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_SHARED; |
         if (prop->fAttrib & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_SHADER_INPUT; |
         if (prop->fImm & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE; |
      } |
      if (prop->mSat & 8) |
         opInfo[prop->op].dstMods = NV50_IR_MOD_SAT; |
   } |
   // Chipsets from 0xa0 on additionally allow saturation on MUL. |
   if (chipset >= 0xa0) |
      opInfo[OP_MUL].dstMods = NV50_IR_MOD_SAT; |
} |
// Returns the size limit this target reports for each register/memory file. |
// Files listed with 0 have no size reported here; an unknown file trips the |
// assert and yields 0. |
unsigned int |
TargetNV50::getFileSize(DataFile file) const |
{ |
switch (file) { |
case FILE_NULL: return 0; |
case FILE_GPR: return 256; // in 16-bit units ** |
case FILE_PREDICATE: return 0; |
case FILE_FLAGS: return 4; |
case FILE_ADDRESS: return 4; |
case FILE_IMMEDIATE: return 0; |
case FILE_MEMORY_CONST: return 65536; |
case FILE_SHADER_INPUT: return 0x200; |
case FILE_SHADER_OUTPUT: return 0x200; |
case FILE_MEMORY_GLOBAL: return 0xffffffff; |
case FILE_MEMORY_SHARED: return 16 << 10; |
case FILE_MEMORY_LOCAL: return 48 << 10; |
case FILE_SYSTEM_VALUE: return 16; |
default: |
assert(!"invalid file"); |
return 0; |
} |
// ** only first 128 units encodable for 16-bit regs |
} |
// Unit value per file: 1 for GPRs and address registers, 2 for system |
// values, 0 for everything else. |
// NOTE(review): presumably log2 of the unit size in bytes (getFileSize() |
// counts GPRs in 16-bit units) - confirm against the Target base class. |
unsigned int |
TargetNV50::getFileUnit(DataFile file) const |
{ |
   switch (file) { |
   case FILE_GPR: |
   case FILE_ADDRESS: |
      return 1; |
   case FILE_SYSTEM_VALUE: |
      return 2; |
   default: |
      return 0; |
   } |
} |
// Maps a system-value symbol to the address this target uses for it. |
// Some values have fixed addresses or addresses derived from the component |
// index; the rest come from sysvalLocation[], which parseDriverInfo() fills. |
uint32_t |
TargetNV50::getSVAddress(DataFile shaderFile, const Symbol *sym) const |
{ |
switch (sym->reg.data.sv.sv) { |
case SV_FACE: |
return 0x3fc; |
case SV_POSITION: |
{ |
// Start at the recorded position slot and advance 4 bytes for every |
// lower-numbered component that is enabled in wposMask. |
uint32_t addr = sysvalLocation[sym->reg.data.sv.sv]; |
for (int c = 0; c < sym->reg.data.sv.index; ++c) |
if (wposMask & (1 << c)) |
addr += 4; |
return addr; |
} |
case SV_PRIMITIVE_ID: |
// Fixed address when read as a shader input, recorded location otherwise. |
return shaderFile == FILE_SHADER_INPUT ? 0x18 : |
sysvalLocation[sym->reg.data.sv.sv]; |
case SV_NCTAID: |
return 0x8 + 2 * sym->reg.data.sv.index; |
case SV_CTAID: |
return 0xc + 2 * sym->reg.data.sv.index; |
case SV_NTID: |
return 0x2 + 2 * sym->reg.data.sv.index; |
case SV_TID: |
return 0; |
case SV_SAMPLE_POS: |
return 0; /* sample position is handled differently */ |
default: |
return sysvalLocation[sym->reg.data.sv.sv]; |
} |
} |
// long: rrr, arr, rcr, acr, rrc, arc, gcr, grr |
// short: rr, ar, rc, gr |
// immd: ri, gi |
// Reports whether instruction 'i' may take the source of load 'ld' directly |
// as its operand 's'. Returns false as soon as one of the operand-file, |
// operand-combination or address-encodability checks below fails. |
bool |
TargetNV50::insnCanLoad(const Instruction *i, int s, |
const Instruction *ld) const |
{ |
DataFile sf = ld->src(0).getFile(); |
// Immediates are rejected when 'i' has a predicate source or a flags def. |
if (sf == FILE_IMMEDIATE && (i->predSrc >= 0 || i->flagsDef >= 0)) |
return false; |
if (s >= opInfo[i->op].srcNr) |
return false; |
// The opcode must allow this file for source 's' at all (see initOpInfo). |
if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) |
return false; |
// Source 2 can only be replaced while source 1 is a GPR. |
if (s == 2 && i->src(1).getFile() != FILE_GPR) |
return false; |
// NOTE: don't rely on flagsDef |
if (sf == FILE_IMMEDIATE) |
for (int d = 0; i->defExists(d); ++d) |
if (i->def(d).getFile() == FILE_FLAGS) |
return false; |
// Build a 2-bit code per source describing the operand files the |
// instruction would have after the replacement: |
// 0 = GPR (or any file not listed), 1 = s[] / a[], 2 = c[], 3 = immediate. |
unsigned mode = 0; |
for (int z = 0; z < Target::operationSrcNr[i->op]; ++z) { |
DataFile zf = (z == s) ? sf : i->src(z).getFile(); |
switch (zf) { |
case FILE_GPR: |
break; |
case FILE_MEMORY_SHARED: |
case FILE_SHADER_INPUT: |
mode |= 1 << (z * 2); |
break; |
case FILE_MEMORY_CONST: |
mode |= 2 << (z * 2); |
break; |
case FILE_IMMEDIATE: |
mode |= 3 << (z * 2); |
default: |
break; |
} |
} |
// Only the combinations listed here are accepted. |
// NOTE(review): cases 0x09 and 0x0d dereference ld->bb without the null |
// check that the indirect-addressing path further down performs. |
switch (mode) { |
case 0x00: |
case 0x01: |
case 0x03: |
case 0x08: |
case 0x0c: |
case 0x20: |
case 0x21: |
break; |
case 0x09: |
// Shader inputs get transformed to p[] in geometry shaders, and those |
// aren't allowed to be used at the same time as c[]. |
if (ld->bb->getProgram()->getType() == Program::TYPE_GEOMETRY) |
return false; |
break; |
case 0x0d: |
if (ld->bb->getProgram()->getType() != Program::TYPE_GEOMETRY) |
return false; |
break; |
default: |
return false; |
} |
// Access size used for the offset range check below. |
uint8_t ldSize; |
if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { |
// 32-bit MUL will be split into 16-bit MULs |
if (ld->src(0).isIndirect(0)) |
return false; |
if (sf == FILE_IMMEDIATE) |
return false; |
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST) |
return false; |
ldSize = 2; |
} else { |
ldSize = typeSizeof(ld->dType); |
} |
// Immediates have no address to validate. |
if (sf == FILE_IMMEDIATE) |
return true; |
// Check if memory access is encodable: |
if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access |
return false; |
if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize)) |
return false; |
if (ld->src(0).isIndirect(0)) { |
// The load is indirectly addressed, so no source of 'i' may already be. |
for (int z = 0; i->srcExists(z); ++z) |
if (i->src(z).isIndirect(0)) |
return false; |
// s[] access only possible in CP, $aX always applies |
if (sf == FILE_MEMORY_SHARED) |
return true; |
if (!ld->bb) // can't check type ... |
return false; |
Program::Type pt = ld->bb->getProgram()->getType(); |
// $aX applies to c[] only in VP, FP, GP if p[] is not accessed |
if (pt == Program::TYPE_COMPUTE) |
return false; |
if (pt == Program::TYPE_GEOMETRY) { |
if (sf == FILE_MEMORY_CONST) |
return i->src(s).getFile() != FILE_SHADER_INPUT; |
return sf == FILE_SHADER_INPUT; |
} |
return sf == FILE_MEMORY_CONST; |
} |
return true; |
} |
// Tells whether a memory access of type 'ty' to 'file' is supported: |
// B96 and NONE never are, accesses of up to 4 bytes always are, and wider |
// ones only for local and global memory. |
bool |
TargetNV50::isAccessSupported(DataFile file, DataType ty) const |
{ |
   if (ty == TYPE_NONE || ty == TYPE_B96) |
      return false; |
   if (typeSizeof(ty) <= 4) |
      return true; |
   return file == FILE_MEMORY_LOCAL || file == FILE_MEMORY_GLOBAL; |
} |
// Tells whether opcode 'op' with data type 'ty' is reported as supported by |
// this target. F64 requires chipset >= 0xa0; the opcodes listed with |
// 'return false' are always reported as unsupported. |
bool |
TargetNV50::isOpSupported(operation op, DataType ty) const |
{ |
if (ty == TYPE_F64 && chipset < 0xa0) |
return false; |
switch (op) { |
case OP_PRERET: |
return chipset >= 0xa0; |
case OP_TXG: |
// Available from 0xa3 on, except on 0xaa and 0xac. |
return chipset >= 0xa3 && chipset != 0xaa && chipset != 0xac; |
case OP_POW: |
case OP_SQRT: |
case OP_DIV: |
case OP_MOD: |
case OP_SET_AND: |
case OP_SET_OR: |
case OP_SET_XOR: |
case OP_SLCT: |
case OP_SELP: |
case OP_POPCNT: |
case OP_INSBF: |
case OP_EXTBF: |
case OP_EXIT: // want exit modifier instead (on NOP if required) |
case OP_MEMBAR: |
return false; |
case OP_SAD: |
return ty == TYPE_S32; |
default: |
return true; |
} |
} |
// Tells whether source 's' of 'insn' may carry modifier 'mod'. |
// Non-float instructions are restricted to the opcodes in the switch below; |
// after that, 'mod' must be a subset of the modifiers initOpInfo() recorded |
// for that source. |
bool |
TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const |
{ |
if (!isFloatType(insn->dType)) { |
switch (insn->op) { |
case OP_ABS: |
case OP_NEG: |
case OP_CVT: |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
break; |
case OP_ADD: |
// Rejected when the other source of the ADD is already negated. |
if (insn->src(s ? 0 : 1).mod.neg()) |
return false; |
break; |
case OP_SUB: |
// For source 0 the answer depends only on whether source 1 is negated. |
if (s == 0) |
return insn->src(1).mod.neg() ? false : true; |
break; |
case OP_SET: |
if (insn->sType != TYPE_F32) |
return false; |
break; |
default: |
return false; |
} |
} |
// opInfo[].srcMods is only filled for sources 0..2. |
if (s >= 3) |
return false; |
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod; |
} |
// Tells whether 'insn' can still be given a predicate: it must not already |
// have one, must not read flags, must have no immediate source, and its |
// opcode must be marked predicable in opInfo[]. 'pred' is not examined. |
bool |
TargetNV50::mayPredicate(const Instruction *insn, const Value *pred) const |
{ |
   if (insn->getPredicate() || insn->flagsSrc >= 0) |
      return false; |
   int idx = 0; |
   while (insn->srcExists(idx)) { |
      if (insn->src(idx).getFile() == FILE_IMMEDIATE) |
         return false; |
      ++idx; |
   } |
   return opInfo[insn->op].predicate; |
} |
// Tells whether the result of 'insn' may be saturated: always for CVT, |
// otherwise only for F32 results of opcodes whose opInfo[] entry carries |
// NV50_IR_MOD_SAT as a destination modifier. |
bool |
TargetNV50::isSatSupported(const Instruction *insn) const |
{ |
   if (insn->op == OP_CVT) |
      return true; |
   return insn->dType == TYPE_F32 && |
          (opInfo[insn->op].dstMods & NV50_IR_MOD_SAT); |
} |
// Estimated result latency of 'i': 100 for loads from local or global |
// memory, 22 for everything else. |
int TargetNV50::getLatency(const Instruction *i) const |
{ |
   // TODO: tune these values |
   const int baseLatency = 22; |
   if (i->op != OP_LOAD) |
      return baseLatency; |
   const DataFile srcFile = i->src(0).getFile(); |
   if (srcFile == FILE_MEMORY_LOCAL || srcFile == FILE_MEMORY_GLOBAL) |
      return 100; // really 400 to 800 |
   return baseLatency; |
} |
// Inverse throughput: the number of cycles needed to issue one instruction |
// for a full warp (32 threads). |
// |
// With more than one warp in flight, a higher issue latency means a lower |
// result latency, because the MP spends more time on the other warps. |
// The value also helps to determine the number of cycles between |
// instructions of a single warp. |
// |
int TargetNV50::getThroughput(const Instruction *i) const |
{ |
   // TODO: tune these values |
   switch (i->dType) { |
   case TYPE_F32: |
      break; |
   case TYPE_U32: |
   case TYPE_S32: |
      return 4; |
   case TYPE_F64: |
      return 32; |
   default: |
      return 1; |
   } |
   // F32 only from here on: special-function opcodes are slower. |
   switch (i->op) { |
   case OP_RCP: |
   case OP_RSQ: |
   case OP_LG2: |
   case OP_SIN: |
   case OP_COS: |
   case OP_PRESIN: |
   case OP_PREEX2: |
      return 16; |
   default: |
      break; |
   } |
   return 4; |
} |
// Stores the byte address of varying 'var' (first slot * 4) in locs[] under |
// the system value matching its TGSI semantic; other semantics are ignored. |
// For the position semantic, the component mask is also written to masks[0] |
// when 'masks' is non-NULL. |
static void |
recordLocation(uint16_t *locs, uint8_t *masks, |
               const struct nv50_ir_varying *var) |
{ |
   const uint16_t byteAddr = var->slot[0] * 4; |
   switch (var->sn) { |
   case TGSI_SEMANTIC_POSITION: |
      locs[SV_POSITION] = byteAddr; |
      if (masks) |
         masks[0] = var->mask; |
      break; |
   case TGSI_SEMANTIC_INSTANCEID: |
      locs[SV_INSTANCE_ID] = byteAddr; |
      break; |
   case TGSI_SEMANTIC_VERTEXID: |
      locs[SV_VERTEX_ID] = byteAddr; |
      break; |
   case TGSI_SEMANTIC_PRIMID: |
      locs[SV_PRIMITIVE_ID] = byteAddr; |
      break; |
   case TGSI_SEMANTIC_LAYER: |
      locs[SV_LAYER] = byteAddr; |
      break; |
   case TGSI_SEMANTIC_VIEWPORT_INDEX: |
      locs[SV_VIEWPORT_INDEX] = byteAddr; |
      break; |
   default: |
      break; |
   } |
} |
// Records in sysvalLocation[] where the driver placed the system-value |
// related outputs, inputs and system values described by 'info'. |
// Only the input pass may update wposMask (the others pass NULL). |
void |
TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info *info) |
{ |
unsigned int i; |
for (i = 0; i < info->numOutputs; ++i) |
recordLocation(sysvalLocation, NULL, &info->out[i]); |
for (i = 0; i < info->numInputs; ++i) |
recordLocation(sysvalLocation, &wposMask, &info->in[i]); |
for (i = 0; i < info->numSysVals; ++i) |
recordLocation(sysvalLocation, NULL, &info->sv[i]); |
// A location of 0x200 or more is treated as "no position slot recorded". |
if (sysvalLocation[SV_POSITION] >= 0x200) { |
// not assigned by driver, but we need it internally |
wposMask = 0x8; |
sysvalLocation[SV_POSITION] = 0; |
} |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.h |
---|
0,0 → 1,72 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target.h" |
namespace nv50_ir { |
// Indices of the built-in library routines, and their count. |
// NOTE(review): these carry the NVC0_ prefix although this is the NV50 |
// header; nv50_ir_target_nvc0.h defines the same names with the same values. |
#define NVC0_BUILTIN_DIV_U32 0 |
#define NVC0_BUILTIN_DIV_S32 1 |
#define NVC0_BUILTIN_RCP_F64 2 |
#define NVC0_BUILTIN_RSQ_F64 3 |
#define NVC0_BUILTIN_COUNT 4 |
// Target description for NV50-family chipsets: answers the code generator's |
// questions about supported opcodes, modifiers, operand files, register file |
// sizes, system-value addresses and scheduling estimates. |
class TargetNV50 : public Target |
{ |
public: |
TargetNV50(unsigned int chipset); |
virtual CodeEmitter *getCodeEmitter(Program::Type); |
virtual bool runLegalizePass(Program *, CGStage stage) const; |
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; |
virtual void parseDriverInfo(const struct nv50_ir_prog_info *); |
virtual bool insnCanLoad(const Instruction *insn, int s, |
const Instruction *ld) const; |
virtual bool isOpSupported(operation, DataType) const; |
virtual bool isAccessSupported(DataFile, DataType) const; |
virtual bool isModSupported(const Instruction *, int s, Modifier) const; |
virtual bool isSatSupported(const Instruction *) const; |
virtual bool mayPredicate(const Instruction *, const Value *) const; |
virtual int getLatency(const Instruction *) const; |
virtual int getThroughput(const Instruction *) const; |
virtual unsigned int getFileSize(DataFile) const; |
virtual unsigned int getFileUnit(DataFile) const; |
virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const; |
uint32_t getBuiltinOffset(int builtin) const; |
private: |
// Fills opInfo[] and nativeFileMap[]; called from the constructor. |
void initOpInfo(); |
// Address recorded for each system value, indexed by SVSemantic; |
// filled by parseDriverInfo(). |
uint16_t sysvalLocation[SV_LAST + 1]; |
// Component mask of the position input, used by getSVAddress(). |
uint8_t wposMask; |
}; |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp |
---|
0,0 → 1,617 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target_nvc0.h" |
namespace nv50_ir { |
// Factory: returns a newly allocated TargetNVC0 for 'chipset'. |
// The object is created with 'new'; nothing here releases it. |
Target *getTargetNVC0(unsigned int chipset) |
{ |
return new TargetNVC0(chipset); |
} |
// Stores the chipset id and builds the opcode tables via initOpInfo(). |
// NOTE(review): the meaning of the three flags handed to the Target base |
// constructor (card < 0x110, false, card >= 0xe4) is defined in |
// nv50_ir_target.h - confirm there. |
TargetNVC0::TargetNVC0(unsigned int card) : |
Target(card < 0x110, false, card >= 0xe4) |
{ |
chipset = card; |
initOpInfo(); |
} |
// BUILTINS / LIBRARY FUNCTIONS: |
// laziness -> will just hardcode everything for the time being |
#include "lib/gf100.asm.h" |
#include "lib/gk104.asm.h" |
#include "lib/gk110.asm.h" |
// Hands out the pre-assembled built-in library matching the chipset: |
// gk104 code for 0xe0-series chips below NVISA_GK20A_CHIPSET, gk110 code for |
// GK20A and the 0xf0 / 0x100 series, gf100 code for everything else. |
// '*size' receives sizeof() of the selected array, i.e. a byte count. |
void |
TargetNVC0::getBuiltinCode(const uint32_t **code, uint32_t *size) const |
{ |
// Select on the chipset series (low nibble masked off). |
switch (chipset & ~0xf) { |
case 0xe0: |
if (chipset < NVISA_GK20A_CHIPSET) { |
*code = (const uint32_t *)&gk104_builtin_code[0]; |
*size = sizeof(gk104_builtin_code); |
break; |
} |
/* fall-through for GK20A */ |
case 0xf0: |
case 0x100: |
*code = (const uint32_t *)&gk110_builtin_code[0]; |
*size = sizeof(gk110_builtin_code); |
break; |
default: |
*code = (const uint32_t *)&gf100_builtin_code[0]; |
*size = sizeof(gf100_builtin_code); |
break; |
} |
} |
// Returns the offset of built-in routine 'builtin' (one of the |
// NVC0_BUILTIN_* indices) inside the code blob that getBuiltinCode() selects |
// for the same chipset. |
uint32_t |
TargetNVC0::getBuiltinOffset(int builtin) const |
{ |
   // 'builtin' is signed, so guard both ends of the table. |
   assert(builtin >= 0 && builtin < NVC0_BUILTIN_COUNT); |
   switch (chipset & ~0xf) { |
   case 0xe0: |
      if (chipset < NVISA_GK20A_CHIPSET) |
         return gk104_builtin_offsets[builtin]; |
      /* fall-through for GK20A */ |
   case 0xf0: |
   case 0x100: |
      return gk110_builtin_offsets[builtin]; |
   default: |
      return gf100_builtin_offsets[builtin]; |
   } |
} |
// One row of the per-opcode capability table (_initProps) that |
// TargetNVC0::initOpInfo() folds into opInfo[]. |
// In the per-source masks, bits 0..2 are tested and bit s applies to source |
// operand s. |
struct opProperties |
{ |
// Opcode this row describes. |
operation op; |
// Sources that accept NV50_IR_MOD_NEG. |
unsigned int mNeg : 4; |
// Sources that accept NV50_IR_MOD_ABS. |
unsigned int mAbs : 4; |
// Sources that accept NV50_IR_MOD_NOT. |
unsigned int mNot : 4; |
// Only bit 3 (0x8) is tested: the destination accepts NV50_IR_MOD_SAT. |
unsigned int mSat : 4; |
// Sources that may come from FILE_MEMORY_CONST. |
unsigned int fConst : 3; |
// Bits 0..2: sources that may be FILE_IMMEDIATE. |
unsigned int fImmd : 4; // last bit indicates if full immediate is supported |
}; |
// Capability table for the NVC0 target, applied by TargetNVC0::initOpInfo(). |
// Columns follow the field order of struct opProperties. In the imm column, |
// '| 0x8' makes initOpInfo() set immdBits to 0xffffffff for that opcode. |
static const struct opProperties _initProps[] = |
{ |
// neg abs not sat c[] imm |
{ OP_ADD, 0x3, 0x3, 0x0, 0x8, 0x2, 0x2 | 0x8 }, |
{ OP_SUB, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 | 0x8 }, |
{ OP_MUL, 0x3, 0x0, 0x0, 0x8, 0x2, 0x2 | 0x8 }, |
{ OP_MAX, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_MIN, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_MAD, 0x7, 0x0, 0x0, 0x8, 0x6, 0x2 | 0x8 }, // special c[] constraint |
{ OP_MADSP, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, |
{ OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, |
{ OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, |
{ OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, |
{ OP_CEIL, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, |
{ OP_FLOOR, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, |
{ OP_TRUNC, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, |
{ OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, |
{ OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, |
{ OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, |
{ OP_SHL, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SHR, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SET, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SLCT, 0x4, 0x0, 0x0, 0x0, 0x6, 0x2 }, // special c[] constraint |
{ OP_PREEX2, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 }, |
{ OP_PRESIN, 0x1, 0x1, 0x0, 0x0, 0x1, 0x1 }, |
{ OP_COS, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_SIN, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_EX2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_LG2, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_RCP, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_RSQ, 0x1, 0x1, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_DFDX, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_DFDY, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, |
{ OP_CALL, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, |
{ OP_POPCNT, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 }, |
{ OP_INSBF, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, |
{ OP_EXTBF, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_BFIND, 0x0, 0x0, 0x1, 0x0, 0x1, 0x1 }, |
{ OP_PERMT, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, |
{ OP_SET_AND, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SET_OR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SET_XOR, 0x3, 0x3, 0x0, 0x0, 0x2, 0x2 }, |
// saturate only: |
{ OP_LINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 }, |
{ OP_PINTERP, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0 }, |
// nve4 ops: |
{ OP_SULDB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, |
{ OP_SUSTB, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, |
{ OP_SUSTP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0 }, |
{ OP_SUCLAMP, 0x0, 0x0, 0x0, 0x0, 0x2, 0x2 }, |
{ OP_SUBFM, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 }, |
{ OP_SUEAU, 0x0, 0x0, 0x0, 0x0, 0x6, 0x2 } |
}; |
// Fills nativeFileMap[] and opInfo[] for the NVC0 target. |
// Every opcode first receives generic defaults (F32 types, GPR-only sources, |
// no immediate bits, has a destination, predicable unless pseudo); the |
// static lists below and the _initProps[] table then override individual |
// opcodes. |
void TargetNVC0::initOpInfo() |
{ |
   unsigned int i, j; |
   // One bit per opcode, 32 opcodes per word. |
   static const uint32_t commutative[(OP_LAST + 31) / 32] = |
   { |
      // ADD, MAD, MUL, AND, OR, XOR, MAX, MIN |
      0x0670ca00, 0x0000003f, 0x00000000, 0x00000000 |
   }; |
   static const uint32_t shortForm[(OP_LAST + 31) / 32] = |
   { |
      // ADD, MAD, MUL, AND, OR, XOR, PRESIN, PREEX2, SFN, CVT, PINTERP, MOV |
      0x0670ca00, 0x00000000, 0x00000000, 0x00000000 |
   }; |
   static const operation noDest[] = |
   { |
      OP_STORE, OP_WRSV, OP_EXPORT, OP_BRA, OP_CALL, OP_RET, OP_EXIT, |
      OP_DISCARD, OP_CONT, OP_BREAK, OP_PRECONT, OP_PREBREAK, OP_PRERET, |
      OP_JOIN, OP_JOINAT, OP_BRKPT, OP_MEMBAR, OP_EMIT, OP_RESTART, |
      OP_QUADON, OP_QUADPOP, OP_TEXBAR, OP_SUSTB, OP_SUSTP, OP_SUREDP, |
      OP_SUREDB, OP_BAR |
   }; |
   static const operation noPred[] = |
   { |
      OP_CALL, OP_PRERET, OP_QUADON, OP_QUADPOP, |
      OP_JOINAT, OP_PREBREAK, OP_PRECONT, OP_BRKPT |
   }; |
   // Identity file mapping, except that address registers map to GPRs. |
   for (i = 0; i < DATA_FILE_COUNT; ++i) |
      nativeFileMap[i] = (DataFile)i; |
   nativeFileMap[FILE_ADDRESS] = FILE_GPR; |
   // Generic defaults for every opcode. |
   for (i = 0; i < OP_LAST; ++i) { |
      opInfo[i].variants = NULL; |
      opInfo[i].op = (operation)i; |
      opInfo[i].srcTypes = 1 << (int)TYPE_F32; |
      opInfo[i].dstTypes = 1 << (int)TYPE_F32; |
      opInfo[i].immdBits = 0; |
      opInfo[i].srcNr = operationSrcNr[i]; |
      for (j = 0; j < opInfo[i].srcNr; ++j) { |
         opInfo[i].srcMods[j] = 0; |
         opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; |
      } |
      opInfo[i].dstMods = 0; |
      opInfo[i].dstFiles = 1 << (int)FILE_GPR; |
      opInfo[i].hasDest = 1; |
      opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); |
      opInfo[i].commutative = (commutative[i / 32] >> (i % 32)) & 1; |
      opInfo[i].pseudo = (i < OP_MOV); |
      opInfo[i].predicate = !opInfo[i].pseudo; |
      opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); |
      // Test the bit by shifting the unsigned table word right; the former |
      // '1 << (i % 32)' shifted into the sign bit of int for bit 31. |
      opInfo[i].minEncSize = ((shortForm[i / 32] >> (i % 32)) & 1) ? 4 : 8; |
   } |
   for (i = 0; i < sizeof(noDest) / sizeof(noDest[0]); ++i) |
      opInfo[noDest[i]].hasDest = 0; |
   for (i = 0; i < sizeof(noPred) / sizeof(noPred[0]); ++i) |
      opInfo[noPred[i]].predicate = 0; |
   // Per-opcode modifier and source-file capabilities; bit s of each mask |
   // applies to source operand s. |
   for (i = 0; i < sizeof(_initProps) / sizeof(_initProps[0]); ++i) { |
      const struct opProperties *prop = &_initProps[i]; |
      for (int s = 0; s < 3; ++s) { |
         if (prop->mNeg & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NEG; |
         if (prop->mAbs & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_ABS; |
         if (prop->mNot & (1 << s)) |
            opInfo[prop->op].srcMods[s] |= NV50_IR_MOD_NOT; |
         if (prop->fConst & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_MEMORY_CONST; |
         if (prop->fImmd & (1 << s)) |
            opInfo[prop->op].srcFiles[s] |= 1 << (int)FILE_IMMEDIATE; |
      } |
      // Bit 3 of fImmd is a per-opcode flag, not a per-source one, so it is |
      // evaluated once here rather than on every pass of the source loop. |
      if (prop->fImmd & 8) |
         opInfo[prop->op].immdBits = 0xffffffff; |
      if (prop->mSat & 8) |
         opInfo[prop->op].dstMods = NV50_IR_MOD_SAT; |
   } |
} |
// Returns the size limit this target reports for each register/memory file. |
// An unknown file trips the assert and yields 0. |
unsigned int |
TargetNVC0::getFileSize(DataFile file) const |
{ |
switch (file) { |
case FILE_NULL: return 0; |
// 255 GPRs from NVISA_GK20A_CHIPSET on, 63 on earlier chipsets. |
case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63; |
case FILE_PREDICATE: return 7; |
case FILE_FLAGS: return 1; |
case FILE_ADDRESS: return 0; |
case FILE_IMMEDIATE: return 0; |
case FILE_MEMORY_CONST: return 65536; |
case FILE_SHADER_INPUT: return 0x400; |
case FILE_SHADER_OUTPUT: return 0x400; |
case FILE_MEMORY_GLOBAL: return 0xffffffff; |
case FILE_MEMORY_SHARED: return 16 << 10; |
case FILE_MEMORY_LOCAL: return 48 << 10; |
case FILE_SYSTEM_VALUE: return 32; |
default: |
assert(!"invalid file"); |
return 0; |
} |
} |
// Unit value per file: 2 for GPRs, address registers and system values, |
// 0 for everything else. |
// NOTE(review): presumably log2 of the unit size in bytes - confirm against |
// the Target base class. |
unsigned int |
TargetNVC0::getFileUnit(DataFile file) const |
{ |
   switch (file) { |
   case FILE_GPR: |
   case FILE_ADDRESS: |
   case FILE_SYSTEM_VALUE: |
      return 2; |
   default: |
      return 0; |
   } |
} |
// Maps a system-value symbol to the fixed address this target uses for it. |
// Per-component values add idx * 4 to their base. NTID, NCTAID and GRIDID |
// only have an address from NVISA_GK104_CHIPSET on; earlier chipsets and |
// unlisted system values yield 0xffffffff (~0). |
uint32_t |
TargetNVC0::getSVAddress(DataFile shaderFile, const Symbol *sym) const |
{ |
const int idx = sym->reg.data.sv.index; |
const SVSemantic sv = sym->reg.data.sv.sv; |
const bool isInput = shaderFile == FILE_SHADER_INPUT; |
const bool kepler = getChipset() >= NVISA_GK104_CHIPSET; |
switch (sv) { |
case SV_POSITION: return 0x070 + idx * 4; |
case SV_INSTANCE_ID: return 0x2f8; |
case SV_VERTEX_ID: return 0x2fc; |
case SV_PRIMITIVE_ID: return isInput ? 0x060 : 0x040; |
case SV_LAYER: return 0x064; |
case SV_VIEWPORT_INDEX: return 0x068; |
case SV_POINT_SIZE: return 0x06c; |
case SV_CLIP_DISTANCE: return 0x2c0 + idx * 4; |
case SV_POINT_COORD: return 0x2e0 + idx * 4; |
case SV_FACE: return 0x3fc; |
case SV_TESS_FACTOR: return 0x000 + idx * 4; |
case SV_TESS_COORD: return 0x2f0 + idx * 4; |
case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0; |
case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0; |
case SV_GRIDID: return kepler ? 0x18 : ~0; |
case SV_SAMPLE_INDEX: return 0; |
case SV_SAMPLE_POS: return 0; |
case SV_SAMPLE_MASK: return 0; |
default: |
return 0xffffffff; |
} |
} |
// Reports whether instruction 'i' may take the source of load 'ld' directly |
// as its operand 's'. Returns false as soon as one of the operand-file, |
// operand-combination or immediate-encodability checks below fails. |
bool |
TargetNVC0::insnCanLoad(const Instruction *i, int s, |
                        const Instruction *ld) const |
{ |
   DataFile sf = ld->src(0).getFile(); |
   // immediate 0 can be represented by GPR $r63/$r255 |
   if (sf == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) |
      return (!i->isPseudo() && |
              !i->asTex() && |
              i->op != OP_EXPORT && i->op != OP_STORE); |
   if (s >= opInfo[i->op].srcNr) |
      return false; |
   // The opcode must allow this file for source 's' at all (see initOpInfo). |
   if (!(opInfo[i->op].srcFiles[s] & (1 << (int)sf))) |
      return false; |
   // indirect loads can only be done by OP_LOAD/VFETCH/INTERP on nvc0 |
   if (ld->src(0).isIndirect(0)) |
      return false; |
   // Every existing source must be a GPR, a predicate or a zero immediate |
   // (the third source of SUCLAMP is exempt from the immediate check). |
   for (int k = 0; i->srcExists(k); ++k) { |
      if (i->src(k).getFile() == FILE_IMMEDIATE) { |
         if (k == 2 && i->op == OP_SUCLAMP) // special case |
            continue; |
         if (i->getSrc(k)->reg.data.u64 != 0) |
            return false; |
      } else |
      if (i->src(k).getFile() != FILE_GPR && |
          i->src(k).getFile() != FILE_PREDICATE) { |
         return false; |
      } |
   } |
   // not all instructions support full 32 bit immediates |
   if (sf == FILE_IMMEDIATE) { |
      // Restored from the mis-decoded "Storage (R) =": this is a reference |
      // named 'reg' to the immediate's storage. |
      Storage &reg = ld->getSrc(0)->asImm()->reg; |
      if (typeSizeof(i->sType) > 4) |
         return false; |
      if (opInfo[i->op].immdBits != 0xffffffff) { |
         if (i->sType == TYPE_F32) { |
            // The low 12 bits must be clear. |
            if (reg.data.u32 & 0xfff) |
               return false; |
         } else |
         if (i->sType == TYPE_S32 || i->sType == TYPE_U32) { |
            // with u32, 0xfffff counts as 0xffffffff as well |
            if (reg.data.s32 > 0x7ffff || reg.data.s32 < -0x80000) |
               return false; |
         } |
      } else |
      if (i->op == OP_MAD || i->op == OP_FMA) { |
         // requires src == dst, cannot decide before RA |
         // (except if we implement more constraints) |
         if (reg.data.u32 & 0xfff) |
            return false; |
      } else |
      if (i->op == OP_ADD && i->sType == TYPE_F32) { |
         // add f32 LIMM cannot saturate |
         if (i->saturate && (reg.data.u32 & 0xfff)) |
            return false; |
      } |
   } |
   return true; |
} |
// Tells whether a memory access of type 'ty' to 'file' is supported. |
// TYPE_NONE never is. Constant memory on chipsets >= 0xe0 accepts any type |
// of up to 8 bytes; everywhere else only TYPE_B96 is refused. |
bool |
TargetNVC0::isAccessSupported(DataFile file, DataType ty) const |
{ |
   if (ty == TYPE_NONE) |
      return false; |
   const bool keplerConst = |
      (file == FILE_MEMORY_CONST) && (getChipset() >= 0xe0); // wrong encoding ? |
   if (keplerConst) |
      return typeSizeof(ty) <= 8; |
   return ty != TYPE_B96; |
} |
// Tells whether opcode 'op' with data type 'ty' is reported as supported: |
// MAD/FMA only for F32, SAD only for S32/U32, POW/SQRT/DIV/MOD never, |
// everything else always. |
bool |
TargetNVC0::isOpSupported(operation op, DataType ty) const |
{ |
   switch (op) { |
   case OP_MAD: |
   case OP_FMA: |
      return ty == TYPE_F32; |
   case OP_SAD: |
      return ty == TYPE_S32 || ty == TYPE_U32; |
   case OP_POW: |
   case OP_SQRT: |
   case OP_DIV: |
   case OP_MOD: |
      return false; |
   default: |
      return true; |
   } |
} |
// Tells whether source 's' of 'insn' may carry modifier 'mod'. |
// Non-float instructions are restricted to the opcodes in the switch below; |
// after that, 'mod' must be a subset of the modifiers initOpInfo() recorded |
// for that source. |
bool |
TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const |
{ |
if (!isFloatType(insn->dType)) { |
switch (insn->op) { |
case OP_ABS: |
case OP_NEG: |
case OP_CVT: |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
case OP_POPCNT: |
case OP_BFIND: |
break; |
case OP_SET: |
if (insn->sType != TYPE_F32) |
return false; |
break; |
case OP_ADD: |
// Non-float ADD: no abs, and no modifier when the other source is |
// already negated. |
if (mod.abs()) |
return false; |
if (insn->src(s ? 0 : 1).mod.neg()) |
return false; |
break; |
case OP_SUB: |
// For source 0 the answer depends only on whether source 1 is negated. |
if (s == 0) |
return insn->src(1).mod.neg() ? false : true; |
break; |
default: |
return false; |
} |
} |
// opInfo[].srcMods is only filled for sources 0..2. |
if (s >= 3) |
return false; |
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod; |
} |
// Tells whether 'insn' can still be given a predicate: it must not already |
// have one and its opcode must be marked predicable in opInfo[]. |
// 'pred' is not examined. |
bool |
TargetNVC0::mayPredicate(const Instruction *insn, const Value *pred) const |
{ |
   return !insn->getPredicate() && opInfo[insn->op].predicate; |
} |
// Tells whether the result of 'insn' may be saturated. CVT always can. |
// Other opcodes need NV50_IR_MOD_SAT in their opInfo[] destination |
// modifiers; then U32 results are limited to ADD and MAD, and anything else |
// must produce F32. |
bool |
TargetNVC0::isSatSupported(const Instruction *insn) const |
{ |
   if (insn->op == OP_CVT) |
      return true; |
   const bool satCapable = |
      (opInfo[insn->op].dstMods & NV50_IR_MOD_SAT) != 0; |
   if (!satCapable) |
      return false; |
   if (insn->dType == TYPE_U32) |
      return (insn->op == OP_ADD) || (insn->op == OP_MAD); |
   // add f32 LIMM cannot saturate |
   if (insn->op == OP_ADD && insn->sType == TYPE_F32) { |
      // An immediate with any of its low 12 bits set needs the long form. |
      const bool needsLongImm = insn->getSrc(1)->asImm() && |
                                (insn->getSrc(1)->reg.data.u32 & 0xfff); |
      if (needsLongImm) |
         return false; |
   } |
   return insn->dType == TYPE_F32; |
} |
// Tells whether a multiplication by the constant 'f' can be expressed as a |
// post-multiply on a MUL: |f| must be an exact power of two between 2^-3 |
// and 2^3. On success 'e' receives that exponent; when false is returned, |
// 'e' must not be relied upon (it may be left untouched). |
bool |
TargetNVC0::isPostMultiplySupported(operation op, float f, int& e) const |
{ |
   if (op != OP_MUL) |
      return false; |
   f = fabsf(f); |
   // Reject everything outside [2^-3, 2^3] up front. This also filters out |
   // 0, infinity and NaN, for which log2f() returns -inf / +inf / NaN and |
   // the conversion to int below would be undefined. |
   if (!(f >= 0.125f && f <= 8.0f)) |
      return false; |
   e = static_cast<int>(log2f(f)); |
   if (e < -3 || e > 3) |
      return false; |
   // Only exact powers of two qualify. |
   return f == exp2f(static_cast<float>(e)); |
} |
// TODO: better values |
// this could be more precise, e.g. depending on the issue-to-read/write delay |
// of the depending instruction, but it's good enough |
// |
// Estimated result latency of 'i'. Chipsets below 0xe4 only distinguish |
// loads (700 with the CV cache mode, 48 otherwise) from everything else (24); |
// chipsets from 0xe4 on use the finer table below. |
int TargetNVC0::getLatency(const Instruction *i) const |
{ |
   if (chipset < 0xe4) { |
      if (i->op != OP_LOAD) |
         return 24; |
      return (i->cache == CACHE_CV) ? 700 : 48; |
   } |
   if (i->dType == TYPE_F64 || i->sType == TYPE_F64) |
      return 20; |
   switch (i->op) { |
   case OP_LINTERP: |
   case OP_PINTERP: |
      return 15; |
   case OP_LOAD: |
      if (i->src(0).getFile() == FILE_MEMORY_CONST) |
         return 9; |
      // fall through |
   case OP_VFETCH: |
      return 24; |
   default: |
      break; |
   } |
   if (Target::getOpClass(i->op) == OPCLASS_TEXTURE) |
      return 17; |
   if (i->op == OP_MUL && i->dType != TYPE_F32) |
      return 15; |
   return 9; |
} |
// These are "inverse" throughput values, i.e. the number of cycles required |
// to issue a specific instruction for a full warp (32 threads). |
// |
// Assuming we have more than 1 warp in flight, a higher issue latency results |
// in a lower result latency since the MP will have spent more time with other |
// warps. |
// This also helps to determine the number of cycles between instructions in |
// a single warp. |
// |
// The value is selected by the destination type first, then by opcode. |
int TargetNVC0::getThroughput(const Instruction *i) const |
{ |
// TODO: better values |
if (i->dType == TYPE_F32) { |
switch (i->op) { |
case OP_ADD: |
case OP_MUL: |
case OP_MAD: |
case OP_FMA: |
return 1; |
case OP_CVT: |
case OP_CEIL: |
case OP_FLOOR: |
case OP_TRUNC: |
case OP_SET: |
case OP_SLCT: |
case OP_MIN: |
case OP_MAX: |
return 2; |
// The opcodes listed here share the value of the default case. |
case OP_RCP: |
case OP_RSQ: |
case OP_LG2: |
case OP_SIN: |
case OP_COS: |
case OP_PRESIN: |
case OP_PREEX2: |
default: |
return 8; |
} |
} else |
if (i->dType == TYPE_U32 || i->dType == TYPE_S32) { |
switch (i->op) { |
case OP_ADD: |
case OP_AND: |
case OP_OR: |
case OP_XOR: |
case OP_NOT: |
return 1; |
// The opcodes listed here share the value of the default case. |
case OP_MUL: |
case OP_MAD: |
case OP_CVT: |
case OP_SET: |
case OP_SLCT: |
case OP_SHL: |
case OP_SHR: |
case OP_NEG: |
case OP_ABS: |
case OP_MIN: |
case OP_MAX: |
default: |
return 2; |
} |
} else |
if (i->dType == TYPE_F64) { |
return 2; |
} else { |
return 1; |
} |
} |
// Tells whether instructions 'a' and 'b' may be issued as a pair, with 'a' |
// being the first of the two. Only chipsets >= 0xe4 are evaluated; all |
// others always get false. |
bool TargetNVC0::canDualIssue(const Instruction *a, const Instruction *b) const |
{ |
const OpClass clA = operationClass[a->op]; |
const OpClass clB = operationClass[b->op]; |
if (getChipset() >= 0xe4) { |
// not texturing |
// not if the 2nd instruction isn't necessarily executed |
if (clA == OPCLASS_TEXTURE || clA == OPCLASS_FLOW) |
return false; |
// anything with MOV |
if (a->op == OP_MOV || b->op == OP_MOV) |
return true; |
if (clA == clB) { |
// only F32 arith or integer additions |
if (clA != OPCLASS_ARITH) |
return false; |
// NOTE(review): it is enough for ONE of the two instructions to be F32 |
// or an ADD - confirm that this is the intended condition. |
return (a->dType == TYPE_F32 || a->op == OP_ADD || |
b->dType == TYPE_F32 || b->op == OP_ADD); |
} |
// nothing with TEXBAR |
if (a->op == OP_TEXBAR || b->op == OP_TEXBAR) |
return false; |
// no loads and stores accessing the same space |
if ((clA == OPCLASS_LOAD && clB == OPCLASS_STORE) || |
(clB == OPCLASS_LOAD && clA == OPCLASS_STORE)) |
if (a->src(0).getFile() == b->src(0).getFile()) |
return false; |
// no > 32-bit ops |
if (typeSizeof(a->dType) > 4 || typeSizeof(b->dType) > 4 || |
typeSizeof(a->sType) > 4 || typeSizeof(b->sType) > 4) |
return false; |
return true; |
} else { |
return false; // info not needed (yet) |
} |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.h |
---|
0,0 → 1,73 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_target.h" |
namespace nv50_ir { |
// Identifiers of the built-in library routines (integer division and |
// f64 reciprocal / reciprocal square root). |
// NOTE(review): presumably these are the values accepted by |
// TargetNVC0::getBuiltinOffset(int builtin) - confirm against its definition. |
#define NVC0_BUILTIN_DIV_U32 0 |
#define NVC0_BUILTIN_DIV_S32 1 |
#define NVC0_BUILTIN_RCP_F64 2 |
#define NVC0_BUILTIN_RSQ_F64 3 |
// Number of identifiers defined above. |
#define NVC0_BUILTIN_COUNT 4 |
// Target description for the NVC0 code generator: declares the virtual |
// capability / cost queries of the Target base class that this back end |
// provides, plus the code-emitter factories. |
class TargetNVC0 : public Target |
{ |
public: |
TargetNVC0(unsigned int chipset); |
// Code emitter factories; NOTE(review): getCodeEmitter() presumably picks |
// between the NVC0 and GK110 variants based on the chipset - confirm. |
virtual CodeEmitter *getCodeEmitter(Program::Type); |
CodeEmitter *createCodeEmitterNVC0(Program::Type); |
CodeEmitter *createCodeEmitterGK110(Program::Type); |
virtual bool runLegalizePass(Program *, CGStage stage) const; |
// Access to the built-in library code blob and to offsets of routines in it. |
virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; |
virtual uint32_t getBuiltinOffset(int builtin) const; |
// Capability queries. |
virtual bool insnCanLoad(const Instruction *insn, int s, |
const Instruction *ld) const; |
virtual bool isOpSupported(operation, DataType) const; |
virtual bool isAccessSupported(DataFile, DataType) const; |
virtual bool isModSupported(const Instruction *, int s, Modifier) const; |
virtual bool isSatSupported(const Instruction *) const; |
virtual bool isPostMultiplySupported(operation, float, int& e) const; |
virtual bool mayPredicate(const Instruction *, const Value *) const; |
// Scheduling information. |
virtual bool canDualIssue(const Instruction *, const Instruction *) const; |
virtual int getLatency(const Instruction *) const; |
virtual int getThroughput(const Instruction *) const; |
virtual unsigned int getFileSize(DataFile) const; |
virtual unsigned int getFileUnit(DataFile) const; |
virtual uint32_t getSVAddress(DataFile shaderFile, const Symbol *sv) const; |
private: |
void initOpInfo(); |
}; |
// Defined elsewhere; only the prototype is visible here. |
bool calculateSchedDataNVC0(const Target *, Function *); |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp |
---|
0,0 → 1,392 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#include "codegen/nv50_ir_util.h" |
namespace nv50_ir { |
void DLList::clear() |
{ |
for (Item *next, *item = head.next; item != &head; item = next) { |
next = item->next; |
delete item; |
} |
head.next = head.prev = &head; |
} |
void |
DLList::Iterator::erase() |
{ |
Item *rem = pos; |
if (rem == term) |
return; |
pos = pos->next; |
DLLIST_DEL(rem); |
delete rem; |
} |
// Unlinks the current item and re-inserts it at the head of 'dest'.
// The iterator advances to the item's former 'next' neighbour.
void DLList::Iterator::moveToList(DLList& dest)
{
   assert(term != &dest.head);
   assert(pos != term);

   Item *const moved = pos;
   pos = moved->next;

   DLLIST_DEL(moved);
   DLLIST_ADDHEAD(&dest.head, moved);
}
bool |
DLList::Iterator::insert(void *data) |
{ |
Item *ins = new Item(data); |
ins->next = pos->next; |
ins->prev = pos; |
pos->next->prev = ins; |
pos->next = ins; |
if (pos == term) |
term = ins; |
return true; |
} |
void |
Stack::moveTo(Stack& that) |
{ |
unsigned int newSize = this->size + that.size; |
while (newSize > that.limit) |
that.resize(); |
memcpy(&that.array[that.size], &array[0], this->size * sizeof(Item)); |
that.size = newSize; |
this->size = 0; |
} |
// Copy constructor: starts empty and copies every range of 'that'.
Interval::Interval(const Interval& that) : head(NULL), tail(NULL)
{
   insert(that);
}

// Destructor: releases all ranges.
Interval::~Interval()
{
   this->clear();
}
void |
Interval::clear() |
{ |
for (Range *next, *r = head; r; r = next) { |
next = r->next; |
delete r; |
} |
head = tail = NULL; |
} |
bool |
Interval::extend(int a, int b) |
{ |
Range *r, **nextp = &head; |
// NOTE: we need empty intervals for fixed registers |
// if (a == b) |
// return false; |
assert(a <= b); |
for (r = head; r; r = r->next) { |
if (b < r->bgn) |
break; // insert before |
if (a > r->end) { |
// insert after |
nextp = &r->next; |
continue; |
} |
// overlap |
if (a < r->bgn) { |
r->bgn = a; |
if (b > r->end) |
r->end = b; |
r->coalesce(&tail); |
return true; |
} |
if (b > r->end) { |
r->end = b; |
r->coalesce(&tail); |
return true; |
} |
assert(a >= r->bgn); |
assert(b <= r->end); |
return true; |
} |
(*nextp) = new Range(a, b); |
(*nextp)->next = r; |
for (r = (*nextp); r->next; r = r->next); |
tail = r; |
return true; |
} |
bool Interval::contains(int pos) const |
{ |
for (Range *r = head; r && r->bgn <= pos; r = r->next) |
if (r->end > pos) |
return true; |
return false; |
} |
bool Interval::overlaps(const Interval &that) const |
{ |
#if 1 |
Range *a = this->head; |
Range *b = that.head; |
while (a && b) { |
if (b->bgn < a->end && |
b->end > a->bgn) |
return true; |
if (a->end <= b->bgn) |
a = a->next; |
else |
b = b->next; |
} |
#else |
for (Range *rA = this->head; rA; rA = rA->next) |
for (Range *rB = iv.head; rB; rB = rB->next) |
if (rB->bgn < rA->end && |
rB->end > rA->bgn) |
return true; |
#endif |
return false; |
} |
void Interval::insert(const Interval &that) |
{ |
for (Range *r = that.head; r; r = r->next) |
this->extend(r->bgn, r->end); |
} |
// Merges all ranges of 'that' into this interval and empties 'that'.
void Interval::unify(Interval &that)
{
   assert(this != &that);
   for (Range *next, *r = that.head; r; r = next) {
      next = r->next;
      this->extend(r->bgn, r->end);
      delete r;
   }
   // Reset both ends: leaving 'tail' untouched would keep a pointer to a
   // deleted Range that end() later dereferences.
   that.head = NULL;
   that.tail = NULL;
}
int Interval::length() const |
{ |
int len = 0; |
for (Range *r = head; r; r = r->next) |
len += r->bgn - r->end; |
return len; |
} |
void Interval::print() const |
{ |
if (!head) |
return; |
INFO("[%i %i)", head->bgn, head->end); |
for (const Range *r = head->next; r; r = r->next) |
INFO(" [%i %i)", r->bgn, r->end); |
INFO("\n"); |
} |
void |
BitSet::andNot(const BitSet &set) |
{ |
assert(data && set.data); |
assert(size >= set.size); |
for (unsigned int i = 0; i < (set.size + 31) / 32; ++i) |
data[i] &= ~set.data[i]; |
} |
// Sets in this set every bit that is set in 'set'.
// 'set' must not be larger than this set.
BitSet& BitSet::operator|=(const BitSet &set)
{
   assert(data && set.data);
   assert(size >= set.size);

   const unsigned int words = (set.size + 31) / 32;
   for (unsigned int w = 0; w < words; ++w)
      data[w] |= set.data[w];

   return *this;
}
bool BitSet::resize(unsigned int nBits) |
{ |
if (!data || !nBits) |
return allocate(nBits, true); |
const unsigned int p = (size + 31) / 32; |
const unsigned int n = (nBits + 31) / 32; |
if (n == p) |
return true; |
data = (uint32_t *)REALLOC(data, 4 * p, 4 * n); |
if (!data) { |
size = 0; |
return false; |
} |
if (n > p) |
memset(&data[p], 0, (n - p) * 4); |
if (nBits < size && (nBits % 32)) |
data[(nBits + 31) / 32 - 1] &= (1 << (nBits % 32)) - 1; |
size = nBits; |
return true; |
} |
bool BitSet::allocate(unsigned int nBits, bool zero) |
{ |
if (data && size < nBits) { |
FREE(data); |
data = NULL; |
} |
size = nBits; |
if (!data) |
data = reinterpret_cast<uint32_t *>(CALLOC((size + 31) / 32, 4)); |
if (zero) |
memset(data, 0, (size + 7) / 8); |
else |
if (size % 32) // clear unused bits (e.g. for popCount) |
data[(size + 31) / 32 - 1] &= (1 << (size % 32)) - 1; |
return data; |
} |
unsigned int BitSet::popCount() const |
{ |
unsigned int count = 0; |
for (unsigned int i = 0; i < (size + 31) / 32; ++i) |
if (data[i]) |
count += util_bitcount(data[i]); |
return count; |
} |
void BitSet::fill(uint32_t val) |
{ |
unsigned int i; |
for (i = 0; i < (size + 31) / 32; ++i) |
data[i] = val; |
if (val) |
data[i] &= ~(0xffffffff << (size % 32)); // BE ? |
} |
// Sets this set to (*pA | *pB); if pB is NULL this set becomes a copy of *pA.
void BitSet::setOr(BitSet *pA, BitSet *pB)
{
   if (!pB) {
      *this = *pA;
      return;
   }

   const unsigned int words = (size + 31) / 32;
   for (unsigned int w = 0; w < words; ++w)
      data[w] = pA->data[w] | pB->data[w];
}
int BitSet::findFreeRange(unsigned int count) const |
{ |
const uint32_t m = (1 << count) - 1; |
int pos = size; |
unsigned int i; |
const unsigned int end = (size + 31) / 32; |
if (count == 1) { |
for (i = 0; i < end; ++i) { |
pos = ffs(~data[i]) - 1; |
if (pos >= 0) |
break; |
} |
} else |
if (count == 2) { |
for (i = 0; i < end; ++i) { |
if (data[i] != 0xffffffff) { |
uint32_t b = data[i] | (data[i] >> 1) | 0xaaaaaaaa; |
pos = ffs(~b) - 1; |
if (pos >= 0) |
break; |
} |
} |
} else |
if (count == 4 || count == 3) { |
for (i = 0; i < end; ++i) { |
if (data[i] != 0xffffffff) { |
uint32_t b = |
(data[i] >> 0) | (data[i] >> 1) | |
(data[i] >> 2) | (data[i] >> 3) | 0xeeeeeeee; |
pos = ffs(~b) - 1; |
if (pos >= 0) |
break; |
} |
} |
} else { |
if (count <= 8) |
count = 8; |
else |
if (count <= 16) |
count = 16; |
else |
count = 32; |
for (i = 0; i < end; ++i) { |
if (data[i] != 0xffffffff) { |
for (pos = 0; pos < 32; pos += count) |
if (!(data[i] & (m << pos))) |
break; |
if (pos < 32) |
break; |
} |
} |
} |
pos += i * 32; |
return ((pos + count) <= size) ? pos : -1; |
} |
void BitSet::print() const |
{ |
unsigned int n = 0; |
INFO("BitSet of size %u:\n", size); |
for (unsigned int i = 0; i < (size + 31) / 32; ++i) { |
uint32_t bits = data[i]; |
while (bits) { |
int pos = ffs(bits) - 1; |
bits &= ~(1 << pos); |
INFO(" %i", i * 32 + pos); |
++n; |
if ((n % 16) == 0) |
INFO("\n"); |
} |
} |
if (n % 16) |
INFO("\n"); |
} |
} // namespace nv50_ir |
/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h |
---|
0,0 → 1,789 |
/* |
* Copyright 2011 Christoph Bumiller |
* |
* Permission is hereby granted, free of charge, to any person obtaining a |
* copy of this software and associated documentation files (the "Software"), |
* to deal in the Software without restriction, including without limitation |
* the rights to use, copy, modify, merge, publish, distribute, sublicense, |
* and/or sell copies of the Software, and to permit persons to whom the |
* Software is furnished to do so, subject to the following conditions: |
* |
* The above copyright notice and this permission notice shall be included in |
* all copies or substantial portions of the Software. |
* |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
* OTHER DEALINGS IN THE SOFTWARE. |
*/ |
#ifndef __NV50_IR_UTIL_H__ |
#define __NV50_IR_UTIL_H__ |
#include <new> |
#include <assert.h> |
#include <stdio.h> |
#include <memory> |
#include <map> |
#ifndef NDEBUG |
# include <typeinfo> |
#endif |
#include "util/u_inlines.h" |
#include "util/u_memory.h" |
// Message helpers; all of them forward printf-style arguments.
#define ERROR(args...) debug_printf("ERROR: " args)
#define WARN(args...) debug_printf("WARNING: " args)
#define INFO(args...) debug_printf(args)

// Prints only if flag NV50_IR_DEBUG_<f> is set in mask m. The mask is
// parenthesized so that expressions such as (a | b) are tested as a whole.
#define INFO_DBG(m, f, args...)          \
   do {                                  \
      if ((m) & NV50_IR_DEBUG_##f)       \
         debug_printf(args);             \
   } while(0)

// Prints to stderr and aborts the process.
#define FATAL(args...)          \
   do {                         \
      fprintf(stderr, args);    \
      abort();                  \
   } while(0)

// Placement-new helpers: construct an object in storage obtained from the
// mem_<obj> pool of the Program owning function f.
#define NV50_IR_FUNC_ALLOC_OBJ_DEF(obj, f, args...)               \
   new ((f)->getProgram()->mem_##obj.allocate()) obj(f, args)

#define new_Instruction(f, args...)                      \
   NV50_IR_FUNC_ALLOC_OBJ_DEF(Instruction, f, args)
#define new_CmpInstruction(f, args...)                   \
   NV50_IR_FUNC_ALLOC_OBJ_DEF(CmpInstruction, f, args)
#define new_TexInstruction(f, args...)                   \
   NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args)
#define new_FlowInstruction(f, args...)                  \
   NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args)

#define new_LValue(f, args...)                  \
   NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args)

// Same as above, but the pool is taken directly from Program p.
#define NV50_IR_PROG_ALLOC_OBJ_DEF(obj, p, args...)   \
   new ((p)->mem_##obj.allocate()) obj(p, args)

#define new_Symbol(p, args...)                           \
   NV50_IR_PROG_ALLOC_OBJ_DEF(Symbol, p, args)
#define new_ImmediateValue(p, args...)                   \
   NV50_IR_PROG_ALLOC_OBJ_DEF(ImmediateValue, p, args)

// Return objects to their owner p.
#define delete_Instruction(p, insn) (p)->releaseInstruction(insn)
#define delete_Value(p, val) (p)->releaseValue(val)
namespace nv50_ir { |
// Abstract forward iterator returning untyped element pointers.
class Iterator
{
public:
   virtual ~Iterator() { }

   // advance to the following element
   virtual void next() = 0;

   // element at the current position
   virtual void *get() const = 0;

   // if true, get will return 0
   virtual bool end() const = 0;

   // only for graph iterators
   virtual void reset()
   {
      assert(0);
   }
};
typedef std::auto_ptr<Iterator> IteratorRef; |
class ManipIterator : public Iterator |
{ |
public: |
virtual bool insert(void *) = 0; // insert after current position |
virtual void erase() = 0; |
}; |
// Helpers for circular doubly linked lists whose nodes have 'next' and |
// 'prev' members. |
// WARNING: do not use a->prev/next for __item or __list |
// (the arguments are evaluated several times while the links change). |
// DLLIST_DEL: unlink __item from its ring and make it point to itself. |
#define DLLIST_DEL(__item) \ |
do { \ |
(__item)->prev->next = (__item)->next; \ |
(__item)->next->prev = (__item)->prev; \ |
(__item)->next = (__item); \ |
(__item)->prev = (__item); \ |
} while(0) |
// DLLIST_ADDTAIL: link __item directly before __list (i.e. at the tail when |
// __list is the head node of the ring). |
#define DLLIST_ADDTAIL(__list, __item) \ |
do { \ |
(__item)->next = (__list); \ |
(__item)->prev = (__list)->prev; \ |
(__list)->prev->next = (__item); \ |
(__list)->prev = (__item); \ |
} while(0) |
// DLLIST_ADDHEAD: link __item directly after __list. |
#define DLLIST_ADDHEAD(__list, __item) \ |
do { \ |
(__item)->prev = (__list); \ |
(__item)->next = (__list)->next; \ |
(__list)->next->prev = (__item); \ |
(__list)->next = (__item); \ |
} while(0) |
// DLLIST_MERGE: join two rings into one; the nodes of ring __listB (starting |
// with __listB itself) follow the former last node of ring __listA. |
// 'ty' is the node pointer type used for the temporary. |
#define DLLIST_MERGE(__listA, __listB, ty) \ |
do { \ |
ty prevB = (__listB)->prev; \ |
(__listA)->prev->next = (__listB); \ |
(__listB)->prev->next = (__listA); \ |
(__listB)->prev = (__listA)->prev; \ |
(__listA)->prev = prevB; \ |
} while(0) |
// DLLIST_EMPTY: true if the node is linked only to itself. |
#define DLLIST_EMPTY(__list) ((__list)->next == (__list)) |
// DLLIST_FOR_EACH: loop header declaring DLList::Iterator 'it' that walks |
// the DLList pointed to by 'list' from its forward iterator until end(). |
#define DLLIST_FOR_EACH(list, it) \ |
for (DLList::Iterator (it) = (list)->iterator(); !(it).end(); (it).next()) |
class DLList |
{ |
public: |
class Item |
{ |
public: |
Item(void *priv) : next(this), prev(this), data(priv) { } |
public: |
Item *next; |
Item *prev; |
void *data; |
}; |
DLList() : head(0) { } |
~DLList() { clear(); } |
inline void insertHead(void *data) |
{ |
Item *item = new Item(data); |
assert(data); |
item->prev = &head; |
item->next = head.next; |
head.next->prev = item; |
head.next = item; |
} |
inline void insertTail(void *data) |
{ |
Item *item = new Item(data); |
assert(data); |
DLLIST_ADDTAIL(&head, item); |
} |
inline void insert(void *data) { insertTail(data); } |
void clear(); |
class Iterator : public ManipIterator |
{ |
public: |
Iterator(Item *head, bool r) : rev(r), pos(r ? head->prev : head->next), |
term(head) { } |
virtual void next() { if (!end()) pos = rev ? pos->prev : pos->next; } |
virtual void *get() const { return pos->data; } |
virtual bool end() const { return pos == term; } |
// caution: if you're at end-2 and erase it, then do next, you're at end |
virtual void erase(); |
virtual bool insert(void *data); |
// move item to a another list, no consistency with its iterators though |
void moveToList(DLList&); |
private: |
const bool rev; |
Item *pos; |
Item *term; |
friend class DLList; |
}; |
inline void erase(Iterator& pos) |
{ |
pos.erase(); |
} |
Iterator iterator() |
{ |
return Iterator(&head, false); |
} |
Iterator revIterator() |
{ |
return Iterator(&head, true); |
} |
private: |
Item head; |
}; |
class Stack |
{ |
public: |
class Item { |
public: |
union { |
void *p; |
int i; |
unsigned int u; |
float f; |
double d; |
} u; |
Item() { memset(&u, 0, sizeof(u)); } |
}; |
Stack() : size(0), limit(0), array(0) { } |
~Stack() { if (array) FREE(array); } |
inline void push(int i) { Item data; data.u.i = i; push(data); } |
inline void push(unsigned int u) { Item data; data.u.u = u; push(data); } |
inline void push(void *p) { Item data; data.u.p = p; push(data); } |
inline void push(float f) { Item data; data.u.f = f; push(data); } |
inline void push(Item data) |
{ |
if (size == limit) |
resize(); |
array[size++] = data; |
} |
inline Item pop() |
{ |
if (!size) { |
Item data; |
assert(0); |
return data; |
} |
return array[--size]; |
} |
inline unsigned int getSize() { return size; } |
inline Item& peek() { assert(size); return array[size - 1]; } |
void clear(bool releaseStorage = false) |
{ |
if (releaseStorage && array) |
FREE(array); |
size = limit = 0; |
} |
void moveTo(Stack&); // move all items to target (not like push(pop())) |
private: |
void resize() |
{ |
unsigned int sizeOld, sizeNew; |
sizeOld = limit * sizeof(Item); |
limit = MAX2(4, limit + limit); |
sizeNew = limit * sizeof(Item); |
array = (Item *)REALLOC(array, sizeOld, sizeNew); |
} |
unsigned int size; |
unsigned int limit; |
Item *array; |
}; |
class DynArray |
{ |
public: |
class Item |
{ |
public: |
union { |
uint32_t u32; |
void *p; |
}; |
}; |
DynArray() : data(NULL), size(0) { } |
~DynArray() { if (data) FREE(data); } |
inline Item& operator[](unsigned int i) |
{ |
if (i >= size) |
resize(i); |
return data[i]; |
} |
inline const Item operator[](unsigned int i) const |
{ |
return data[i]; |
} |
void resize(unsigned int index) |
{ |
const unsigned int oldSize = size * sizeof(Item); |
if (!size) |
size = 8; |
while (size <= index) |
size <<= 1; |
data = (Item *)REALLOC(data, oldSize, size * sizeof(Item)); |
} |
void clear() |
{ |
FREE(data); |
data = NULL; |
size = 0; |
} |
private: |
Item *data; |
unsigned int size; |
}; |
class ArrayList |
{ |
public: |
ArrayList() : size(0) { } |
void insert(void *item, int& id) |
{ |
id = ids.getSize() ? ids.pop().u.i : size++; |
data[id].p = item; |
} |
void remove(int& id) |
{ |
const unsigned int uid = id; |
assert(uid < size && data[id].p); |
ids.push(uid); |
data[uid].p = NULL; |
id = -1; |
} |
inline int getSize() const { return size; } |
inline void *get(unsigned int id) { assert(id < size); return data[id].p; } |
class Iterator : public nv50_ir::Iterator |
{ |
public: |
Iterator(const ArrayList *array) : pos(0), data(array->data) |
{ |
size = array->getSize(); |
if (size) |
nextValid(); |
} |
void nextValid() { while ((pos < size) && !data[pos].p) ++pos; } |
void next() { if (pos < size) { ++pos; nextValid(); } } |
void *get() const { assert(pos < size); return data[pos].p; } |
bool end() const { return pos >= size; } |
private: |
unsigned int pos; |
unsigned int size; |
const DynArray& data; |
friend class ArrayList; |
}; |
Iterator iterator() const { return Iterator(this); } |
void clear() |
{ |
data.clear(); |
ids.clear(true); |
size = 0; |
} |
private: |
DynArray data; |
Stack ids; |
unsigned int size; |
}; |
class Interval |
{ |
public: |
Interval() : head(0), tail(0) { } |
Interval(const Interval&); |
~Interval(); |
bool extend(int, int); |
void insert(const Interval&); |
void unify(Interval&); // clears source interval |
void clear(); |
inline int begin() const { return head ? head->bgn : -1; } |
inline int end() const { checkTail(); return tail ? tail->end : -1; } |
inline bool isEmpty() const { return !head; } |
bool overlaps(const Interval&) const; |
bool contains(int pos) const; |
inline int extent() const { return end() - begin(); } |
int length() const; |
void print() const; |
inline void checkTail() const; |
private: |
class Range |
{ |
public: |
Range(int a, int b) : next(0), bgn(a), end(b) { } |
Range *next; |
int bgn; |
int end; |
void coalesce(Range **ptail) |
{ |
Range *rnn; |
while (next && end >= next->bgn) { |
assert(bgn <= next->bgn); |
rnn = next->next; |
end = MAX2(end, next->end); |
delete next; |
next = rnn; |
} |
if (!next) |
*ptail = this; |
} |
}; |
Range *head; |
Range *tail; |
}; |
class BitSet |
{ |
public: |
BitSet() : marker(false), data(0), size(0) { } |
BitSet(unsigned int nBits, bool zero) : marker(false), data(0), size(0) |
{ |
allocate(nBits, zero); |
} |
~BitSet() |
{ |
if (data) |
FREE(data); |
} |
// allocate will keep old data iff size is unchanged |
bool allocate(unsigned int nBits, bool zero); |
bool resize(unsigned int nBits); // keep old data, zero additional bits |
inline unsigned int getSize() const { return size; } |
void fill(uint32_t val); |
void setOr(BitSet *, BitSet *); // second BitSet may be NULL |
inline void set(unsigned int i) |
{ |
assert(i < size); |
data[i / 32] |= 1 << (i % 32); |
} |
// NOTE: range may not cross 32 bit boundary (implies n <= 32) |
inline void setRange(unsigned int i, unsigned int n) |
{ |
assert((i + n) <= size && (((i % 32) + n) <= 32)); |
data[i / 32] |= ((1 << n) - 1) << (i % 32); |
} |
inline void setMask(unsigned int i, uint32_t m) |
{ |
assert(i < size); |
data[i / 32] |= m; |
} |
inline void clr(unsigned int i) |
{ |
assert(i < size); |
data[i / 32] &= ~(1 << (i % 32)); |
} |
// NOTE: range may not cross 32 bit boundary (implies n <= 32) |
inline void clrRange(unsigned int i, unsigned int n) |
{ |
assert((i + n) <= size && (((i % 32) + n) <= 32)); |
data[i / 32] &= ~(((1 << n) - 1) << (i % 32)); |
} |
inline bool test(unsigned int i) const |
{ |
assert(i < size); |
return data[i / 32] & (1 << (i % 32)); |
} |
// NOTE: range may not cross 32 bit boundary (implies n <= 32) |
inline bool testRange(unsigned int i, unsigned int n) const |
{ |
assert((i + n) <= size && (((i % 32) + n) <= 32)); |
return data[i / 32] & (((1 << n) - 1) << (i % 32)); |
} |
// Find a range of size (<= 32) clear bits aligned to roundup_pow2(size). |
int findFreeRange(unsigned int size) const; |
BitSet& operator|=(const BitSet&); |
BitSet& operator=(const BitSet& set) |
{ |
assert(data && set.data); |
assert(size == set.size); |
memcpy(data, set.data, (set.size + 7) / 8); |
return *this; |
} |
void andNot(const BitSet&); |
// bits = (bits | setMask) & ~clrMask |
inline void periodicMask32(uint32_t setMask, uint32_t clrMask) |
{ |
for (unsigned int i = 0; i < (size + 31) / 32; ++i) |
data[i] = (data[i] | setMask) & ~clrMask; |
} |
unsigned int popCount() const; |
void print() const; |
public: |
bool marker; // for user |
private: |
uint32_t *data; |
unsigned int size; |
}; |
void Interval::checkTail() const |
{ |
#if NV50_DEBUG & NV50_DEBUG_PROG_RA |
Range *r = head; |
while (r->next) |
r = r->next; |
assert(tail == r); |
#endif |
} |
class MemoryPool |
{ |
private: |
inline bool enlargeAllocationsArray(const unsigned int id, unsigned int nr) |
{ |
const unsigned int size = sizeof(uint8_t *) * id; |
const unsigned int incr = sizeof(uint8_t *) * nr; |
uint8_t **alloc = (uint8_t **)REALLOC(allocArray, size, size + incr); |
if (!alloc) |
return false; |
allocArray = alloc; |
return true; |
} |
inline bool enlargeCapacity() |
{ |
const unsigned int id = count >> objStepLog2; |
uint8_t *const mem = (uint8_t *)MALLOC(objSize << objStepLog2); |
if (!mem) |
return false; |
if (!(id % 32)) { |
if (!enlargeAllocationsArray(id, 32)) { |
FREE(mem); |
return false; |
} |
} |
allocArray[id] = mem; |
return true; |
} |
public: |
MemoryPool(unsigned int size, unsigned int incr) : objSize(size), |
objStepLog2(incr) |
{ |
allocArray = NULL; |
released = NULL; |
count = 0; |
} |
~MemoryPool() |
{ |
unsigned int allocCount = (count + (1 << objStepLog2) - 1) >> objStepLog2; |
for (unsigned int i = 0; i < allocCount && allocArray[i]; ++i) |
FREE(allocArray[i]); |
if (allocArray) |
FREE(allocArray); |
} |
void *allocate() |
{ |
void *ret; |
const unsigned int mask = (1 << objStepLog2) - 1; |
if (released) { |
ret = released; |
released = *(void **)released; |
return ret; |
} |
if (!(count & mask)) |
if (!enlargeCapacity()) |
return NULL; |
ret = allocArray[count >> objStepLog2] + (count & mask) * objSize; |
++count; |
return ret; |
} |
void release(void *ptr) |
{ |
*(void **)ptr = released; |
released = ptr; |
} |
private: |
uint8_t **allocArray; // array (list) of MALLOC allocations |
void *released; // list of released objects |
unsigned int count; // highest allocated object |
const unsigned int objSize; |
const unsigned int objStepLog2; |
}; |
/** |
* Composite object cloning policy. |
* |
* Encapsulates how sub-objects are to be handled (if at all) when a |
* composite object is being cloned. |
*/ |
template<typename C>
class ClonePolicy
{
protected:
   C *c; // context handed to the constructor

public:
   ClonePolicy(C *ctx) : c(ctx) {}

   C *context() { return c; }

   // Returns the clone registered for obj; if lookup() yields none, obj is
   // asked to clone itself under this policy.
   template<typename T> T *get(T *obj)
   {
      if (void *known = lookup(obj))
         return reinterpret_cast<T *>(known);

      void *fresh = obj->clone(*this);
      return reinterpret_cast<T *>(fresh);
   }

   // Registers clone as the copy of obj.
   template<typename T> void set(const T *obj, T *clone)
   {
      insert(obj, clone);
   }

protected:
   virtual void *lookup(void *obj) = 0;
   virtual void insert(const void *obj, void *clone) = 0;
};
/** |
* Shallow non-recursive cloning policy. |
* |
* Objects cloned with the "shallow" policy don't clone their |
* children recursively, instead, the new copy shares its children |
* with the original object. |
*/ |
template<typename C> |
class ShallowClonePolicy : public ClonePolicy<C> |
{ |
public: |
ShallowClonePolicy(C *c) : ClonePolicy<C>(c) {} |
protected: |
virtual void *lookup(void *obj) |
{ |
return obj; |
} |
virtual void insert(const void *obj, void *clone) |
{ |
} |
}; |
template<typename C, typename T> |
inline T *cloneShallow(C *c, T *obj) |
{ |
ShallowClonePolicy<C> pol(c); |
return obj->clone(pol); |
} |
/** |
* Recursive cloning policy. |
* |
* Objects cloned with the "deep" policy clone their children |
* recursively, keeping track of what has already been cloned to |
* avoid making several new copies of the same object. |
*/ |
template<typename C> |
class DeepClonePolicy : public ClonePolicy<C> |
{ |
public: |
DeepClonePolicy(C *c) : ClonePolicy<C>(c) {} |
private: |
std::map<const void *, void *> map; |
protected: |
virtual void *lookup(void *obj) |
{ |
return map[obj]; |
} |
virtual void insert(const void *obj, void *clone) |
{ |
map[obj] = clone; |
} |
}; |
// Pair of maps offering lookup in both directions: 'forth' (S -> T) and
// 'back' (T -> S), with read-only views 'r' and 'l' on them.
template<typename S, typename T>
struct bimap
{
   std::map<S, T> forth;
   std::map<T, S> back;

public:
   bimap() : l(back), r(forth) { }

   // The views of a copy refer to the copy's own maps.
   bimap(const bimap<S, T> &m)
      : forth(m.forth), back(m.back), l(back), r(forth) { }

   // Adds the association s <-> t to both maps; keys that are already
   // present keep their old value.
   void insert(const S &s, const T &t)
   {
      forth.insert(std::pair<S, T>(s, t));
      back.insert(std::pair<T, S>(t, s));
   }

   typedef typename std::map<T, S>::const_iterator l_iterator;
   const std::map<T, S> &l;

   typedef typename std::map<S, T>::const_iterator r_iterator;
   const std::map<S, T> &r;
};
} // namespace nv50_ir |
#endif // __NV50_IR_UTIL_H__ |